From a86635c049480f2ba18bd210bd68c58083fdffaf Mon Sep 17 00:00:00 2001 From: Dan Sutton Date: Wed, 17 Jun 2026 13:35:56 +0100 Subject: [PATCH 01/83] chore(run-store): scaffold @internal/run-store package --- apps/webapp/package.json | 1 + internal-packages/run-engine/package.json | 1 + internal-packages/run-store/package.json | 31 +++++++ internal-packages/run-store/src/index.ts | 3 + .../run-store/tsconfig.build.json | 21 +++++ internal-packages/run-store/tsconfig.json | 8 ++ internal-packages/run-store/tsconfig.src.json | 20 +++++ .../run-store/tsconfig.test.json | 21 +++++ pnpm-lock.yaml | 82 ++++++++----------- 9 files changed, 141 insertions(+), 47 deletions(-) create mode 100644 internal-packages/run-store/package.json create mode 100644 internal-packages/run-store/src/index.ts create mode 100644 internal-packages/run-store/tsconfig.build.json create mode 100644 internal-packages/run-store/tsconfig.json create mode 100644 internal-packages/run-store/tsconfig.src.json create mode 100644 internal-packages/run-store/tsconfig.test.json diff --git a/apps/webapp/package.json b/apps/webapp/package.json index 31d78667323..842c8855f41 100644 --- a/apps/webapp/package.json +++ b/apps/webapp/package.json @@ -61,6 +61,7 @@ "@internal/llm-model-catalog": "workspace:*", "@internal/redis": "workspace:*", "@internal/run-engine": "workspace:*", + "@internal/run-store": "workspace:*", "@internal/schedule-engine": "workspace:*", "@internal/tracing": "workspace:*", "@internal/tsql": "workspace:*", diff --git a/internal-packages/run-engine/package.json b/internal-packages/run-engine/package.json index 8b876a1aab6..414452da3b2 100644 --- a/internal-packages/run-engine/package.json +++ b/internal-packages/run-engine/package.json @@ -21,6 +21,7 @@ }, "dependencies": { "@internal/redis": "workspace:*", + "@internal/run-store": "workspace:*", "@trigger.dev/redis-worker": "workspace:*", "@internal/tracing": "workspace:*", "@trigger.dev/core": "workspace:*", diff --git 
a/internal-packages/run-store/package.json b/internal-packages/run-store/package.json new file mode 100644 index 00000000000..096888c4e96 --- /dev/null +++ b/internal-packages/run-store/package.json @@ -0,0 +1,31 @@ +{ + "name": "@internal/run-store", + "private": true, + "version": "0.0.1", + "main": "./dist/src/index.js", + "types": "./dist/src/index.d.ts", + "type": "module", + "exports": { + ".": { + "@triggerdotdev/source": "./src/index.ts", + "import": "./dist/src/index.js", + "types": "./dist/src/index.d.ts", + "default": "./dist/src/index.js" + } + }, + "dependencies": { + "@trigger.dev/core": "workspace:*", + "@trigger.dev/database": "workspace:*" + }, + "devDependencies": { + "@internal/testcontainers": "workspace:*", + "rimraf": "6.0.1" + }, + "scripts": { + "clean": "rimraf dist", + "typecheck": "tsc --noEmit -p tsconfig.build.json", + "test": "vitest --sequence.concurrent=false --no-file-parallelism", + "build": "pnpm run clean && tsc -p tsconfig.build.json", + "dev": "tsc --watch -p tsconfig.build.json" + } +} diff --git a/internal-packages/run-store/src/index.ts b/internal-packages/run-store/src/index.ts new file mode 100644 index 00000000000..7b1391aaf1c --- /dev/null +++ b/internal-packages/run-store/src/index.ts @@ -0,0 +1,3 @@ +export * from "./types"; +export * from "./PostgresRunStore"; +export * from "./NoopRunStore"; diff --git a/internal-packages/run-store/tsconfig.build.json b/internal-packages/run-store/tsconfig.build.json new file mode 100644 index 00000000000..89c87a3dc67 --- /dev/null +++ b/internal-packages/run-store/tsconfig.build.json @@ -0,0 +1,21 @@ +{ + "include": ["src/**/*.ts"], + "exclude": ["src/**/*.test.ts"], + "compilerOptions": { + "composite": true, + "target": "ES2020", + "lib": ["ES2020", "DOM", "DOM.Iterable", "DOM.AsyncIterable"], + "outDir": "dist", + "module": "Node16", + "moduleResolution": "Node16", + "moduleDetection": "force", + "verbatimModuleSyntax": false, + "esModuleInterop": true, + 
"forceConsistentCasingInFileNames": true, + "isolatedModules": true, + "preserveWatchOutput": true, + "skipLibCheck": true, + "strict": true, + "declaration": true + } +} diff --git a/internal-packages/run-store/tsconfig.json b/internal-packages/run-store/tsconfig.json new file mode 100644 index 00000000000..af630abe1f1 --- /dev/null +++ b/internal-packages/run-store/tsconfig.json @@ -0,0 +1,8 @@ +{ + "references": [{ "path": "./tsconfig.src.json" }, { "path": "./tsconfig.test.json" }], + "compilerOptions": { + "moduleResolution": "Node16", + "module": "Node16", + "customConditions": ["@triggerdotdev/source"] + } +} diff --git a/internal-packages/run-store/tsconfig.src.json b/internal-packages/run-store/tsconfig.src.json new file mode 100644 index 00000000000..0df3d2d222f --- /dev/null +++ b/internal-packages/run-store/tsconfig.src.json @@ -0,0 +1,20 @@ +{ + "include": ["src/**/*.ts"], + "exclude": ["node_modules", "src/**/*.test.ts"], + "compilerOptions": { + "composite": true, + "target": "ES2020", + "lib": ["ES2020", "DOM", "DOM.Iterable", "DOM.AsyncIterable"], + "module": "Node16", + "moduleResolution": "Node16", + "moduleDetection": "force", + "verbatimModuleSyntax": false, + "esModuleInterop": true, + "forceConsistentCasingInFileNames": true, + "isolatedModules": true, + "preserveWatchOutput": true, + "skipLibCheck": true, + "strict": true, + "customConditions": ["@triggerdotdev/source"] + } +} diff --git a/internal-packages/run-store/tsconfig.test.json b/internal-packages/run-store/tsconfig.test.json new file mode 100644 index 00000000000..4c06c9f57bb --- /dev/null +++ b/internal-packages/run-store/tsconfig.test.json @@ -0,0 +1,21 @@ +{ + "include": ["src/**/*.test.ts"], + "references": [{ "path": "./tsconfig.src.json" }], + "compilerOptions": { + "composite": true, + "target": "ES2020", + "lib": ["ES2020", "DOM", "DOM.Iterable", "DOM.AsyncIterable"], + "module": "Node16", + "moduleResolution": "Node16", + "moduleDetection": "force", + 
"verbatimModuleSyntax": false, + "types": ["vitest/globals"], + "esModuleInterop": true, + "forceConsistentCasingInFileNames": true, + "isolatedModules": true, + "preserveWatchOutput": true, + "skipLibCheck": true, + "strict": true, + "customConditions": ["@triggerdotdev/source"] + } +} diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index f78f48bfb8b..6526674d8c3 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -361,6 +361,9 @@ importers: '@internal/run-engine': specifier: workspace:* version: link:../../internal-packages/run-engine + '@internal/run-store': + specifier: workspace:* + version: link:../../internal-packages/run-store '@internal/schedule-engine': specifier: workspace:* version: link:../../internal-packages/schedule-engine @@ -1302,6 +1305,9 @@ importers: '@internal/redis': specifier: workspace:* version: link:../redis + '@internal/run-store': + specifier: workspace:* + version: link:../run-store '@internal/tracing': specifier: workspace:* version: link:../tracing @@ -1346,6 +1352,22 @@ importers: specifier: 6.0.1 version: 6.0.1 + internal-packages/run-store: + dependencies: + '@trigger.dev/core': + specifier: workspace:* + version: link:../../packages/core + '@trigger.dev/database': + specifier: workspace:* + version: link:../database + devDependencies: + '@internal/testcontainers': + specifier: workspace:* + version: link:../testcontainers + rimraf: + specifier: 6.0.1 + version: 6.0.1 + internal-packages/schedule-engine: dependencies: '@internal/redis': @@ -11601,12 +11623,6 @@ packages: deprecated: Old versions of glob are not supported, and contain widely publicized security vulnerabilities, which have been fixed in the current version. Please update. 
Support for old versions may be purchased (at exorbitant rates) by contacting i@izs.me hasBin: true - glob@11.0.0: - resolution: {integrity: sha512-9UiX/Bl6J2yaBbxKoEBRm4Cipxgok8kQYcOPEhScPwebu2I0HoQOuYdIO6S3hLuWoZgpDpwQZMzTFxgpkyT76g==} - engines: {node: 20 || >=22} - deprecated: Old versions of glob are not supported, and contain widely publicized security vulnerabilities, which have been fixed in the current version. Please update. Support for old versions may be purchased (at exorbitant rates) by contacting i@izs.me - hasBin: true - glob@11.1.0: resolution: {integrity: sha512-vuNwKSaKiqm7g0THUBu2x7ckSs3XJLXE+2ssL7/MfTGPLLcrJQ/4Uq1CjPTtO5cCIiRxqvN6Twy1qOwhL0Xjcw==} engines: {node: 20 || >=22} @@ -12262,10 +12278,6 @@ packages: jackspeak@3.4.3: resolution: {integrity: sha512-OGlZQpz2yfahA/Rd1Y8Cd9SIEsqvXkLVoSw/cgwhnhFMDbsQFeZYoJJ7bIZBS9BcamUW96asq/npPWugM+RQBw==} - jackspeak@4.0.1: - resolution: {integrity: sha512-cub8rahkh0Q/bw1+GxP7aeSe29hHHn2V4m29nnDlvCdlgU+3UGxkZp7Z53jLUdpX3jdTO0nJZUDl3xvbWc2Xog==} - engines: {node: 20 || >=22} - jackspeak@4.2.3: resolution: {integrity: sha512-ykkVRwrYvFm1nb2AJfKKYPr0emF6IiXDYUaFx4Zn9ZuIH7MrzEZ3sD5RlqGXNRpHtvUHJyOnCEFxOlNDtGo7wg==} engines: {node: 20 || >=22} @@ -13985,10 +13997,6 @@ packages: resolution: {integrity: sha512-Xa4Nw17FS9ApQFJ9umLiJS4orGjm7ZzwUrwamcGQuHSzDyth9boKDaycYdDcZDuqYATXw4HFXgaqWTctW/v1HA==} engines: {node: '>=16 || 14 >=14.18'} - path-scurry@2.0.0: - resolution: {integrity: sha512-ypGJsmGtdXUOeM5u93TyeIEfEhM6s+ljAhrk5vAvSx8uyY/02OvrZnA0YNGUrPXfpJMgI1ODd3nwz8Npx4O4cg==} - engines: {node: 20 || >=22} - path-scurry@2.0.2: resolution: {integrity: sha512-3O/iVVsJAPsOnpwWIeD+d6z/7PmqApyQePUtCndjatj/9I5LylHvt5qluFaBT3I5h3r1ejfR056c+FCv+NnNXg==} engines: {node: 18 || 20 || >=22} @@ -20759,7 +20767,7 @@ snapshots: '@isaacs/fs-minipass@4.0.1': dependencies: - minipass: 7.1.2 + minipass: 7.1.3 '@jridgewell/gen-mapping@0.3.13': dependencies: @@ -29420,7 +29428,7 @@ snapshots: fs-minipass@3.0.3: dependencies: - 
minipass: 7.1.2 + minipass: 7.1.3 fs.realpath@1.0.0: {} @@ -29557,30 +29565,21 @@ snapshots: glob@10.4.5: dependencies: - foreground-child: 3.1.1 + foreground-child: 3.3.1 jackspeak: 3.4.3 minimatch: 9.0.5 - minipass: 7.1.2 + minipass: 7.1.3 package-json-from-dist: 1.0.0 path-scurry: 1.11.1 - glob@11.0.0: - dependencies: - foreground-child: 3.1.1 - jackspeak: 4.0.1 - minimatch: 10.0.1 - minipass: 7.1.2 - package-json-from-dist: 1.0.0 - path-scurry: 2.0.0 - glob@11.1.0: dependencies: foreground-child: 3.3.1 jackspeak: 4.2.3 minimatch: 10.2.5 - minipass: 7.1.2 + minipass: 7.1.3 package-json-from-dist: 1.0.0 - path-scurry: 2.0.0 + path-scurry: 2.0.2 glob@13.0.6: dependencies: @@ -30285,12 +30284,6 @@ snapshots: optionalDependencies: '@pkgjs/parseargs': 0.11.0 - jackspeak@4.0.1: - dependencies: - '@isaacs/cliui': 8.0.2 - optionalDependencies: - '@pkgjs/parseargs': 0.11.0 - jackspeak@4.2.3: dependencies: '@isaacs/cliui': 9.0.0 @@ -31663,7 +31656,7 @@ snapshots: minizlib@3.1.0: dependencies: - minipass: 7.1.2 + minipass: 7.1.3 mixme@0.5.4: {} @@ -32339,12 +32332,7 @@ snapshots: path-scurry@1.11.1: dependencies: lru-cache: 10.4.3 - minipass: 7.1.2 - - path-scurry@2.0.0: - dependencies: - lru-cache: 11.2.4 - minipass: 7.1.2 + minipass: 7.1.3 path-scurry@2.0.2: dependencies: @@ -33656,7 +33644,7 @@ snapshots: resolve-import@2.0.0: dependencies: - glob: 11.0.0 + glob: 11.1.0 walk-up-path: 4.0.0 resolve-pkg-maps@1.0.0: {} @@ -33704,7 +33692,7 @@ snapshots: rimraf@6.0.1: dependencies: - glob: 11.0.0 + glob: 11.1.0 package-json-from-dist: 1.0.0 robust-predicates@3.0.2: {} @@ -34249,7 +34237,7 @@ snapshots: ssri@10.0.5: dependencies: - minipass: 7.1.2 + minipass: 7.1.3 stack-generator@2.0.10: dependencies: @@ -34540,9 +34528,9 @@ snapshots: sync-content@2.0.1: dependencies: - glob: 11.0.0 + glob: 11.1.0 mkdirp: 3.0.1 - path-scurry: 2.0.0 + path-scurry: 2.0.2 rimraf: 6.0.1 tshy: 3.0.2 From d4c1ff4add7d14178bd1d98d71f30fcf062efb78 Mon Sep 17 00:00:00 2001 From: Dan Sutton Date: 
Wed, 17 Jun 2026 13:39:07 +0100 Subject: [PATCH 02/83] feat(run-store): add shared types and the RunStore interface --- internal-packages/run-store/src/types.ts | 320 +++++++++++++++++++++++ 1 file changed, 320 insertions(+) create mode 100644 internal-packages/run-store/src/types.ts diff --git a/internal-packages/run-store/src/types.ts b/internal-packages/run-store/src/types.ts new file mode 100644 index 00000000000..9b83ec3c94e --- /dev/null +++ b/internal-packages/run-store/src/types.ts @@ -0,0 +1,320 @@ +import type { + Prisma, + PrismaClientOrTransaction, + TaskRun, + TaskRunStatus, + TaskRunExecutionStatus, + RuntimeEnvironmentType, + Waitpoint, +} from "@trigger.dev/database"; +import type { TaskRunError } from "@trigger.dev/core/v3/schemas"; + +export type CreateRunSnapshotInput = { + engine: "V2"; + executionStatus: TaskRunExecutionStatus; + description: string; + runStatus: TaskRunStatus; + environmentId: string; + environmentType: RuntimeEnvironmentType; + projectId: string; + organizationId: string; + workerId?: string; + runnerId?: string; +}; + +export type CompletionSnapshotInput = { + executionStatus: "FINISHED"; + description: string; + runStatus: TaskRunStatus; + attemptNumber: number | null; + environmentId: string; + environmentType: RuntimeEnvironmentType; + projectId: string; + organizationId: string; + workerId?: string; + runnerId?: string; +}; + +export type ExpireSnapshotInput = { + engine: "V2"; + executionStatus: "FINISHED"; + description: string; + runStatus: TaskRunStatus; + environmentId: string; + environmentType: RuntimeEnvironmentType; + projectId: string; + organizationId: string; +}; + +export type RescheduleSnapshotInput = { + environmentId: string; + environmentType: RuntimeEnvironmentType; + projectId: string; + organizationId: string; +}; + +export type LockSnapshotInput = { + id: string; + previousSnapshotId: string; + attemptNumber?: number; + environmentId: string; + environmentType: RuntimeEnvironmentType; + projectId: 
string; + organizationId: string; + checkpointId?: string; + batchId?: string; + completedWaitpointIds: string[]; + completedWaitpointOrder: string[]; + workerId?: string; + runnerId?: string; +}; + +export type RunAssociatedWaitpointInput = { + id: string; + type: "RUN"; + status: "PENDING"; + idempotencyKey: string; + userProvidedIdempotencyKey: false; + projectId: string; + environmentId: string; +}; + +// The ~60 trigger columns (the existing Prisma create `data` minus the nested relation creates). +export type CreateRunData = { + id: string; + engine: "V2"; + status: TaskRunStatus; + friendlyId: string; + runtimeEnvironmentId: string; + environmentType: RuntimeEnvironmentType; + organizationId: string; + projectId: string; + idempotencyKey?: string; + idempotencyKeyExpiresAt?: Date; + idempotencyKeyOptions?: string[]; + taskIdentifier: string; + payload: string; + payloadType: string; + context?: Prisma.InputJsonValue; + traceContext: Prisma.InputJsonValue; + traceId: string; + spanId: string; + parentSpanId?: string; + lockedToVersionId?: string; + taskVersion?: string; + sdkVersion?: string; + cliVersion?: string; + concurrencyKey?: string; + queue: string; + lockedQueueId?: string; + workerQueue?: string; + isTest: boolean; + delayUntil?: Date; + queuedAt?: Date; + maxAttempts?: number; + taskEventStore: string; + priorityMs?: number; + queueTimestamp?: Date; + ttl?: string; + runTags?: string[]; + oneTimeUseToken?: string; + parentTaskRunId?: string; + rootTaskRunId?: string; + replayedFromTaskRunFriendlyId?: string; + batchId?: string; + resumeParentOnCompletion?: boolean; + depth: number; + metadata?: string; + metadataType?: string; + seedMetadata?: string; + seedMetadataType?: string; + maxDurationInSeconds?: number; + machinePreset?: string; + scheduleId?: string; + scheduleInstanceId?: string; + createdAt?: Date; + bulkActionGroupIds?: string[]; + planType?: string; + realtimeStreamsVersion?: string; + streamBasinName?: string; + debounce?: 
Prisma.InputJsonValue; + annotations?: Prisma.InputJsonValue; +}; + +export type CreateRunInput = { + data: CreateRunData; + snapshot: CreateRunSnapshotInput; + associatedWaitpoint?: RunAssociatedWaitpointInput; +}; + +export type CreateCancelledRunInput = { + data: CreateRunData & { error: Prisma.InputJsonValue; completedAt: Date; updatedAt: Date; attemptNumber: 0 }; + snapshot: CreateRunSnapshotInput; +}; + +export type CreateFailedRunData = { + id: string; + engine: "V2"; + status: "SYSTEM_FAILURE"; + friendlyId: string; + runtimeEnvironmentId: string; + environmentType: RuntimeEnvironmentType; + organizationId: string; + projectId: string; + taskIdentifier: string; + payload: string; + payloadType: string; + context: Prisma.InputJsonValue; + traceContext: Prisma.InputJsonValue; + traceId: string; + spanId: string; + queue: string; + lockedQueueId?: string; + isTest: false; + completedAt: Date; + error: Prisma.InputJsonObject; + parentTaskRunId?: string; + rootTaskRunId?: string; + depth: number; + batchId?: string; + resumeParentOnCompletion?: boolean; + taskEventStore: string; +}; + +export type CreateFailedRunInput = { + data: CreateFailedRunData; + associatedWaitpoint?: RunAssociatedWaitpointInput; +}; + +export type LockRunData = { + lockedAt: Date; + lockedById: string; + lockedToVersionId: string; + lockedQueueId: string; + lockedRetryConfig?: Prisma.InputJsonValue; + startedAt: Date; + baseCostInCents: number; + machinePreset: string; + taskVersion: string; + sdkVersion: string | null; + cliVersion: string | null; + maxDurationInSeconds: number | null; + maxAttempts?: number; + snapshot: LockSnapshotInput; +}; + +export type RewriteDebouncedRunData = { + payload: string; + payloadType: string; + metadata?: string; + metadataType?: string; + maxAttempts?: number; + maxDurationInSeconds?: number; + machinePreset?: string; + runTags?: string[]; +}; + +export type ClearIdempotencyKeyInput = + | { byId: { runId: string; idempotencyKey: string }; byPredicate?: 
never; byFriendlyIds?: never } + | { byPredicate: { idempotencyKey: string; taskIdentifier: string; runtimeEnvironmentId: string }; byId?: never; byFriendlyIds?: never } + | { byFriendlyIds: string[]; byId?: never; byPredicate?: never }; + +export type TaskRunWithWaitpoint = TaskRun & { associatedWaitpoint: Waitpoint | null }; + +export interface RunStore { + // Create + createRun(params: CreateRunInput, tx?: PrismaClientOrTransaction): Promise; + createCancelledRun(params: CreateCancelledRunInput, tx?: PrismaClientOrTransaction): Promise; + createFailedRun(params: CreateFailedRunInput, tx?: PrismaClientOrTransaction): Promise; + + // Attempt lifecycle + startAttempt( + runId: string, + data: { attemptNumber: number; executedAt?: Date; isWarmStart: boolean }, + args: { select: S }, + tx?: PrismaClientOrTransaction + ): Promise>; + completeAttemptSuccess( + runId: string, + data: { completedAt: Date; output?: string; outputType: string; usageDurationMs: number; costInCents: number; snapshot: CompletionSnapshotInput }, + args: { select: S }, + tx?: PrismaClientOrTransaction + ): Promise>; + recordRetryOutcome( + runId: string, + data: { machinePreset: string; usageDurationMs: number; costInCents: number }, + args: { include: I }, + tx?: PrismaClientOrTransaction + ): Promise>; + requeueRun( + runId: string, + args: { select: S }, + tx?: PrismaClientOrTransaction + ): Promise>; + recordBulkActionMembership(runId: string, bulkActionId: string, tx?: PrismaClientOrTransaction): Promise; + cancelRun( + runId: string, + data: { completedAt?: Date; error: TaskRunError; bulkActionId?: string; usageDurationMs?: number; costInCents?: number }, + args: { select: S }, + tx?: PrismaClientOrTransaction + ): Promise>; + failRunPermanently( + runId: string, + data: { status: TaskRunStatus; completedAt: Date; error: TaskRunError; usageDurationMs: number; costInCents: number }, + args: { select: S }, + tx?: PrismaClientOrTransaction + ): Promise>; + + // Expiry + expireRun( + runId: 
string, + data: { error: TaskRunError; completedAt: Date; expiredAt: Date; snapshot: ExpireSnapshotInput }, + args: { select: S }, + tx?: PrismaClientOrTransaction + ): Promise>; + expireRunsBatch(runIds: string[], data: { error: TaskRunError; now: Date }, tx?: PrismaClientOrTransaction): Promise; + + // Dequeue / version / checkpoint + lockRunToWorker( + runId: string, + data: LockRunData, + tx?: PrismaClientOrTransaction + ): Promise>; + parkPendingVersion( + runId: string, + data: { statusReason: string }, + args: { select: S }, + tx?: PrismaClientOrTransaction + ): Promise>; + promotePendingVersionRuns(runId: string, tx?: PrismaClientOrTransaction): Promise<{ count: number }>; + suspendForCheckpoint( + runId: string, + args: { include: I }, + tx?: PrismaClientOrTransaction + ): Promise>; + resumeFromCheckpoint( + runId: string, + args: { select: S }, + tx?: PrismaClientOrTransaction + ): Promise>; + + // Delayed / debounce + rescheduleRun( + runId: string, + data: { delayUntil: Date; queueTimestamp?: Date; snapshot?: RescheduleSnapshotInput }, + tx?: PrismaClientOrTransaction + ): Promise; + enqueueDelayedRun(runId: string, data: { queuedAt: Date }, tx?: PrismaClientOrTransaction): Promise; + rewriteDebouncedRun(runId: string, data: RewriteDebouncedRunData, tx?: PrismaClientOrTransaction): Promise; + + // Field touches + updateMetadata( + runId: string, + data: { metadata: string | null; metadataType?: string; metadataVersion: { increment: number }; updatedAt: Date }, + options: { expectedMetadataVersion?: number }, + tx?: PrismaClientOrTransaction + ): Promise<{ count: number }>; + clearIdempotencyKey(params: ClearIdempotencyKeyInput, tx?: PrismaClientOrTransaction): Promise<{ count: number }>; + pushTags(runId: string, tags: string[], where: { runtimeEnvironmentId: string }, tx?: PrismaClientOrTransaction): Promise<{ updatedAt: Date }>; + pushRealtimeStream(runId: string, streamId: string, tx?: PrismaClientOrTransaction): Promise; +} From 
6d7ababeef997080093c3156a6432cd3b00d630b Mon Sep 17 00:00:00 2001 From: Dan Sutton Date: Wed, 17 Jun 2026 13:40:48 +0100 Subject: [PATCH 03/83] chore(run-store): use .js extensions in index re-exports for Node16 resolution --- internal-packages/run-store/src/index.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/internal-packages/run-store/src/index.ts b/internal-packages/run-store/src/index.ts index 7b1391aaf1c..de9f7620d7c 100644 --- a/internal-packages/run-store/src/index.ts +++ b/internal-packages/run-store/src/index.ts @@ -1,3 +1,3 @@ -export * from "./types"; -export * from "./PostgresRunStore"; -export * from "./NoopRunStore"; +export * from "./types.js"; +export * from "./PostgresRunStore.js"; +export * from "./NoopRunStore.js"; From 010cf17dae36185727fac5092e3dda7bf3ddb3e3 Mon Sep 17 00:00:00 2001 From: Dan Sutton Date: Wed, 17 Jun 2026 13:41:40 +0100 Subject: [PATCH 04/83] feat(run-store): add NoopRunStore test double --- .../run-store/src/NoopRunStore.ts | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 internal-packages/run-store/src/NoopRunStore.ts diff --git a/internal-packages/run-store/src/NoopRunStore.ts b/internal-packages/run-store/src/NoopRunStore.ts new file mode 100644 index 00000000000..3b4fb0a36fe --- /dev/null +++ b/internal-packages/run-store/src/NoopRunStore.ts @@ -0,0 +1,32 @@ +import type { RunStore } from "./types.js"; + +/** Test double: throws on any call. Inject into units that must not write runs. 
*/ +export class NoopRunStore implements RunStore { + private fail(method: string): never { + throw new Error(`NoopRunStore.${method} called`); + } + createRun(): never { return this.fail("createRun"); } + createCancelledRun(): never { return this.fail("createCancelledRun"); } + createFailedRun(): never { return this.fail("createFailedRun"); } + startAttempt(): never { return this.fail("startAttempt"); } + completeAttemptSuccess(): never { return this.fail("completeAttemptSuccess"); } + recordRetryOutcome(): never { return this.fail("recordRetryOutcome"); } + requeueRun(): never { return this.fail("requeueRun"); } + recordBulkActionMembership(): never { return this.fail("recordBulkActionMembership"); } + cancelRun(): never { return this.fail("cancelRun"); } + failRunPermanently(): never { return this.fail("failRunPermanently"); } + expireRun(): never { return this.fail("expireRun"); } + expireRunsBatch(): never { return this.fail("expireRunsBatch"); } + lockRunToWorker(): never { return this.fail("lockRunToWorker"); } + parkPendingVersion(): never { return this.fail("parkPendingVersion"); } + promotePendingVersionRuns(): never { return this.fail("promotePendingVersionRuns"); } + suspendForCheckpoint(): never { return this.fail("suspendForCheckpoint"); } + resumeFromCheckpoint(): never { return this.fail("resumeFromCheckpoint"); } + rescheduleRun(): never { return this.fail("rescheduleRun"); } + enqueueDelayedRun(): never { return this.fail("enqueueDelayedRun"); } + rewriteDebouncedRun(): never { return this.fail("rewriteDebouncedRun"); } + updateMetadata(): never { return this.fail("updateMetadata"); } + clearIdempotencyKey(): never { return this.fail("clearIdempotencyKey"); } + pushTags(): never { return this.fail("pushTags"); } + pushRealtimeStream(): never { return this.fail("pushRealtimeStream"); } +} From 72a7462a71605ae52b6ed8f358cc18317f68f29e Mon Sep 17 00:00:00 2001 From: Dan Sutton Date: Wed, 17 Jun 2026 13:50:48 +0100 Subject: [PATCH 05/83] 
feat(run-store): add PostgresRunStore with createRun --- .../run-store/src/PostgresRunStore.test.ts | 112 +++++++ .../run-store/src/PostgresRunStore.ts | 293 ++++++++++++++++++ internal-packages/run-store/src/types.ts | 1 + internal-packages/run-store/vitest.config.mts | 11 + 4 files changed, 417 insertions(+) create mode 100644 internal-packages/run-store/src/PostgresRunStore.test.ts create mode 100644 internal-packages/run-store/src/PostgresRunStore.ts create mode 100644 internal-packages/run-store/vitest.config.mts diff --git a/internal-packages/run-store/src/PostgresRunStore.test.ts b/internal-packages/run-store/src/PostgresRunStore.test.ts new file mode 100644 index 00000000000..661e5368192 --- /dev/null +++ b/internal-packages/run-store/src/PostgresRunStore.test.ts @@ -0,0 +1,112 @@ +import { postgresTest } from "@internal/testcontainers"; +import type { PrismaClient } from "@trigger.dev/database"; +import { describe, expect } from "vitest"; +import { PostgresRunStore } from "./PostgresRunStore.js"; +import type { CreateRunInput } from "./types.js"; + +async function seedEnvironment(prisma: PrismaClient) { + const organization = await prisma.organization.create({ + data: { + title: "Test Organization", + slug: "test-organization", + }, + }); + + const project = await prisma.project.create({ + data: { + name: "Test Project", + slug: "test-project", + externalRef: "proj_1234", + organizationId: organization.id, + }, + }); + + const environment = await prisma.runtimeEnvironment.create({ + data: { + type: "DEVELOPMENT", + slug: "dev", + projectId: project.id, + organizationId: organization.id, + apiKey: "tr_dev_apikey", + pkApiKey: "pk_dev_apikey", + shortcode: "short_code", + }, + }); + + return { organization, project, environment }; +} + +function buildCreateRunInput(params: { + runId: string; + organizationId: string; + projectId: string; + runtimeEnvironmentId: string; +}): CreateRunInput { + return { + data: { + id: params.runId, + engine: "V2", + status: 
"PENDING", + friendlyId: "run_friendly_1", + runtimeEnvironmentId: params.runtimeEnvironmentId, + environmentType: "DEVELOPMENT", + organizationId: params.organizationId, + projectId: params.projectId, + taskIdentifier: "my-task", + payload: "{}", + payloadType: "application/json", + traceContext: {}, + traceId: "trace_1", + spanId: "span_1", + queue: "task/my-task", + isTest: false, + taskEventStore: "taskEvent", + depth: 0, + }, + snapshot: { + engine: "V2", + executionStatus: "RUN_CREATED", + description: "Run was created", + runStatus: "PENDING", + environmentId: params.runtimeEnvironmentId, + environmentType: "DEVELOPMENT", + projectId: params.projectId, + organizationId: params.organizationId, + }, + }; +} + +describe("PostgresRunStore", () => { + postgresTest("createRun creates the run with one snapshot and no waitpoint", async ({ prisma }) => { + const { organization, project, environment } = await seedEnvironment(prisma); + + const store = new PostgresRunStore({ + prisma, + // The read-only client just needs to be a PrismaClient for these tests. 
+ readOnlyPrisma: prisma, + }); + + const runId = "run_test_1"; + + const run = await store.createRun( + buildCreateRunInput({ + runId, + organizationId: organization.id, + projectId: project.id, + runtimeEnvironmentId: environment.id, + }) + ); + + expect(run.id).toBe(runId); + expect(run.status).toBe("PENDING"); + expect(run.associatedWaitpoint).toBeNull(); + + const snapshots = await prisma.taskRunExecutionSnapshot.findMany({ + where: { runId }, + }); + + expect(snapshots).toHaveLength(1); + expect(snapshots[0]?.executionStatus).toBe("RUN_CREATED"); + expect(snapshots[0]?.runStatus).toBe("PENDING"); + }); +}); diff --git a/internal-packages/run-store/src/PostgresRunStore.ts b/internal-packages/run-store/src/PostgresRunStore.ts new file mode 100644 index 00000000000..777b1a60979 --- /dev/null +++ b/internal-packages/run-store/src/PostgresRunStore.ts @@ -0,0 +1,293 @@ +import type { + Prisma, + PrismaClient, + PrismaClientOrTransaction, + PrismaReplicaClient, + TaskRun, + TaskRunStatus, +} from "@trigger.dev/database"; +import type { + ClearIdempotencyKeyInput, + CompletionSnapshotInput, + CreateCancelledRunInput, + CreateFailedRunInput, + CreateRunInput, + ExpireSnapshotInput, + LockRunData, + RescheduleSnapshotInput, + RewriteDebouncedRunData, + RunStore, + TaskRunWithWaitpoint, +} from "./types.js"; +import type { TaskRunError } from "@trigger.dev/core/v3/schemas"; + +export type PostgresRunStoreOptions = { + prisma: PrismaClient; + readOnlyPrisma: PrismaReplicaClient; +}; + +/** + * Typed write layer for the task-run row, backed by the `taskRun` Prisma model. + * + * Each method is a verbatim relocation of the Prisma statement that lives at a + * specific call site today. Methods write through `(tx ?? this.prisma).taskRun` + * so callers can opt into an existing transaction. Errors (including unique + * constraint violations) propagate to the caller unchanged. 
+ */ +export class PostgresRunStore implements RunStore { + private readonly prisma: PrismaClient; + private readonly readOnlyPrisma: PrismaReplicaClient; + + constructor(options: PostgresRunStoreOptions) { + this.prisma = options.prisma; + this.readOnlyPrisma = options.readOnlyPrisma; + } + + async createRun( + params: CreateRunInput, + tx?: PrismaClientOrTransaction + ): Promise { + const client = tx ?? this.prisma; + + return client.taskRun.create({ + include: { + associatedWaitpoint: true, + }, + data: { + ...params.data, + executionSnapshots: { + create: { + engine: params.snapshot.engine, + executionStatus: params.snapshot.executionStatus, + description: params.snapshot.description, + runStatus: params.snapshot.runStatus, + environmentId: params.snapshot.environmentId, + environmentType: params.snapshot.environmentType, + projectId: params.snapshot.projectId, + organizationId: params.snapshot.organizationId, + workerId: params.snapshot.workerId, + runnerId: params.snapshot.runnerId, + }, + }, + associatedWaitpoint: params.associatedWaitpoint + ? 
{ + create: params.associatedWaitpoint, + } + : undefined, + }, + }); + } + + createCancelledRun( + _params: CreateCancelledRunInput, + _tx?: PrismaClientOrTransaction + ): Promise { + throw new Error("not implemented"); + } + + createFailedRun( + _params: CreateFailedRunInput, + _tx?: PrismaClientOrTransaction + ): Promise { + throw new Error("not implemented"); + } + + startAttempt( + _runId: string, + _data: { attemptNumber: number; executedAt?: Date; isWarmStart: boolean }, + _args: { select: S }, + _tx?: PrismaClientOrTransaction + ): Promise> { + throw new Error("not implemented"); + } + + completeAttemptSuccess( + _runId: string, + _data: { + completedAt: Date; + output?: string; + outputType: string; + usageDurationMs: number; + costInCents: number; + snapshot: CompletionSnapshotInput; + }, + _args: { select: S }, + _tx?: PrismaClientOrTransaction + ): Promise> { + throw new Error("not implemented"); + } + + recordRetryOutcome( + _runId: string, + _data: { machinePreset: string; usageDurationMs: number; costInCents: number }, + _args: { include: I }, + _tx?: PrismaClientOrTransaction + ): Promise> { + throw new Error("not implemented"); + } + + requeueRun( + _runId: string, + _args: { select: S }, + _tx?: PrismaClientOrTransaction + ): Promise> { + throw new Error("not implemented"); + } + + recordBulkActionMembership( + _runId: string, + _bulkActionId: string, + _tx?: PrismaClientOrTransaction + ): Promise { + throw new Error("not implemented"); + } + + cancelRun( + _runId: string, + _data: { + completedAt?: Date; + error: TaskRunError; + bulkActionId?: string; + usageDurationMs?: number; + costInCents?: number; + }, + _args: { select: S }, + _tx?: PrismaClientOrTransaction + ): Promise> { + throw new Error("not implemented"); + } + + failRunPermanently( + _runId: string, + _data: { + status: TaskRunStatus; + completedAt: Date; + error: TaskRunError; + usageDurationMs: number; + costInCents: number; + }, + _args: { select: S }, + _tx?: 
PrismaClientOrTransaction + ): Promise> { + throw new Error("not implemented"); + } + + expireRun( + _runId: string, + _data: { error: TaskRunError; completedAt: Date; expiredAt: Date; snapshot: ExpireSnapshotInput }, + _args: { select: S }, + _tx?: PrismaClientOrTransaction + ): Promise> { + throw new Error("not implemented"); + } + + expireRunsBatch( + _runIds: string[], + _data: { error: TaskRunError; now: Date }, + _tx?: PrismaClientOrTransaction + ): Promise { + throw new Error("not implemented"); + } + + lockRunToWorker( + _runId: string, + _data: LockRunData, + _tx?: PrismaClientOrTransaction + ): Promise> { + throw new Error("not implemented"); + } + + parkPendingVersion( + _runId: string, + _data: { statusReason: string }, + _args: { select: S }, + _tx?: PrismaClientOrTransaction + ): Promise> { + throw new Error("not implemented"); + } + + promotePendingVersionRuns( + _runId: string, + _tx?: PrismaClientOrTransaction + ): Promise<{ count: number }> { + throw new Error("not implemented"); + } + + suspendForCheckpoint( + _runId: string, + _args: { include: I }, + _tx?: PrismaClientOrTransaction + ): Promise> { + throw new Error("not implemented"); + } + + resumeFromCheckpoint( + _runId: string, + _args: { select: S }, + _tx?: PrismaClientOrTransaction + ): Promise> { + throw new Error("not implemented"); + } + + rescheduleRun( + _runId: string, + _data: { delayUntil: Date; queueTimestamp?: Date; snapshot?: RescheduleSnapshotInput }, + _tx?: PrismaClientOrTransaction + ): Promise { + throw new Error("not implemented"); + } + + enqueueDelayedRun( + _runId: string, + _data: { queuedAt: Date }, + _tx?: PrismaClientOrTransaction + ): Promise { + throw new Error("not implemented"); + } + + rewriteDebouncedRun( + _runId: string, + _data: RewriteDebouncedRunData, + _tx?: PrismaClientOrTransaction + ): Promise { + throw new Error("not implemented"); + } + + updateMetadata( + _runId: string, + _data: { + metadata: string | null; + metadataType?: string; + 
metadataVersion: { increment: number }; + updatedAt: Date; + }, + _options: { expectedMetadataVersion?: number }, + _tx?: PrismaClientOrTransaction + ): Promise<{ count: number }> { + throw new Error("not implemented"); + } + + clearIdempotencyKey( + _params: ClearIdempotencyKeyInput, + _tx?: PrismaClientOrTransaction + ): Promise<{ count: number }> { + throw new Error("not implemented"); + } + + pushTags( + _runId: string, + _tags: string[], + _where: { runtimeEnvironmentId: string }, + _tx?: PrismaClientOrTransaction + ): Promise<{ updatedAt: Date }> { + throw new Error("not implemented"); + } + + pushRealtimeStream( + _runId: string, + _streamId: string, + _tx?: PrismaClientOrTransaction + ): Promise { + throw new Error("not implemented"); + } +} diff --git a/internal-packages/run-store/src/types.ts b/internal-packages/run-store/src/types.ts index 9b83ec3c94e..9bef8219183 100644 --- a/internal-packages/run-store/src/types.ts +++ b/internal-packages/run-store/src/types.ts @@ -71,6 +71,7 @@ export type LockSnapshotInput = { export type RunAssociatedWaitpointInput = { id: string; + friendlyId: string; type: "RUN"; status: "PENDING"; idempotencyKey: string; diff --git a/internal-packages/run-store/vitest.config.mts b/internal-packages/run-store/vitest.config.mts new file mode 100644 index 00000000000..9ba46467cad --- /dev/null +++ b/internal-packages/run-store/vitest.config.mts @@ -0,0 +1,11 @@ +import { defineConfig } from "vitest/config"; + +export default defineConfig({ + test: { + include: ["**/*.test.ts"], + globals: true, + isolate: true, + fileParallelism: false, + testTimeout: 120_000, + }, +}); From 2e6322300f25eb11e8f8b9c1394f2a14ca805d13 Mon Sep 17 00:00:00 2001 From: Dan Sutton Date: Wed, 17 Jun 2026 13:56:32 +0100 Subject: [PATCH 06/83] feat(run-store): implement createCancelledRun and createFailedRun --- .../run-store/src/PostgresRunStore.test.ts | 125 +++++++++++++++++- .../run-store/src/PostgresRunStore.ts | 50 +++++-- 2 files changed, 166 
insertions(+), 9 deletions(-) diff --git a/internal-packages/run-store/src/PostgresRunStore.test.ts b/internal-packages/run-store/src/PostgresRunStore.test.ts index 661e5368192..5c793196cc0 100644 --- a/internal-packages/run-store/src/PostgresRunStore.test.ts +++ b/internal-packages/run-store/src/PostgresRunStore.test.ts @@ -2,7 +2,7 @@ import { postgresTest } from "@internal/testcontainers"; import type { PrismaClient } from "@trigger.dev/database"; import { describe, expect } from "vitest"; import { PostgresRunStore } from "./PostgresRunStore.js"; -import type { CreateRunInput } from "./types.js"; +import type { CreateCancelledRunInput, CreateFailedRunInput, CreateRunInput } from "./types.js"; async function seedEnvironment(prisma: PrismaClient) { const organization = await prisma.organization.create({ @@ -109,4 +109,127 @@ describe("PostgresRunStore", () => { expect(snapshots[0]?.executionStatus).toBe("RUN_CREATED"); expect(snapshots[0]?.runStatus).toBe("PENDING"); }); + + postgresTest( + "createCancelledRun creates a CANCELED run with one FINISHED/CANCELED execution snapshot", + async ({ prisma }) => { + const { organization, project, environment } = await seedEnvironment(prisma); + + const store = new PostgresRunStore({ + prisma, + readOnlyPrisma: prisma, + }); + + const runId = "run_cancelled_1"; + const cancelledAt = new Date("2026-01-01T00:00:00.000Z"); + const error = { type: "STRING_ERROR", raw: "cancelled before dispatch" }; + + const input: CreateCancelledRunInput = { + data: { + id: runId, + engine: "V2", + status: "CANCELED", + friendlyId: "run_cancelled_friendly_1", + runtimeEnvironmentId: environment.id, + environmentType: "DEVELOPMENT", + organizationId: organization.id, + projectId: project.id, + taskIdentifier: "my-task", + payload: "{}", + payloadType: "application/json", + traceContext: {}, + traceId: "trace_c1", + spanId: "span_c1", + queue: "task/my-task", + isTest: false, + taskEventStore: "taskEvent", + depth: 0, + error: error as unknown 
as import("@trigger.dev/database").Prisma.InputJsonValue, + completedAt: cancelledAt, + updatedAt: cancelledAt, + attemptNumber: 0, + }, + snapshot: { + engine: "V2", + executionStatus: "FINISHED", + description: "Run cancelled before materialisation", + runStatus: "CANCELED", + environmentId: environment.id, + environmentType: "DEVELOPMENT", + projectId: project.id, + organizationId: organization.id, + }, + }; + + const run = await store.createCancelledRun(input); + + expect(run.id).toBe(runId); + expect(run.status).toBe("CANCELED"); + expect(run.attemptNumber).toBe(0); + expect(run.completedAt).toEqual(cancelledAt); + + const snapshots = await prisma.taskRunExecutionSnapshot.findMany({ + where: { runId }, + }); + + expect(snapshots).toHaveLength(1); + expect(snapshots[0]?.executionStatus).toBe("FINISHED"); + expect(snapshots[0]?.runStatus).toBe("CANCELED"); + } + ); + + postgresTest( + "createFailedRun creates a SYSTEM_FAILURE run with no execution snapshot and null associatedWaitpoint when not provided", + async ({ prisma }) => { + const { organization, project, environment } = await seedEnvironment(prisma); + + const store = new PostgresRunStore({ + prisma, + readOnlyPrisma: prisma, + }); + + const runId = "run_failed_1"; + const completedAt = new Date("2026-01-01T00:00:00.000Z"); + const error = { type: "STRING_ERROR", raw: "system failure" }; + + const input: CreateFailedRunInput = { + data: { + id: runId, + engine: "V2", + status: "SYSTEM_FAILURE", + friendlyId: "run_failed_friendly_1", + runtimeEnvironmentId: environment.id, + environmentType: "DEVELOPMENT", + organizationId: organization.id, + projectId: project.id, + taskIdentifier: "my-task", + payload: "{}", + payloadType: "application/json", + context: {}, + traceContext: {}, + traceId: "trace_f1", + spanId: "span_f1", + queue: "task/my-task", + isTest: false, + completedAt, + error: error as unknown as import("@trigger.dev/database").Prisma.InputJsonObject, + depth: 0, + taskEventStore: "taskEvent", + 
}, + }; + + const run = await store.createFailedRun(input); + + expect(run.id).toBe(runId); + expect(run.status).toBe("SYSTEM_FAILURE"); + expect(run.completedAt).toEqual(completedAt); + expect(run.associatedWaitpoint).toBeNull(); + + const snapshots = await prisma.taskRunExecutionSnapshot.findMany({ + where: { runId }, + }); + + expect(snapshots).toHaveLength(0); + } + ); }); diff --git a/internal-packages/run-store/src/PostgresRunStore.ts b/internal-packages/run-store/src/PostgresRunStore.ts index 777b1a60979..a5caf83c002 100644 --- a/internal-packages/run-store/src/PostgresRunStore.ts +++ b/internal-packages/run-store/src/PostgresRunStore.ts @@ -78,18 +78,52 @@ export class PostgresRunStore implements RunStore { }); } - createCancelledRun( - _params: CreateCancelledRunInput, - _tx?: PrismaClientOrTransaction + async createCancelledRun( + params: CreateCancelledRunInput, + tx?: PrismaClientOrTransaction ): Promise { - throw new Error("not implemented"); + const client = tx ?? this.prisma; + + return client.taskRun.create({ + data: { + ...params.data, + executionSnapshots: { + create: { + engine: params.snapshot.engine, + executionStatus: params.snapshot.executionStatus, + description: params.snapshot.description, + runStatus: params.snapshot.runStatus, + environmentId: params.snapshot.environmentId, + environmentType: params.snapshot.environmentType, + projectId: params.snapshot.projectId, + organizationId: params.snapshot.organizationId, + workerId: params.snapshot.workerId, + runnerId: params.snapshot.runnerId, + }, + }, + }, + }); } - createFailedRun( - _params: CreateFailedRunInput, - _tx?: PrismaClientOrTransaction + async createFailedRun( + params: CreateFailedRunInput, + tx?: PrismaClientOrTransaction ): Promise { - throw new Error("not implemented"); + const client = tx ?? this.prisma; + + return client.taskRun.create({ + include: { + associatedWaitpoint: true, + }, + data: { + ...params.data, + associatedWaitpoint: params.associatedWaitpoint + ? 
{ + create: params.associatedWaitpoint, + } + : undefined, + }, + }); } startAttempt( From f8456c142a6c89f7d03731371da5b334b412cff3 Mon Sep 17 00:00:00 2001 From: Dan Sutton Date: Wed, 17 Jun 2026 13:59:39 +0100 Subject: [PATCH 07/83] feat(run-store): implement attempt lifecycle, cancel, and fail methods Replaces the seven throwing stubs on PostgresRunStore with verbatim relocations of the Prisma statements from runAttemptSystem: startAttempt, completeAttemptSuccess, recordRetryOutcome, requeueRun, recordBulkActionMembership, cancelRun, and failRunPermanently. Each method splices the caller-supplied select/include into the Prisma call. Tests use real Postgres containers and cover each method including edge cases (append semantics, conditional fields in cancelRun). --- .../run-store/src/PostgresRunStore.test.ts | 319 ++++++++++++++++++ .../run-store/src/PostgresRunStore.ts | 170 +++++++--- 2 files changed, 449 insertions(+), 40 deletions(-) diff --git a/internal-packages/run-store/src/PostgresRunStore.test.ts b/internal-packages/run-store/src/PostgresRunStore.test.ts index 5c793196cc0..2a99aaf327e 100644 --- a/internal-packages/run-store/src/PostgresRunStore.test.ts +++ b/internal-packages/run-store/src/PostgresRunStore.test.ts @@ -232,4 +232,323 @@ describe("PostgresRunStore", () => { expect(snapshots).toHaveLength(0); } ); + + postgresTest("startAttempt sets status to EXECUTING and records attempt fields", async ({ prisma }) => { + const { organization, project, environment } = await seedEnvironment(prisma); + + const store = new PostgresRunStore({ prisma, readOnlyPrisma: prisma }); + const runId = "run_start_attempt_1"; + + await store.createRun( + buildCreateRunInput({ + runId, + organizationId: organization.id, + projectId: project.id, + runtimeEnvironmentId: environment.id, + }) + ); + + const executedAt = new Date("2026-03-01T10:00:00.000Z"); + + const run = await store.startAttempt( + runId, + { attemptNumber: 1, executedAt, isWarmStart: true }, + { select: 
{ id: true, status: true, attemptNumber: true, executedAt: true, isWarmStart: true } } + ); + + expect(run.id).toBe(runId); + expect(run.status).toBe("EXECUTING"); + expect(run.attemptNumber).toBe(1); + expect(run.executedAt).toEqual(executedAt); + expect(run.isWarmStart).toBe(true); + }); + + postgresTest( + "completeAttemptSuccess sets status to COMPLETED_SUCCESSFULLY and creates a FINISHED snapshot", + async ({ prisma }) => { + const { organization, project, environment } = await seedEnvironment(prisma); + + const store = new PostgresRunStore({ prisma, readOnlyPrisma: prisma }); + const runId = "run_complete_success_1"; + + await store.createRun( + buildCreateRunInput({ + runId, + organizationId: organization.id, + projectId: project.id, + runtimeEnvironmentId: environment.id, + }) + ); + + const completedAt = new Date("2026-03-01T11:00:00.000Z"); + + const run = await store.completeAttemptSuccess( + runId, + { + completedAt, + output: '{"result":"ok"}', + outputType: "application/json", + usageDurationMs: 500, + costInCents: 10, + snapshot: { + executionStatus: "FINISHED", + description: "Task completed successfully", + runStatus: "COMPLETED_SUCCESSFULLY", + attemptNumber: 1, + environmentId: environment.id, + environmentType: "DEVELOPMENT", + projectId: project.id, + organizationId: organization.id, + }, + }, + { + select: { + id: true, + status: true, + completedAt: true, + output: true, + outputType: true, + usageDurationMs: true, + costInCents: true, + }, + } + ); + + expect(run.id).toBe(runId); + expect(run.status).toBe("COMPLETED_SUCCESSFULLY"); + expect(run.completedAt).toEqual(completedAt); + expect(run.output).toBe('{"result":"ok"}'); + expect(run.usageDurationMs).toBe(500); + expect(run.costInCents).toBe(10); + + const snapshots = await prisma.taskRunExecutionSnapshot.findMany({ + where: { runId, executionStatus: "FINISHED" }, + }); + + expect(snapshots).toHaveLength(1); + expect(snapshots[0]?.runStatus).toBe("COMPLETED_SUCCESSFULLY"); + } + ); + + 
postgresTest("recordRetryOutcome updates machine/usage/cost but leaves status unchanged", async ({ prisma }) => { + const { organization, project, environment } = await seedEnvironment(prisma); + + const store = new PostgresRunStore({ prisma, readOnlyPrisma: prisma }); + const runId = "run_retry_outcome_1"; + + await store.createRun( + buildCreateRunInput({ + runId, + organizationId: organization.id, + projectId: project.id, + runtimeEnvironmentId: environment.id, + }) + ); + + // Set status to EXECUTING first so we know what to verify against + await store.startAttempt(runId, { attemptNumber: 1, isWarmStart: false }, { select: { id: true } }); + + const run = await store.recordRetryOutcome( + runId, + { machinePreset: "large-1x", usageDurationMs: 200, costInCents: 5 }, + { include: { runtimeEnvironment: true } } + ); + + // Status must be unchanged (EXECUTING — not PENDING, not CANCELED) + expect(run.status).toBe("EXECUTING"); + expect(run.machinePreset).toBe("large-1x"); + expect(run.usageDurationMs).toBe(200); + expect(run.costInCents).toBe(5); + }); + + postgresTest("requeueRun sets status to PENDING", async ({ prisma }) => { + const { organization, project, environment } = await seedEnvironment(prisma); + + const store = new PostgresRunStore({ prisma, readOnlyPrisma: prisma }); + const runId = "run_requeue_1"; + + await store.createRun( + buildCreateRunInput({ + runId, + organizationId: organization.id, + projectId: project.id, + runtimeEnvironmentId: environment.id, + }) + ); + + await store.startAttempt(runId, { attemptNumber: 1, isWarmStart: false }, { select: { id: true } }); + + const run = await store.requeueRun(runId, { select: { id: true, status: true } }); + + expect(run.id).toBe(runId); + expect(run.status).toBe("PENDING"); + }); + + postgresTest("recordBulkActionMembership appends bulkActionId to existing array", async ({ prisma }) => { + const { organization, project, environment } = await seedEnvironment(prisma); + + const store = new 
PostgresRunStore({ prisma, readOnlyPrisma: prisma }); + const runId = "run_bulk_action_1"; + + // Seed a run with an existing bulk action id + await prisma.taskRun.create({ + data: { + id: runId, + engine: "V2", + status: "CANCELED", + friendlyId: "run_bulk_action_friendly_1", + runtimeEnvironmentId: environment.id, + environmentType: "DEVELOPMENT", + organizationId: organization.id, + projectId: project.id, + taskIdentifier: "my-task", + payload: "{}", + payloadType: "application/json", + traceContext: {}, + traceId: "trace_b1", + spanId: "span_b1", + queue: "task/my-task", + isTest: false, + taskEventStore: "taskEvent", + depth: 0, + bulkActionGroupIds: ["existing-bulk-id"], + }, + }); + + await store.recordBulkActionMembership(runId, "new-bulk-id"); + + const updated = await prisma.taskRun.findUnique({ + where: { id: runId }, + select: { bulkActionGroupIds: true }, + }); + + expect(updated?.bulkActionGroupIds).toContain("existing-bulk-id"); + expect(updated?.bulkActionGroupIds).toContain("new-bulk-id"); + expect(updated?.bulkActionGroupIds).toHaveLength(2); + }); + + postgresTest( + "cancelRun sets status to CANCELED; without bulkActionId/usage those fields are untouched", + async ({ prisma }) => { + const { organization, project, environment } = await seedEnvironment(prisma); + + const store = new PostgresRunStore({ prisma, readOnlyPrisma: prisma }); + const runId = "run_cancel_no_bulk_1"; + + // Seed with a pre-existing bulk action id so we can verify it stays + await prisma.taskRun.create({ + data: { + id: runId, + engine: "V2", + status: "PENDING", + friendlyId: "run_cancel_no_bulk_friendly_1", + runtimeEnvironmentId: environment.id, + environmentType: "DEVELOPMENT", + organizationId: organization.id, + projectId: project.id, + taskIdentifier: "my-task", + payload: "{}", + payloadType: "application/json", + traceContext: {}, + traceId: "trace_cn1", + spanId: "span_cn1", + queue: "task/my-task", + isTest: false, + taskEventStore: "taskEvent", + depth: 0, + 
bulkActionGroupIds: ["x"], + }, + }); + + const cancelledAt = new Date("2026-04-01T00:00:00.000Z"); + const error = { type: "STRING_ERROR" as const, raw: "Canceled by user" }; + + const run = await store.cancelRun( + runId, + { completedAt: cancelledAt, error }, + { select: { id: true, status: true, completedAt: true, bulkActionGroupIds: true, usageDurationMs: true, costInCents: true } } + ); + + expect(run.id).toBe(runId); + expect(run.status).toBe("CANCELED"); + expect(run.completedAt).toEqual(cancelledAt); + // bulkActionGroupIds must be unchanged (still just ["x"]) + expect(run.bulkActionGroupIds).toEqual(["x"]); + // usage fields were not passed — should remain at default (0) + expect(run.usageDurationMs).toBe(0); + expect(run.costInCents).toBe(0); + } + ); + + postgresTest( + "cancelRun with bulkActionId and usage applies all optional fields", + async ({ prisma }) => { + const { organization, project, environment } = await seedEnvironment(prisma); + + const store = new PostgresRunStore({ prisma, readOnlyPrisma: prisma }); + const runId = "run_cancel_with_bulk_1"; + + await store.createRun( + buildCreateRunInput({ + runId, + organizationId: organization.id, + projectId: project.id, + runtimeEnvironmentId: environment.id, + }) + ); + + const cancelledAt = new Date("2026-04-01T01:00:00.000Z"); + const error = { type: "STRING_ERROR" as const, raw: "Canceled by user" }; + + const run = await store.cancelRun( + runId, + { completedAt: cancelledAt, error, bulkActionId: "bulk-abc", usageDurationMs: 300, costInCents: 7 }, + { select: { id: true, status: true, bulkActionGroupIds: true, usageDurationMs: true, costInCents: true } } + ); + + expect(run.status).toBe("CANCELED"); + expect(run.bulkActionGroupIds).toContain("bulk-abc"); + expect(run.usageDurationMs).toBe(300); + expect(run.costInCents).toBe(7); + } + ); + + postgresTest("failRunPermanently sets the passed status with completedAt/error/usage/cost", async ({ prisma }) => { + const { organization, project, 
environment } = await seedEnvironment(prisma); + + const store = new PostgresRunStore({ prisma, readOnlyPrisma: prisma }); + const runId = "run_fail_permanently_1"; + + await store.createRun( + buildCreateRunInput({ + runId, + organizationId: organization.id, + projectId: project.id, + runtimeEnvironmentId: environment.id, + }) + ); + + const completedAt = new Date("2026-05-01T00:00:00.000Z"); + const error = { type: "STRING_ERROR" as const, raw: "permanent failure" }; + + const run = await store.failRunPermanently( + runId, + { status: "SYSTEM_FAILURE", completedAt, error, usageDurationMs: 150, costInCents: 3 }, + { + select: { + id: true, + status: true, + completedAt: true, + usageDurationMs: true, + costInCents: true, + }, + } + ); + + expect(run.id).toBe(runId); + expect(run.status).toBe("SYSTEM_FAILURE"); + expect(run.completedAt).toEqual(completedAt); + expect(run.usageDurationMs).toBe(150); + expect(run.costInCents).toBe(3); + }); }); diff --git a/internal-packages/run-store/src/PostgresRunStore.ts b/internal-packages/run-store/src/PostgresRunStore.ts index a5caf83c002..c3d1ca61117 100644 --- a/internal-packages/run-store/src/PostgresRunStore.ts +++ b/internal-packages/run-store/src/PostgresRunStore.ts @@ -126,18 +126,29 @@ export class PostgresRunStore implements RunStore { }); } - startAttempt( - _runId: string, - _data: { attemptNumber: number; executedAt?: Date; isWarmStart: boolean }, - _args: { select: S }, - _tx?: PrismaClientOrTransaction + async startAttempt( + runId: string, + data: { attemptNumber: number; executedAt?: Date; isWarmStart: boolean }, + args: { select: S }, + tx?: PrismaClientOrTransaction ): Promise> { - throw new Error("not implemented"); + const prisma = tx ?? 
this.prisma; + + return prisma.taskRun.update({ + where: { id: runId }, + data: { + status: "EXECUTING", + attemptNumber: data.attemptNumber, + executedAt: data.executedAt, + isWarmStart: data.isWarmStart, + }, + select: args.select, + }) as Promise>; } - completeAttemptSuccess( - _runId: string, - _data: { + async completeAttemptSuccess( + runId: string, + data: { completedAt: Date; output?: string; outputType: string; @@ -145,65 +156,144 @@ export class PostgresRunStore implements RunStore { costInCents: number; snapshot: CompletionSnapshotInput; }, - _args: { select: S }, - _tx?: PrismaClientOrTransaction + args: { select: S }, + tx?: PrismaClientOrTransaction ): Promise> { - throw new Error("not implemented"); + const prisma = tx ?? this.prisma; + + return prisma.taskRun.update({ + where: { id: runId }, + data: { + status: "COMPLETED_SUCCESSFULLY", + completedAt: data.completedAt, + output: data.output, + outputType: data.outputType, + usageDurationMs: data.usageDurationMs, + costInCents: data.costInCents, + executionSnapshots: { + create: { + executionStatus: data.snapshot.executionStatus, + description: data.snapshot.description, + runStatus: data.snapshot.runStatus, + attemptNumber: data.snapshot.attemptNumber, + environmentId: data.snapshot.environmentId, + environmentType: data.snapshot.environmentType, + projectId: data.snapshot.projectId, + organizationId: data.snapshot.organizationId, + workerId: data.snapshot.workerId, + runnerId: data.snapshot.runnerId, + }, + }, + }, + select: args.select, + }) as Promise>; } - recordRetryOutcome( - _runId: string, - _data: { machinePreset: string; usageDurationMs: number; costInCents: number }, - _args: { include: I }, - _tx?: PrismaClientOrTransaction + async recordRetryOutcome( + runId: string, + data: { machinePreset: string; usageDurationMs: number; costInCents: number }, + args: { include: I }, + tx?: PrismaClientOrTransaction ): Promise> { - throw new Error("not implemented"); + const prisma = tx ?? 
this.prisma; + + return prisma.taskRun.update({ + where: { id: runId }, + data: { + machinePreset: data.machinePreset, + usageDurationMs: data.usageDurationMs, + costInCents: data.costInCents, + }, + include: args.include, + }) as Promise>; } - requeueRun( - _runId: string, - _args: { select: S }, - _tx?: PrismaClientOrTransaction + async requeueRun( + runId: string, + args: { select: S }, + tx?: PrismaClientOrTransaction ): Promise> { - throw new Error("not implemented"); + const prisma = tx ?? this.prisma; + + return prisma.taskRun.update({ + where: { id: runId }, + data: { status: "PENDING" }, + select: args.select, + }) as Promise>; } - recordBulkActionMembership( - _runId: string, - _bulkActionId: string, - _tx?: PrismaClientOrTransaction + async recordBulkActionMembership( + runId: string, + bulkActionId: string, + tx?: PrismaClientOrTransaction ): Promise { - throw new Error("not implemented"); + const prisma = tx ?? this.prisma; + + await prisma.taskRun.update({ + where: { id: runId }, + data: { + bulkActionGroupIds: { + push: bulkActionId, + }, + }, + }); } - cancelRun( - _runId: string, - _data: { + async cancelRun( + runId: string, + data: { completedAt?: Date; error: TaskRunError; bulkActionId?: string; usageDurationMs?: number; costInCents?: number; }, - _args: { select: S }, - _tx?: PrismaClientOrTransaction + args: { select: S }, + tx?: PrismaClientOrTransaction ): Promise> { - throw new Error("not implemented"); + const prisma = tx ?? 
this.prisma; + + return prisma.taskRun.update({ + where: { id: runId }, + data: { + status: "CANCELED", + ...(data.completedAt !== undefined && { completedAt: data.completedAt }), + error: data.error as Prisma.InputJsonValue, + ...(data.bulkActionId !== undefined && { + bulkActionGroupIds: { push: data.bulkActionId }, + }), + ...(data.usageDurationMs !== undefined && { usageDurationMs: data.usageDurationMs }), + ...(data.costInCents !== undefined && { costInCents: data.costInCents }), + }, + select: args.select, + }) as Promise>; } - failRunPermanently( - _runId: string, - _data: { + async failRunPermanently( + runId: string, + data: { status: TaskRunStatus; completedAt: Date; error: TaskRunError; usageDurationMs: number; costInCents: number; }, - _args: { select: S }, - _tx?: PrismaClientOrTransaction + args: { select: S }, + tx?: PrismaClientOrTransaction ): Promise> { - throw new Error("not implemented"); + const prisma = tx ?? this.prisma; + + return prisma.taskRun.update({ + where: { id: runId }, + data: { + status: data.status, + completedAt: data.completedAt, + error: data.error as Prisma.InputJsonValue, + usageDurationMs: data.usageDurationMs, + costInCents: data.costInCents, + }, + select: args.select, + }) as Promise>; } expireRun( From f1ab6ae7550a485b1d9d27265af3ab5c96753be4 Mon Sep 17 00:00:00 2001 From: Dan Sutton Date: Wed, 17 Jun 2026 14:06:25 +0100 Subject: [PATCH 08/83] feat(run-store): implement expiry, dequeue-lock, version, and checkpoint methods --- .../run-store/src/PostgresRunStore.test.ts | 377 ++++++++++++++++++ .../run-store/src/PostgresRunStore.ts | 181 +++++++-- 2 files changed, 521 insertions(+), 37 deletions(-) diff --git a/internal-packages/run-store/src/PostgresRunStore.test.ts b/internal-packages/run-store/src/PostgresRunStore.test.ts index 2a99aaf327e..f390be6b063 100644 --- a/internal-packages/run-store/src/PostgresRunStore.test.ts +++ b/internal-packages/run-store/src/PostgresRunStore.test.ts @@ -551,4 +551,381 @@ 
describe("PostgresRunStore", () => { expect(run.usageDurationMs).toBe(150); expect(run.costInCents).toBe(3); }); + + postgresTest( + "expireRun sets status to EXPIRED with distinct completedAt/expiredAt, error set, and one FINISHED/EXPIRED snapshot", + async ({ prisma }) => { + const { organization, project, environment } = await seedEnvironment(prisma); + + const store = new PostgresRunStore({ prisma, readOnlyPrisma: prisma }); + const runId = "run_expire_1"; + + await store.createRun( + buildCreateRunInput({ + runId, + organizationId: organization.id, + projectId: project.id, + runtimeEnvironmentId: environment.id, + }) + ); + + const completedAt = new Date("2026-06-01T10:00:00.000Z"); + const expiredAt = new Date("2026-06-01T10:00:01.000Z"); + const error = { type: "STRING_ERROR" as const, raw: "Run expired because the TTL was reached" }; + + const run = await store.expireRun( + runId, + { + error, + completedAt, + expiredAt, + snapshot: { + engine: "V2", + executionStatus: "FINISHED", + description: "Run was expired because the TTL was reached", + runStatus: "EXPIRED", + environmentId: environment.id, + environmentType: "DEVELOPMENT", + projectId: project.id, + organizationId: organization.id, + }, + }, + { + select: { + id: true, + status: true, + completedAt: true, + expiredAt: true, + error: true, + }, + } + ); + + expect(run.id).toBe(runId); + expect(run.status).toBe("EXPIRED"); + expect(run.completedAt).toEqual(completedAt); + expect(run.expiredAt).toEqual(expiredAt); + // completedAt and expiredAt are distinct + expect(run.completedAt?.getTime()).not.toBe(run.expiredAt?.getTime()); + + const snapshots = await prisma.taskRunExecutionSnapshot.findMany({ + where: { runId, executionStatus: "FINISHED", runStatus: "EXPIRED" }, + }); + expect(snapshots).toHaveLength(1); + } + ); + + postgresTest( + "expireRunsBatch sets EXPIRED status with all four timestamps equal to now and error set; returns correct count", + async ({ prisma }) => { + const { organization, 
project, environment } = await seedEnvironment(prisma); + + const store = new PostgresRunStore({ prisma, readOnlyPrisma: prisma }); + + const runId1 = "run_expire_batch_1"; + const runId2 = "run_expire_batch_2"; + + for (const id of [runId1, runId2]) { + await prisma.taskRun.create({ + data: { + id, + engine: "V2", + status: "PENDING", + friendlyId: `run_expire_batch_friendly_${id}`, + runtimeEnvironmentId: environment.id, + environmentType: "DEVELOPMENT", + organizationId: organization.id, + projectId: project.id, + taskIdentifier: "my-task", + payload: "{}", + payloadType: "application/json", + traceContext: {}, + traceId: `trace_${id}`, + spanId: `span_${id}`, + queue: "task/my-task", + isTest: false, + taskEventStore: "taskEvent", + depth: 0, + }, + }); + } + + const now = new Date("2026-06-01T12:00:00.000Z"); + const error = { type: "STRING_ERROR" as const, raw: "Run expired because the TTL was reached" }; + + const count = await store.expireRunsBatch([runId1, runId2], { error, now }); + + expect(count).toBe(2); + + for (const id of [runId1, runId2]) { + const row = await prisma.taskRun.findUniqueOrThrow({ + where: { id }, + select: { status: true, completedAt: true, expiredAt: true, updatedAt: true }, + }); + expect(row.status).toBe("EXPIRED"); + expect(row.completedAt).toEqual(now); + expect(row.expiredAt).toEqual(now); + expect(row.updatedAt).toEqual(now); + } + } + ); + + postgresTest( + "lockRunToWorker sets status to DEQUEUED with lock columns, includes runtimeEnvironment, and creates one PENDING_EXECUTING snapshot", + async ({ prisma }) => { + const { organization, project, environment } = await seedEnvironment(prisma); + + const store = new PostgresRunStore({ prisma, readOnlyPrisma: prisma }); + const runId = "run_lock_1"; + + await store.createRun( + buildCreateRunInput({ + runId, + organizationId: organization.id, + projectId: project.id, + runtimeEnvironmentId: environment.id, + }) + ); + + // Seed a background worker task to use as lockedById + 
const backgroundWorker = await prisma.backgroundWorker.create({ + data: { + friendlyId: "worker_friendly_1", + version: "20260601.1", + runtimeEnvironmentId: environment.id, + projectId: project.id, + contentHash: "abc123", + sdkVersion: "3.0.0", + cliVersion: "3.0.0", + metadata: {}, + }, + }); + + const workerTask = await prisma.backgroundWorkerTask.create({ + data: { + friendlyId: "task_friendly_1", + slug: "my-task", + filePath: "src/my-task.ts", + exportName: "myTask", + workerId: backgroundWorker.id, + runtimeEnvironmentId: environment.id, + projectId: project.id, + }, + }); + + const queue = await prisma.taskQueue.create({ + data: { + friendlyId: "queue_friendly_1", + name: "task/my-task", + runtimeEnvironmentId: environment.id, + projectId: project.id, + }, + }); + + // Seed a prior snapshot to use as previousSnapshotId + const priorSnapshot = await prisma.taskRunExecutionSnapshot.create({ + data: { + engine: "V2", + executionStatus: "RUN_CREATED", + description: "prior", + runStatus: "PENDING", + environmentId: environment.id, + environmentType: "DEVELOPMENT", + projectId: project.id, + organizationId: organization.id, + runId, + }, + }); + + const lockedAt = new Date("2026-06-01T13:00:00.000Z"); + const startedAt = new Date("2026-06-01T13:00:01.000Z"); + const snapshotId = "snap_lock_1"; + + const locked = await store.lockRunToWorker(runId, { + lockedAt, + lockedById: workerTask.id, + lockedToVersionId: backgroundWorker.id, + lockedQueueId: queue.id, + startedAt, + baseCostInCents: 5, + machinePreset: "small-1x", + taskVersion: "20260601.1", + sdkVersion: "3.0.0", + cliVersion: "3.0.0", + maxDurationInSeconds: null, + snapshot: { + id: snapshotId, + previousSnapshotId: priorSnapshot.id, + environmentId: environment.id, + environmentType: "DEVELOPMENT", + projectId: project.id, + organizationId: organization.id, + completedWaitpointIds: [], + completedWaitpointOrder: [], + }, + }); + + expect(locked.status).toBe("DEQUEUED"); + 
expect(locked.lockedAt).toEqual(lockedAt); + expect(locked.lockedById).toBe(workerTask.id); + expect(locked.lockedToVersionId).toBe(backgroundWorker.id); + expect(locked.lockedQueueId).toBe(queue.id); + expect(locked.runtimeEnvironment).toBeDefined(); + expect(locked.runtimeEnvironment.id).toBe(environment.id); + + const snap = await prisma.taskRunExecutionSnapshot.findUnique({ where: { id: snapshotId } }); + expect(snap).not.toBeNull(); + expect(snap?.executionStatus).toBe("PENDING_EXECUTING"); + expect(snap?.runStatus).toBe("PENDING"); + } + ); + + postgresTest("parkPendingVersion sets status to PENDING_VERSION and stores statusReason", async ({ prisma }) => { + const { organization, project, environment } = await seedEnvironment(prisma); + + const store = new PostgresRunStore({ prisma, readOnlyPrisma: prisma }); + const runId = "run_park_1"; + + await store.createRun( + buildCreateRunInput({ + runId, + organizationId: organization.id, + projectId: project.id, + runtimeEnvironmentId: environment.id, + }) + ); + + const run = await store.parkPendingVersion( + runId, + { statusReason: "No background worker found" }, + { select: { id: true, status: true, statusReason: true } } + ); + + expect(run.id).toBe(runId); + expect(run.status).toBe("PENDING_VERSION"); + expect(run.statusReason).toBe("No background worker found"); + }); + + postgresTest( + "promotePendingVersionRuns flips PENDING_VERSION to PENDING and returns count 1; run in another status returns count 0 and is unchanged", + async ({ prisma }) => { + const { organization, project, environment } = await seedEnvironment(prisma); + + const store = new PostgresRunStore({ prisma, readOnlyPrisma: prisma }); + + // Seed a PENDING_VERSION run + const pendingVersionId = "run_promote_pv_1"; + await prisma.taskRun.create({ + data: { + id: pendingVersionId, + engine: "V2", + status: "PENDING_VERSION", + friendlyId: "run_promote_pv_friendly_1", + runtimeEnvironmentId: environment.id, + environmentType: "DEVELOPMENT", + 
organizationId: organization.id, + projectId: project.id, + taskIdentifier: "my-task", + payload: "{}", + payloadType: "application/json", + traceContext: {}, + traceId: "trace_pv1", + spanId: "span_pv1", + queue: "task/my-task", + isTest: false, + taskEventStore: "taskEvent", + depth: 0, + }, + }); + + const result = await store.promotePendingVersionRuns(pendingVersionId); + + expect(result.count).toBe(1); + + const promoted = await prisma.taskRun.findUniqueOrThrow({ where: { id: pendingVersionId }, select: { status: true } }); + expect(promoted.status).toBe("PENDING"); + + // Seed a run NOT in PENDING_VERSION (e.g. EXECUTING) + const executingId = "run_promote_exec_1"; + await prisma.taskRun.create({ + data: { + id: executingId, + engine: "V2", + status: "EXECUTING", + friendlyId: "run_promote_exec_friendly_1", + runtimeEnvironmentId: environment.id, + environmentType: "DEVELOPMENT", + organizationId: organization.id, + projectId: project.id, + taskIdentifier: "my-task", + payload: "{}", + payloadType: "application/json", + traceContext: {}, + traceId: "trace_exec1", + spanId: "span_exec1", + queue: "task/my-task", + isTest: false, + taskEventStore: "taskEvent", + depth: 0, + }, + }); + + const result2 = await store.promotePendingVersionRuns(executingId); + + expect(result2.count).toBe(0); + + const unchanged = await prisma.taskRun.findUniqueOrThrow({ where: { id: executingId }, select: { status: true } }); + expect(unchanged.status).toBe("EXECUTING"); + } + ); + + postgresTest("suspendForCheckpoint sets status to WAITING_TO_RESUME", async ({ prisma }) => { + const { organization, project, environment } = await seedEnvironment(prisma); + + const store = new PostgresRunStore({ prisma, readOnlyPrisma: prisma }); + const runId = "run_suspend_1"; + + await store.createRun( + buildCreateRunInput({ + runId, + organizationId: organization.id, + projectId: project.id, + runtimeEnvironmentId: environment.id, + }) + ); + + const run = await 
store.suspendForCheckpoint(runId, { + include: { runtimeEnvironment: true }, + }); + + expect(run.id).toBe(runId); + expect(run.status).toBe("WAITING_TO_RESUME"); + expect(run.runtimeEnvironment).toBeDefined(); + }); + + postgresTest("resumeFromCheckpoint sets status to EXECUTING", async ({ prisma }) => { + const { organization, project, environment } = await seedEnvironment(prisma); + + const store = new PostgresRunStore({ prisma, readOnlyPrisma: prisma }); + const runId = "run_resume_1"; + + await store.createRun( + buildCreateRunInput({ + runId, + organizationId: organization.id, + projectId: project.id, + runtimeEnvironmentId: environment.id, + }) + ); + + // Suspend first so we start from a realistic state + await store.suspendForCheckpoint(runId, { include: {} }); + + const run = await store.resumeFromCheckpoint(runId, { + select: { id: true, status: true }, + }); + + expect(run.id).toBe(runId); + expect(run.status).toBe("EXECUTING"); + }); }); diff --git a/internal-packages/run-store/src/PostgresRunStore.ts b/internal-packages/run-store/src/PostgresRunStore.ts index c3d1ca61117..37352762d59 100644 --- a/internal-packages/run-store/src/PostgresRunStore.ts +++ b/internal-packages/run-store/src/PostgresRunStore.ts @@ -1,5 +1,5 @@ +import { Prisma } from "@trigger.dev/database"; import type { - Prisma, PrismaClient, PrismaClientOrTransaction, PrismaReplicaClient, @@ -296,61 +296,168 @@ export class PostgresRunStore implements RunStore { }) as Promise>; } - expireRun( - _runId: string, - _data: { error: TaskRunError; completedAt: Date; expiredAt: Date; snapshot: ExpireSnapshotInput }, - _args: { select: S }, - _tx?: PrismaClientOrTransaction + async expireRun( + runId: string, + data: { error: TaskRunError; completedAt: Date; expiredAt: Date; snapshot: ExpireSnapshotInput }, + args: { select: S }, + tx?: PrismaClientOrTransaction ): Promise> { - throw new Error("not implemented"); + const prisma = tx ?? 
this.prisma; + + return prisma.taskRun.update({ + where: { id: runId }, + data: { + status: "EXPIRED", + completedAt: data.completedAt, + expiredAt: data.expiredAt, + error: data.error as Prisma.InputJsonValue, + executionSnapshots: { + create: { + engine: data.snapshot.engine, + executionStatus: data.snapshot.executionStatus, + description: data.snapshot.description, + runStatus: data.snapshot.runStatus, + environmentId: data.snapshot.environmentId, + environmentType: data.snapshot.environmentType, + projectId: data.snapshot.projectId, + organizationId: data.snapshot.organizationId, + }, + }, + }, + select: args.select, + }) as Promise>; } - expireRunsBatch( - _runIds: string[], - _data: { error: TaskRunError; now: Date }, - _tx?: PrismaClientOrTransaction + async expireRunsBatch( + runIds: string[], + data: { error: TaskRunError; now: Date }, + tx?: PrismaClientOrTransaction ): Promise { - throw new Error("not implemented"); + const prisma = tx ?? this.prisma; + + return prisma.$executeRaw` + UPDATE "TaskRun" + SET "status" = 'EXPIRED'::"TaskRunStatus", + "completedAt" = ${data.now}, + "expiredAt" = ${data.now}, + "updatedAt" = ${data.now}, + "error" = ${JSON.stringify(data.error)}::jsonb + WHERE "id" IN (${Prisma.join(runIds)}) + `; } - lockRunToWorker( - _runId: string, - _data: LockRunData, - _tx?: PrismaClientOrTransaction + async lockRunToWorker( + runId: string, + data: LockRunData, + tx?: PrismaClientOrTransaction ): Promise> { - throw new Error("not implemented"); + const prisma = tx ?? this.prisma; + + return prisma.taskRun.update({ + where: { id: runId }, + data: { + status: "DEQUEUED", + lockedAt: data.lockedAt, + lockedById: data.lockedById, + lockedToVersionId: data.lockedToVersionId, + lockedQueueId: data.lockedQueueId, + lockedRetryConfig: data.lockedRetryConfig ?? undefined, + startedAt: data.startedAt, + baseCostInCents: data.baseCostInCents, + machinePreset: data.machinePreset, + taskVersion: data.taskVersion, + sdkVersion: data.sdkVersion ?? 
undefined, + cliVersion: data.cliVersion ?? undefined, + maxDurationInSeconds: data.maxDurationInSeconds ?? undefined, + maxAttempts: data.maxAttempts ?? undefined, + executionSnapshots: { + create: { + id: data.snapshot.id, + engine: "V2", + executionStatus: "PENDING_EXECUTING", + description: "Run was dequeued for execution", + runStatus: "PENDING", + attemptNumber: data.snapshot.attemptNumber ?? undefined, + previousSnapshotId: data.snapshot.previousSnapshotId, + environmentId: data.snapshot.environmentId, + environmentType: data.snapshot.environmentType, + projectId: data.snapshot.projectId, + organizationId: data.snapshot.organizationId, + checkpointId: data.snapshot.checkpointId ?? undefined, + batchId: data.snapshot.batchId ?? undefined, + completedWaitpoints: { + connect: data.snapshot.completedWaitpointIds.map((id) => ({ id })), + }, + completedWaitpointOrder: data.snapshot.completedWaitpointOrder, + workerId: data.snapshot.workerId ?? undefined, + runnerId: data.snapshot.runnerId ?? undefined, + }, + }, + }, + include: { + runtimeEnvironment: true, + }, + }); } - parkPendingVersion( - _runId: string, - _data: { statusReason: string }, - _args: { select: S }, - _tx?: PrismaClientOrTransaction + async parkPendingVersion( + runId: string, + data: { statusReason: string }, + args: { select: S }, + tx?: PrismaClientOrTransaction ): Promise> { - throw new Error("not implemented"); + const prisma = tx ?? this.prisma; + + return prisma.taskRun.update({ + where: { id: runId }, + data: { + status: "PENDING_VERSION", + statusReason: data.statusReason, + }, + select: args.select, + }) as Promise>; } - promotePendingVersionRuns( - _runId: string, - _tx?: PrismaClientOrTransaction + async promotePendingVersionRuns( + runId: string, + tx?: PrismaClientOrTransaction ): Promise<{ count: number }> { - throw new Error("not implemented"); + const prisma = tx ?? 
this.prisma; + + const result = await prisma.taskRun.updateMany({ + where: { id: runId, status: "PENDING_VERSION" }, + data: { status: "PENDING" }, + }); + + return { count: result.count }; } - suspendForCheckpoint( - _runId: string, - _args: { include: I }, - _tx?: PrismaClientOrTransaction + async suspendForCheckpoint( + runId: string, + args: { include: I }, + tx?: PrismaClientOrTransaction ): Promise> { - throw new Error("not implemented"); + const prisma = tx ?? this.prisma; + + return prisma.taskRun.update({ + where: { id: runId }, + data: { status: "WAITING_TO_RESUME" }, + include: args.include, + }) as Promise>; } - resumeFromCheckpoint( - _runId: string, - _args: { select: S }, - _tx?: PrismaClientOrTransaction + async resumeFromCheckpoint( + runId: string, + args: { select: S }, + tx?: PrismaClientOrTransaction ): Promise> { - throw new Error("not implemented"); + const prisma = tx ?? this.prisma; + + return prisma.taskRun.update({ + where: { id: runId }, + data: { status: "EXECUTING" }, + select: args.select, + }) as Promise>; } rescheduleRun( From f66bbad6e6a2d6450981b10b338684fd893cccb7 Mon Sep 17 00:00:00 2001 From: Dan Sutton Date: Wed, 17 Jun 2026 14:16:17 +0100 Subject: [PATCH 09/83] feat(run-store): implement reschedule, debounce, metadata, idempotency-clear, and array-append methods MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replaces the seven throwing stubs in PostgresRunStore with verbatim-relocated Prisma statements sourced from delayedRunSystem, debounceSystem, updateMetadata, idempotencyKeys, resetIdempotencyKey, batchTriggerV3, and the realtime-stream route handlers. 
- rescheduleRun: writes delayUntil always; queueTimestamp when provided; nested DELAYED executionSnapshot when snapshot arg provided - enqueueDelayedRun: sets status PENDING + queuedAt - rewriteDebouncedRun: pass-through update with associatedWaitpoint include - updateMetadata: optimistic-lock path (updateMany with version predicate) or direct path (update without predicate); both return { count } - clearIdempotencyKey: three discriminated-union branches — byId clears both columns, byPredicate clears both, byFriendlyIds clears only idempotencyKey - pushTags: push-append to runTags array; returns { updatedAt } - pushRealtimeStream: push-append to realtimeStreams array; returns void --- .../run-store/src/PostgresRunStore.test.ts | 552 ++++++++++++++++++ .../run-store/src/PostgresRunStore.ts | 162 +++-- 2 files changed, 678 insertions(+), 36 deletions(-) diff --git a/internal-packages/run-store/src/PostgresRunStore.test.ts b/internal-packages/run-store/src/PostgresRunStore.test.ts index f390be6b063..b9301bd70c6 100644 --- a/internal-packages/run-store/src/PostgresRunStore.test.ts +++ b/internal-packages/run-store/src/PostgresRunStore.test.ts @@ -929,3 +929,555 @@ describe("PostgresRunStore", () => { expect(run.status).toBe("EXECUTING"); }); }); + +describe("PostgresRunStore — delayed / debounce / metadata / idempotency / array-append", () => { + // Helper: seed a run with idempotency key and expiry set + async function seedRunWithIdempotency( + prisma: PrismaClient, + params: { + runId: string; + friendlyId: string; + organizationId: string; + projectId: string; + runtimeEnvironmentId: string; + taskIdentifier?: string; + idempotencyKey: string; + idempotencyKeyExpiresAt?: Date; + status?: string; + } + ) { + return prisma.taskRun.create({ + data: { + id: params.runId, + engine: "V2", + status: (params.status as any) ?? 
"PENDING", + friendlyId: params.friendlyId, + runtimeEnvironmentId: params.runtimeEnvironmentId, + environmentType: "DEVELOPMENT", + organizationId: params.organizationId, + projectId: params.projectId, + taskIdentifier: params.taskIdentifier ?? "my-task", + payload: "{}", + payloadType: "application/json", + traceContext: {}, + traceId: `trace_${params.runId}`, + spanId: `span_${params.runId}`, + queue: "task/my-task", + isTest: false, + taskEventStore: "taskEvent", + depth: 0, + idempotencyKey: params.idempotencyKey, + idempotencyKeyExpiresAt: params.idempotencyKeyExpiresAt ?? null, + }, + }); + } + + // Helper: seed a plain run (no idempotency) + async function seedRun( + prisma: PrismaClient, + params: { + runId: string; + friendlyId: string; + organizationId: string; + projectId: string; + runtimeEnvironmentId: string; + status?: string; + runTags?: string[]; + realtimeStreams?: string[]; + metadata?: string; + metadataType?: string; + metadataVersion?: number; + } + ) { + return prisma.taskRun.create({ + data: { + id: params.runId, + engine: "V2", + status: (params.status as any) ?? "PENDING", + friendlyId: params.friendlyId, + runtimeEnvironmentId: params.runtimeEnvironmentId, + environmentType: "DEVELOPMENT", + organizationId: params.organizationId, + projectId: params.projectId, + taskIdentifier: "my-task", + payload: "{}", + payloadType: "application/json", + traceContext: {}, + traceId: `trace_${params.runId}`, + spanId: `span_${params.runId}`, + queue: "task/my-task", + isTest: false, + taskEventStore: "taskEvent", + depth: 0, + runTags: params.runTags ?? [], + realtimeStreams: params.realtimeStreams ?? 
[], + ...(params.metadata !== undefined && { metadata: params.metadata }), + ...(params.metadataType !== undefined && { metadataType: params.metadataType }), + ...(params.metadataVersion !== undefined && { metadataVersion: params.metadataVersion }), + }, + }); + } + + // --------------------------------------------------------------------------- + // rescheduleRun + // --------------------------------------------------------------------------- + + postgresTest( + "rescheduleRun with snapshot: writes delayUntil and creates a DELAYED snapshot", + async ({ prisma }) => { + const { organization, project, environment } = await seedEnvironment(prisma); + const store = new PostgresRunStore({ prisma, readOnlyPrisma: prisma }); + const runId = "run_reschedule_snapshot_1"; + + await seedRun(prisma, { + runId, + friendlyId: "run_reschedule_snap_friendly_1", + organizationId: organization.id, + projectId: project.id, + runtimeEnvironmentId: environment.id, + status: "DELAYED", + }); + + const delayUntil = new Date("2027-01-01T00:00:00.000Z"); + + const updated = await store.rescheduleRun(runId, { + delayUntil, + snapshot: { + environmentId: environment.id, + environmentType: "DEVELOPMENT", + projectId: project.id, + organizationId: organization.id, + }, + }); + + expect(updated.id).toBe(runId); + expect(updated.delayUntil).toEqual(delayUntil); + + const snapshots = await prisma.taskRunExecutionSnapshot.findMany({ + where: { runId, executionStatus: "DELAYED" }, + }); + expect(snapshots).toHaveLength(1); + expect(snapshots[0]?.runStatus).toBe("DELAYED"); + } + ); + + postgresTest( + "rescheduleRun with queueTimestamp and no snapshot: writes delayUntil + queueTimestamp, no new snapshot", + async ({ prisma }) => { + const { organization, project, environment } = await seedEnvironment(prisma); + const store = new PostgresRunStore({ prisma, readOnlyPrisma: prisma }); + const runId = "run_reschedule_notimestamp_1"; + + await seedRun(prisma, { + runId, + friendlyId: 
"run_reschedule_notimestamp_friendly_1", + organizationId: organization.id, + projectId: project.id, + runtimeEnvironmentId: environment.id, + status: "DELAYED", + }); + + const delayUntil = new Date("2027-02-01T00:00:00.000Z"); + const queueTimestamp = new Date("2027-02-01T00:00:00.000Z"); + + const updated = await store.rescheduleRun(runId, { delayUntil, queueTimestamp }); + + expect(updated.delayUntil).toEqual(delayUntil); + expect(updated.queueTimestamp).toEqual(queueTimestamp); + + const snapshotCount = await prisma.taskRunExecutionSnapshot.count({ where: { runId } }); + expect(snapshotCount).toBe(0); + } + ); + + // --------------------------------------------------------------------------- + // enqueueDelayedRun + // --------------------------------------------------------------------------- + + postgresTest( + "enqueueDelayedRun sets status to PENDING and writes queuedAt", + async ({ prisma }) => { + const { organization, project, environment } = await seedEnvironment(prisma); + const store = new PostgresRunStore({ prisma, readOnlyPrisma: prisma }); + const runId = "run_enqueue_delayed_1"; + + await seedRun(prisma, { + runId, + friendlyId: "run_enqueue_delayed_friendly_1", + organizationId: organization.id, + projectId: project.id, + runtimeEnvironmentId: environment.id, + status: "DELAYED", + }); + + const queuedAt = new Date("2026-06-17T10:00:00.000Z"); + const updated = await store.enqueueDelayedRun(runId, { queuedAt }); + + expect(updated.id).toBe(runId); + expect(updated.status).toBe("PENDING"); + expect(updated.queuedAt).toEqual(queuedAt); + } + ); + + // --------------------------------------------------------------------------- + // rewriteDebouncedRun + // --------------------------------------------------------------------------- + + postgresTest( + "rewriteDebouncedRun updates the requested columns and returns the run with associatedWaitpoint key", + async ({ prisma }) => { + const { organization, project, environment } = await 
seedEnvironment(prisma); + const store = new PostgresRunStore({ prisma, readOnlyPrisma: prisma }); + const runId = "run_rewrite_debounced_1"; + + await seedRun(prisma, { + runId, + friendlyId: "run_rewrite_debounced_friendly_1", + organizationId: organization.id, + projectId: project.id, + runtimeEnvironmentId: environment.id, + runTags: ["original-tag"], + }); + + const result = await store.rewriteDebouncedRun(runId, { + payload: '{"key":"newvalue"}', + payloadType: "application/json", + runTags: ["new-tag"], + }); + + expect(result.id).toBe(runId); + expect(result.payload).toBe('{"key":"newvalue"}'); + expect(result.runTags).toEqual(["new-tag"]); + // associatedWaitpoint key must exist in the result (even if null) + expect("associatedWaitpoint" in result).toBe(true); + } + ); + + // --------------------------------------------------------------------------- + // updateMetadata + // --------------------------------------------------------------------------- + + postgresTest( + "updateMetadata optimistic-lock: matching version writes metadata and returns count 1", + async ({ prisma }) => { + const { organization, project, environment } = await seedEnvironment(prisma); + const store = new PostgresRunStore({ prisma, readOnlyPrisma: prisma }); + const runId = "run_update_meta_match_1"; + + await seedRun(prisma, { + runId, + friendlyId: "run_update_meta_match_friendly_1", + organizationId: organization.id, + projectId: project.id, + runtimeEnvironmentId: environment.id, + metadata: '{"old":"data"}', + metadataType: "application/json", + metadataVersion: 1, + }); + + const updatedAt = new Date("2026-06-17T11:00:00.000Z"); + const result = await store.updateMetadata( + runId, + { + metadata: '{"new":"data"}', + metadataType: "application/json", + metadataVersion: { increment: 1 }, + updatedAt, + }, + { expectedMetadataVersion: 1 } + ); + + expect(result.count).toBe(1); + + const row = await prisma.taskRun.findFirst({ + where: { id: runId }, + select: { metadata: true, 
metadataVersion: true, updatedAt: true }, + }); + expect(row?.metadata).toBe('{"new":"data"}'); + expect(row?.metadataVersion).toBe(2); + expect(row?.updatedAt).toEqual(updatedAt); + } + ); + + postgresTest( + "updateMetadata optimistic-lock: non-matching version returns count 0, row unchanged", + async ({ prisma }) => { + const { organization, project, environment } = await seedEnvironment(prisma); + const store = new PostgresRunStore({ prisma, readOnlyPrisma: prisma }); + const runId = "run_update_meta_mismatch_1"; + + await seedRun(prisma, { + runId, + friendlyId: "run_update_meta_mismatch_friendly_1", + organizationId: organization.id, + projectId: project.id, + runtimeEnvironmentId: environment.id, + metadata: '{"original":"data"}', + metadataType: "application/json", + metadataVersion: 5, + }); + + const result = await store.updateMetadata( + runId, + { + metadata: '{"new":"data"}', + metadataVersion: { increment: 1 }, + updatedAt: new Date(), + }, + { expectedMetadataVersion: 3 } // wrong version + ); + + expect(result.count).toBe(0); + + const row = await prisma.taskRun.findFirst({ + where: { id: runId }, + select: { metadata: true, metadataVersion: true }, + }); + expect(row?.metadata).toBe('{"original":"data"}'); + expect(row?.metadataVersion).toBe(5); + } + ); + + postgresTest( + "updateMetadata direct (no expectedMetadataVersion): writes metadata and returns count 1", + async ({ prisma }) => { + const { organization, project, environment } = await seedEnvironment(prisma); + const store = new PostgresRunStore({ prisma, readOnlyPrisma: prisma }); + const runId = "run_update_meta_direct_1"; + + await seedRun(prisma, { + runId, + friendlyId: "run_update_meta_direct_friendly_1", + organizationId: organization.id, + projectId: project.id, + runtimeEnvironmentId: environment.id, + metadataVersion: 0, + }); + + const result = await store.updateMetadata( + runId, + { + metadata: '{"direct":"write"}', + metadataType: "application/json", + metadataVersion: { 
increment: 1 }, + updatedAt: new Date(), + }, + {} + ); + + expect(result.count).toBe(1); + + const row = await prisma.taskRun.findFirst({ + where: { id: runId }, + select: { metadata: true, metadataVersion: true }, + }); + expect(row?.metadata).toBe('{"direct":"write"}'); + expect(row?.metadataVersion).toBe(1); + } + ); + + // --------------------------------------------------------------------------- + // clearIdempotencyKey + // --------------------------------------------------------------------------- + + postgresTest( + "clearIdempotencyKey byId: clears both idempotencyKey and idempotencyKeyExpiresAt when key matches", + async ({ prisma }) => { + const { organization, project, environment } = await seedEnvironment(prisma); + const store = new PostgresRunStore({ prisma, readOnlyPrisma: prisma }); + const runId = "run_clear_idempotency_byid_1"; + const expiresAt = new Date("2028-01-01T00:00:00.000Z"); + + await seedRunWithIdempotency(prisma, { + runId, + friendlyId: "run_clear_byid_friendly_1", + organizationId: organization.id, + projectId: project.id, + runtimeEnvironmentId: environment.id, + idempotencyKey: "idem-key-abc", + idempotencyKeyExpiresAt: expiresAt, + }); + + const result = await store.clearIdempotencyKey({ + byId: { runId, idempotencyKey: "idem-key-abc" }, + }); + + expect(result.count).toBe(1); + + const row = await prisma.taskRun.findFirst({ + where: { id: runId }, + select: { idempotencyKey: true, idempotencyKeyExpiresAt: true }, + }); + expect(row?.idempotencyKey).toBeNull(); + expect(row?.idempotencyKeyExpiresAt).toBeNull(); + } + ); + + postgresTest( + "clearIdempotencyKey byId: returns count 0 when idempotencyKey does not match", + async ({ prisma }) => { + const { organization, project, environment } = await seedEnvironment(prisma); + const store = new PostgresRunStore({ prisma, readOnlyPrisma: prisma }); + const runId = "run_clear_byid_mismatch_1"; + + await seedRunWithIdempotency(prisma, { + runId, + friendlyId: 
"run_clear_byid_mismatch_friendly_1", + organizationId: organization.id, + projectId: project.id, + runtimeEnvironmentId: environment.id, + idempotencyKey: "idem-key-real", + }); + + const result = await store.clearIdempotencyKey({ + byId: { runId, idempotencyKey: "idem-key-wrong" }, + }); + + expect(result.count).toBe(0); + + // key still set + const row = await prisma.taskRun.findFirst({ + where: { id: runId }, + select: { idempotencyKey: true }, + }); + expect(row?.idempotencyKey).toBe("idem-key-real"); + } + ); + + postgresTest( + "clearIdempotencyKey byPredicate: clears both columns when predicate matches", + async ({ prisma }) => { + const { organization, project, environment } = await seedEnvironment(prisma); + const store = new PostgresRunStore({ prisma, readOnlyPrisma: prisma }); + const runId = "run_clear_predicate_1"; + const expiresAt = new Date("2028-06-01T00:00:00.000Z"); + + await seedRunWithIdempotency(prisma, { + runId, + friendlyId: "run_clear_predicate_friendly_1", + organizationId: organization.id, + projectId: project.id, + runtimeEnvironmentId: environment.id, + taskIdentifier: "predicate-task", + idempotencyKey: "pred-idem-key", + idempotencyKeyExpiresAt: expiresAt, + }); + + const result = await store.clearIdempotencyKey({ + byPredicate: { + idempotencyKey: "pred-idem-key", + taskIdentifier: "predicate-task", + runtimeEnvironmentId: environment.id, + }, + }); + + expect(result.count).toBe(1); + + const row = await prisma.taskRun.findFirst({ + where: { id: runId }, + select: { idempotencyKey: true, idempotencyKeyExpiresAt: true }, + }); + expect(row?.idempotencyKey).toBeNull(); + expect(row?.idempotencyKeyExpiresAt).toBeNull(); + } + ); + + postgresTest( + "clearIdempotencyKey byFriendlyIds: clears ONLY idempotencyKey, leaves idempotencyKeyExpiresAt intact", + async ({ prisma }) => { + const { organization, project, environment } = await seedEnvironment(prisma); + const store = new PostgresRunStore({ prisma, readOnlyPrisma: prisma }); + const 
runId = "run_clear_friendly_1"; + const expiresAt = new Date("2028-07-01T00:00:00.000Z"); + + await seedRunWithIdempotency(prisma, { + runId, + friendlyId: "run_clear_friendly_friendly_1", + organizationId: organization.id, + projectId: project.id, + runtimeEnvironmentId: environment.id, + idempotencyKey: "friendly-idem-key", + idempotencyKeyExpiresAt: expiresAt, + }); + + const result = await store.clearIdempotencyKey({ + byFriendlyIds: ["run_clear_friendly_friendly_1"], + }); + + expect(result.count).toBe(1); + + const row = await prisma.taskRun.findFirst({ + where: { id: runId }, + select: { idempotencyKey: true, idempotencyKeyExpiresAt: true }, + }); + // idempotencyKey cleared + expect(row?.idempotencyKey).toBeNull(); + // idempotencyKeyExpiresAt NOT cleared (byFriendlyIds only clears the key) + expect(row?.idempotencyKeyExpiresAt).toEqual(expiresAt); + } + ); + + // --------------------------------------------------------------------------- + // pushTags + // --------------------------------------------------------------------------- + + postgresTest( + "pushTags appends to existing runTags (seed [a], push [b,c] → [a,b,c]) and returns updatedAt", + async ({ prisma }) => { + const { organization, project, environment } = await seedEnvironment(prisma); + const store = new PostgresRunStore({ prisma, readOnlyPrisma: prisma }); + const runId = "run_push_tags_1"; + + await seedRun(prisma, { + runId, + friendlyId: "run_push_tags_friendly_1", + organizationId: organization.id, + projectId: project.id, + runtimeEnvironmentId: environment.id, + runTags: ["a"], + }); + + const result = await store.pushTags(runId, ["b", "c"], { + runtimeEnvironmentId: environment.id, + }); + + expect(result.updatedAt).toBeInstanceOf(Date); + + const row = await prisma.taskRun.findFirst({ + where: { id: runId }, + select: { runTags: true }, + }); + expect(row?.runTags).toEqual(["a", "b", "c"]); + } + ); + + // --------------------------------------------------------------------------- + 
// pushRealtimeStream + // --------------------------------------------------------------------------- + + postgresTest( + "pushRealtimeStream appends streamId to existing realtimeStreams", + async ({ prisma }) => { + const { organization, project, environment } = await seedEnvironment(prisma); + const store = new PostgresRunStore({ prisma, readOnlyPrisma: prisma }); + const runId = "run_push_stream_1"; + + await seedRun(prisma, { + runId, + friendlyId: "run_push_stream_friendly_1", + organizationId: organization.id, + projectId: project.id, + runtimeEnvironmentId: environment.id, + realtimeStreams: ["existing-stream"], + }); + + await store.pushRealtimeStream(runId, "new-stream"); + + const row = await prisma.taskRun.findFirst({ + where: { id: runId }, + select: { realtimeStreams: true }, + }); + expect(row?.realtimeStreams).toEqual(["existing-stream", "new-stream"]); + } + ); +}); diff --git a/internal-packages/run-store/src/PostgresRunStore.ts b/internal-packages/run-store/src/PostgresRunStore.ts index 37352762d59..ee6ad9e0666 100644 --- a/internal-packages/run-store/src/PostgresRunStore.ts +++ b/internal-packages/run-store/src/PostgresRunStore.ts @@ -460,65 +460,155 @@ export class PostgresRunStore implements RunStore { }) as Promise>; } - rescheduleRun( - _runId: string, - _data: { delayUntil: Date; queueTimestamp?: Date; snapshot?: RescheduleSnapshotInput }, - _tx?: PrismaClientOrTransaction + async rescheduleRun( + runId: string, + data: { delayUntil: Date; queueTimestamp?: Date; snapshot?: RescheduleSnapshotInput }, + tx?: PrismaClientOrTransaction ): Promise { - throw new Error("not implemented"); + const prisma = tx ?? 
this.prisma; + + return prisma.taskRun.update({ + where: { id: runId }, + data: { + delayUntil: data.delayUntil, + ...(data.queueTimestamp !== undefined && { queueTimestamp: data.queueTimestamp }), + ...(data.snapshot && { + executionSnapshots: { + create: { + engine: "V2", + executionStatus: "DELAYED", + description: "Delayed run was rescheduled to a future date", + runStatus: "DELAYED", + environmentId: data.snapshot.environmentId, + environmentType: data.snapshot.environmentType, + projectId: data.snapshot.projectId, + organizationId: data.snapshot.organizationId, + }, + }, + }), + }, + }); } - enqueueDelayedRun( - _runId: string, - _data: { queuedAt: Date }, - _tx?: PrismaClientOrTransaction + async enqueueDelayedRun( + runId: string, + data: { queuedAt: Date }, + tx?: PrismaClientOrTransaction ): Promise { - throw new Error("not implemented"); + const prisma = tx ?? this.prisma; + + return prisma.taskRun.update({ + where: { id: runId }, + data: { + status: "PENDING", + queuedAt: data.queuedAt, + }, + }); } - rewriteDebouncedRun( - _runId: string, - _data: RewriteDebouncedRunData, - _tx?: PrismaClientOrTransaction + async rewriteDebouncedRun( + runId: string, + data: RewriteDebouncedRunData, + tx?: PrismaClientOrTransaction ): Promise { - throw new Error("not implemented"); + const prisma = tx ?? this.prisma; + + return prisma.taskRun.update({ + where: { id: runId }, + data, + include: { + associatedWaitpoint: true, + }, + }); } - updateMetadata( - _runId: string, - _data: { + async updateMetadata( + runId: string, + data: { metadata: string | null; metadataType?: string; metadataVersion: { increment: number }; updatedAt: Date; }, - _options: { expectedMetadataVersion?: number }, - _tx?: PrismaClientOrTransaction + options: { expectedMetadataVersion?: number }, + tx?: PrismaClientOrTransaction ): Promise<{ count: number }> { - throw new Error("not implemented"); + const prisma = tx ?? 
this.prisma; + + if (options.expectedMetadataVersion !== undefined) { + const result = await prisma.taskRun.updateMany({ + where: { id: runId, metadataVersion: options.expectedMetadataVersion }, + data, + }); + return { count: result.count }; + } + + await prisma.taskRun.update({ + where: { id: runId }, + data, + }); + return { count: 1 }; } - clearIdempotencyKey( - _params: ClearIdempotencyKeyInput, - _tx?: PrismaClientOrTransaction + async clearIdempotencyKey( + params: ClearIdempotencyKeyInput, + tx?: PrismaClientOrTransaction ): Promise<{ count: number }> { - throw new Error("not implemented"); + const prisma = tx ?? this.prisma; + + if (params.byId) { + const result = await prisma.taskRun.updateMany({ + where: { id: params.byId.runId, idempotencyKey: params.byId.idempotencyKey }, + data: { idempotencyKey: null, idempotencyKeyExpiresAt: null }, + }); + return { count: result.count }; + } + + if (params.byPredicate) { + const result = await prisma.taskRun.updateMany({ + where: { + idempotencyKey: params.byPredicate.idempotencyKey, + taskIdentifier: params.byPredicate.taskIdentifier, + runtimeEnvironmentId: params.byPredicate.runtimeEnvironmentId, + }, + data: { idempotencyKey: null, idempotencyKeyExpiresAt: null }, + }); + return { count: result.count }; + } + + // byFriendlyIds — only clears idempotencyKey, not idempotencyKeyExpiresAt + const result = await prisma.taskRun.updateMany({ + where: { friendlyId: { in: params.byFriendlyIds } }, + data: { idempotencyKey: null }, + }); + return { count: result.count }; } - pushTags( - _runId: string, - _tags: string[], - _where: { runtimeEnvironmentId: string }, - _tx?: PrismaClientOrTransaction + async pushTags( + runId: string, + tags: string[], + where: { runtimeEnvironmentId: string }, + tx?: PrismaClientOrTransaction ): Promise<{ updatedAt: Date }> { - throw new Error("not implemented"); + const prisma = tx ?? 
this.prisma; + + return prisma.taskRun.update({ + where: { id: runId, runtimeEnvironmentId: where.runtimeEnvironmentId }, + data: { runTags: { push: tags } }, + select: { updatedAt: true }, + }); } - pushRealtimeStream( - _runId: string, - _streamId: string, - _tx?: PrismaClientOrTransaction + async pushRealtimeStream( + runId: string, + streamId: string, + tx?: PrismaClientOrTransaction ): Promise { - throw new Error("not implemented"); + const prisma = tx ?? this.prisma; + + await prisma.taskRun.update({ + where: { id: runId }, + data: { realtimeStreams: { push: streamId } }, + }); } } From 56ec7071a84adc4d89806aa7a469d6a1e587d60c Mon Sep 17 00:00:00 2001 From: Dan Sutton Date: Wed, 17 Jun 2026 14:24:05 +0100 Subject: [PATCH 10/83] feat(run-store): wire RunStore into run-engine SystemResources and webapp BaseService Add RunStore field to SystemResources, instantiate PostgresRunStore in RunEngine constructor (after prisma/readOnlyPrisma are set), and expose it on the resources object passed to all systems. Create a webapp singleton (runStore.server.ts) and thread it as a default parameter into BaseService so subclasses can access it without changes. 
--- apps/webapp/app/v3/runStore.server.ts | 8 ++++++++ apps/webapp/app/v3/services/baseService.server.ts | 5 ++++- internal-packages/run-engine/src/engine/index.ts | 4 ++++ .../run-engine/src/engine/systems/systems.ts | 2 ++ 4 files changed, 18 insertions(+), 1 deletion(-) create mode 100644 apps/webapp/app/v3/runStore.server.ts diff --git a/apps/webapp/app/v3/runStore.server.ts b/apps/webapp/app/v3/runStore.server.ts new file mode 100644 index 00000000000..2993597ea17 --- /dev/null +++ b/apps/webapp/app/v3/runStore.server.ts @@ -0,0 +1,8 @@ +import { PostgresRunStore } from "@internal/run-store"; +import { $replica, prisma } from "~/db.server"; +import { singleton } from "~/utils/singleton"; + +export const runStore = singleton( + "PostgresRunStore", + () => new PostgresRunStore({ prisma, readOnlyPrisma: $replica }) +); diff --git a/apps/webapp/app/v3/services/baseService.server.ts b/apps/webapp/app/v3/services/baseService.server.ts index 06c8bd33ea5..9dc3a33d084 100644 --- a/apps/webapp/app/v3/services/baseService.server.ts +++ b/apps/webapp/app/v3/services/baseService.server.ts @@ -1,8 +1,10 @@ import { Span, SpanKind } from "@opentelemetry/api"; +import type { RunStore } from "@internal/run-store"; import { $replica, PrismaClientOrTransaction, prisma } from "~/db.server"; import { AuthenticatedEnvironment } from "~/services/apiAuth.server"; import { attributesFromAuthenticatedEnv, tracer } from "../tracer.server"; import { engine, RunEngine } from "../runEngine.server"; +import { runStore as defaultRunStore } from "../runStore.server"; import { ServiceValidationError } from "./common.server"; export { ServiceValidationError }; @@ -10,7 +12,8 @@ export { ServiceValidationError }; export abstract class BaseService { constructor( protected readonly _prisma: PrismaClientOrTransaction = prisma, - protected readonly _replica: PrismaClientOrTransaction = $replica + protected readonly _replica: PrismaClientOrTransaction = $replica, + protected readonly runStore: 
RunStore = defaultRunStore ) {} protected async traceWithEnv( diff --git a/internal-packages/run-engine/src/engine/index.ts b/internal-packages/run-engine/src/engine/index.ts index 2b434a86eec..a324a196a9d 100644 --- a/internal-packages/run-engine/src/engine/index.ts +++ b/internal-packages/run-engine/src/engine/index.ts @@ -73,6 +73,7 @@ import { RaceSimulationSystem } from "./systems/raceSimulationSystem.js"; import { RunAttemptSystem } from "./systems/runAttemptSystem.js"; import { NoopPendingVersionRunIdLookup } from "./services/pendingVersionLookup.js"; import { SystemResources } from "./systems/systems.js"; +import { PostgresRunStore, RunStore } from "@internal/run-store"; import { TtlSystem } from "./systems/ttlSystem.js"; import { WaitpointSystem } from "./systems/waitpointSystem.js"; import { @@ -102,6 +103,7 @@ export class RunEngine { prisma: PrismaClient; readOnlyPrisma: PrismaReplicaClient; + runStore: RunStore; runQueue: RunQueue; eventBus: EventBus = new EventEmitter(); executionSnapshotSystem: ExecutionSnapshotSystem; @@ -123,6 +125,7 @@ export class RunEngine { this.logger = options.logger ?? new Logger("RunEngine", this.options.logLevel ?? "info"); this.prisma = options.prisma; this.readOnlyPrisma = options.readOnlyPrisma ?? 
this.prisma; + this.runStore = new PostgresRunStore({ prisma: this.prisma, readOnlyPrisma: this.readOnlyPrisma }); this.runLockRedis = createRedisClient( { ...options.runLock.redis, @@ -313,6 +316,7 @@ export class RunEngine { const resources: SystemResources = { prisma: this.prisma, readOnlyPrisma: this.readOnlyPrisma, + runStore: this.runStore, worker: this.worker, eventBus: this.eventBus, logger: this.logger, diff --git a/internal-packages/run-engine/src/engine/systems/systems.ts b/internal-packages/run-engine/src/engine/systems/systems.ts index e21f95958d1..1b2f1d64c51 100644 --- a/internal-packages/run-engine/src/engine/systems/systems.ts +++ b/internal-packages/run-engine/src/engine/systems/systems.ts @@ -1,4 +1,5 @@ import { Meter, Tracer } from "@internal/tracing"; +import { RunStore } from "@internal/run-store"; import { Logger } from "@trigger.dev/core/logger"; import { PrismaClient, PrismaReplicaClient } from "@trigger.dev/database"; import { RunQueue } from "../../run-queue/index.js"; @@ -11,6 +12,7 @@ import { RaceSimulationSystem } from "./raceSimulationSystem.js"; export type SystemResources = { prisma: PrismaClient; readOnlyPrisma: PrismaReplicaClient; + runStore: RunStore; worker: EngineWorker; eventBus: EventBus; logger: Logger; From 01bbc67fdcd234e8c9c724c937b188ab6d3f10ce Mon Sep 17 00:00:00 2001 From: Dan Sutton Date: Wed, 17 Jun 2026 14:32:48 +0100 Subject: [PATCH 11/83] fix(run-store): align create-input types with the columns callers actually pass --- internal-packages/run-store/src/types.ts | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/internal-packages/run-store/src/types.ts b/internal-packages/run-store/src/types.ts index 9bef8219183..6e1e2846066 100644 --- a/internal-packages/run-store/src/types.ts +++ b/internal-packages/run-store/src/types.ts @@ -75,7 +75,7 @@ export type RunAssociatedWaitpointInput = { type: "RUN"; status: "PENDING"; idempotencyKey: string; - userProvidedIdempotencyKey: false; + 
userProvidedIdempotencyKey: boolean; projectId: string; environmentId: string; }; @@ -92,7 +92,7 @@ export type CreateRunData = { projectId: string; idempotencyKey?: string; idempotencyKeyExpiresAt?: Date; - idempotencyKeyOptions?: string[]; + idempotencyKeyOptions?: Prisma.InputJsonValue; taskIdentifier: string; payload: string; payloadType: string; @@ -113,7 +113,7 @@ export type CreateRunData = { delayUntil?: Date; queuedAt?: Date; maxAttempts?: number; - taskEventStore: string; + taskEventStore?: string; priorityMs?: number; queueTimestamp?: Date; ttl?: string; @@ -124,7 +124,7 @@ export type CreateRunData = { replayedFromTaskRunFriendlyId?: string; batchId?: string; resumeParentOnCompletion?: boolean; - depth: number; + depth?: number; metadata?: string; metadataType?: string; seedMetadata?: string; @@ -137,7 +137,7 @@ export type CreateRunData = { bulkActionGroupIds?: string[]; planType?: string; realtimeStreamsVersion?: string; - streamBasinName?: string; + streamBasinName?: string | null; debounce?: Prisma.InputJsonValue; annotations?: Prisma.InputJsonValue; }; @@ -179,7 +179,7 @@ export type CreateFailedRunData = { depth: number; batchId?: string; resumeParentOnCompletion?: boolean; - taskEventStore: string; + taskEventStore?: string; }; export type CreateFailedRunInput = { From de52aaa057682ff40808f2fb14655bcaf472b923 Mon Sep 17 00:00:00 2001 From: Dan Sutton Date: Wed, 17 Jun 2026 14:35:18 +0100 Subject: [PATCH 12/83] refactor(run-engine): route run creation through RunStore --- .../run-engine/src/engine/index.ts | 413 +++++++++--------- 1 file changed, 204 insertions(+), 209 deletions(-) diff --git a/internal-packages/run-engine/src/engine/index.ts b/internal-packages/run-engine/src/engine/index.ts index a324a196a9d..5ad54c49d7b 100644 --- a/internal-packages/run-engine/src/engine/index.ts +++ b/internal-packages/run-engine/src/engine/index.ts @@ -536,84 +536,85 @@ export class RunEngine { const error: TaskRunError = { type: "STRING_ERROR", raw: 
cancelReason }; try { - const taskRun = await prisma.taskRun.create({ - data: { - id, - engine: "V2", - status: "CANCELED", - friendlyId: snapshot.friendlyId, - runtimeEnvironmentId: snapshot.environment.id, - environmentType: snapshot.environment.type, - organizationId: snapshot.environment.organization.id, - projectId: snapshot.environment.project.id, - idempotencyKey: snapshot.idempotencyKey, - idempotencyKeyExpiresAt: snapshot.idempotencyKeyExpiresAt, - idempotencyKeyOptions: snapshot.idempotencyKeyOptions, - taskIdentifier: snapshot.taskIdentifier, - payload: snapshot.payload, - payloadType: snapshot.payloadType, - context: snapshot.context, - traceContext: snapshot.traceContext, - traceId: snapshot.traceId, - spanId: snapshot.spanId, - parentSpanId: snapshot.parentSpanId, - lockedToVersionId: snapshot.lockedToVersionId, - taskVersion: snapshot.taskVersion, - sdkVersion: snapshot.sdkVersion, - cliVersion: snapshot.cliVersion, - concurrencyKey: snapshot.concurrencyKey, - queue: snapshot.queue, - lockedQueueId: snapshot.lockedQueueId, - workerQueue: snapshot.workerQueue, - isTest: snapshot.isTest, - taskEventStore: snapshot.taskEventStore, - // Defensive: the snapshot comes from a cjson-encoded buffer - // payload, where empty Lua tables encode as `{}` not `[]`. If - // the drainer pops a buffered run with no tags, snapshot.tags - // will be an empty object, which Prisma misreads as a relation - // update op. Normalise to a real array (or undefined for the - // empty case). - runTags: Array.isArray(snapshot.tags) && snapshot.tags.length > 0 - ? 
snapshot.tags - : undefined, - oneTimeUseToken: snapshot.oneTimeUseToken, - parentTaskRunId: snapshot.parentTaskRunId, - rootTaskRunId: snapshot.rootTaskRunId, - replayedFromTaskRunFriendlyId: snapshot.replayedFromTaskRunFriendlyId, - batchId: snapshot.batch?.id, - resumeParentOnCompletion: snapshot.resumeParentOnCompletion, - depth: snapshot.depth, - seedMetadata: snapshot.seedMetadata, - seedMetadataType: snapshot.seedMetadataType, - metadata: snapshot.metadata, - metadataType: snapshot.metadataType, - machinePreset: snapshot.machine, - scheduleId: snapshot.scheduleId, - scheduleInstanceId: snapshot.scheduleInstanceId, - createdAt: snapshot.createdAt, - bulkActionGroupIds: snapshot.bulkActionId ? [snapshot.bulkActionId] : undefined, - planType: snapshot.planType, - realtimeStreamsVersion: snapshot.realtimeStreamsVersion, - streamBasinName: snapshot.streamBasinName, - annotations: snapshot.annotations, - completedAt: cancelledAt, - updatedAt: cancelledAt, - error: error as unknown as Prisma.InputJsonValue, - attemptNumber: 0, - executionSnapshots: { - create: { - engine: "V2", - executionStatus: "FINISHED", - description: "Run cancelled before materialisation", - runStatus: "CANCELED", - environmentId: snapshot.environment.id, - environmentType: snapshot.environment.type, - projectId: snapshot.environment.project.id, - organizationId: snapshot.environment.organization.id, - }, + const taskRun = await this.runStore.createCancelledRun( + { + data: { + id, + engine: "V2", + status: "CANCELED", + friendlyId: snapshot.friendlyId, + runtimeEnvironmentId: snapshot.environment.id, + environmentType: snapshot.environment.type, + organizationId: snapshot.environment.organization.id, + projectId: snapshot.environment.project.id, + idempotencyKey: snapshot.idempotencyKey, + idempotencyKeyExpiresAt: snapshot.idempotencyKeyExpiresAt, + idempotencyKeyOptions: snapshot.idempotencyKeyOptions, + taskIdentifier: snapshot.taskIdentifier, + payload: snapshot.payload, + payloadType: 
snapshot.payloadType, + context: snapshot.context, + traceContext: snapshot.traceContext, + traceId: snapshot.traceId, + spanId: snapshot.spanId, + parentSpanId: snapshot.parentSpanId, + lockedToVersionId: snapshot.lockedToVersionId, + taskVersion: snapshot.taskVersion, + sdkVersion: snapshot.sdkVersion, + cliVersion: snapshot.cliVersion, + concurrencyKey: snapshot.concurrencyKey, + queue: snapshot.queue, + lockedQueueId: snapshot.lockedQueueId, + workerQueue: snapshot.workerQueue, + isTest: snapshot.isTest, + taskEventStore: snapshot.taskEventStore, + // Defensive: the snapshot comes from a cjson-encoded buffer + // payload, where empty Lua tables encode as `{}` not `[]`. If + // the drainer pops a buffered run with no tags, snapshot.tags + // will be an empty object, which Prisma misreads as a relation + // update op. Normalise to a real array (or undefined for the + // empty case). + runTags: Array.isArray(snapshot.tags) && snapshot.tags.length > 0 + ? snapshot.tags + : undefined, + oneTimeUseToken: snapshot.oneTimeUseToken, + parentTaskRunId: snapshot.parentTaskRunId, + rootTaskRunId: snapshot.rootTaskRunId, + replayedFromTaskRunFriendlyId: snapshot.replayedFromTaskRunFriendlyId, + batchId: snapshot.batch?.id, + resumeParentOnCompletion: snapshot.resumeParentOnCompletion, + depth: snapshot.depth, + seedMetadata: snapshot.seedMetadata, + seedMetadataType: snapshot.seedMetadataType, + metadata: snapshot.metadata, + metadataType: snapshot.metadataType, + machinePreset: snapshot.machine, + scheduleId: snapshot.scheduleId, + scheduleInstanceId: snapshot.scheduleInstanceId, + createdAt: snapshot.createdAt, + bulkActionGroupIds: snapshot.bulkActionId ? 
[snapshot.bulkActionId] : undefined, + planType: snapshot.planType, + realtimeStreamsVersion: snapshot.realtimeStreamsVersion, + streamBasinName: snapshot.streamBasinName, + annotations: snapshot.annotations, + completedAt: cancelledAt, + updatedAt: cancelledAt, + error: error as unknown as Prisma.InputJsonValue, + attemptNumber: 0, + }, + snapshot: { + engine: "V2", + executionStatus: "FINISHED", + description: "Run cancelled before materialisation", + runStatus: "CANCELED", + environmentId: snapshot.environment.id, + environmentType: snapshot.environment.type, + projectId: snapshot.environment.project.id, + organizationId: snapshot.environment.organization.id, }, }, - }); + prisma + ); if (emitRunCancelledEvent) { this.eventBus.emit("runCancelled", { @@ -829,111 +830,107 @@ export class RunEngine { let taskRun: TaskRun & { associatedWaitpoint: Waitpoint | null }; const taskRunId = RunId.fromFriendlyId(friendlyId); try { - taskRun = await prisma.taskRun.create({ - include: { - associatedWaitpoint: true, - }, - data: { - id: taskRunId, - engine: "V2", - status, - friendlyId, - runtimeEnvironmentId: environment.id, - environmentType: environment.type, - organizationId: environment.organization.id, - projectId: environment.project.id, - idempotencyKey, - idempotencyKeyExpiresAt, - idempotencyKeyOptions, - taskIdentifier, - payload, - payloadType, - context, - traceContext, - traceId, - spanId, - parentSpanId, - lockedToVersionId, - taskVersion, - sdkVersion, - cliVersion, - concurrencyKey, - queue, - lockedQueueId, - workerQueue, - isTest, - delayUntil, - queuedAt, - maxAttempts, - taskEventStore, - priorityMs, - queueTimestamp: queueTimestamp ?? delayUntil ?? new Date(), - ttl: resolvedTtl, - // Defensive: when the mollifier drainer replays a buffered - // snapshot whose payload was rewritten by a buffer-side Lua - // mutate (e.g. append_tags clears an empty list), cjson - // encodes an empty Lua table as `{}` rather than `[]`. 
JS - // parses that back as an empty object, and `{}.length` is - // undefined — the original `tags.length === 0` check would - // pass `{}` straight to Prisma's `String[]` column. Mirror - // the same Array.isArray guard that `createCancelledRun` - // uses for symmetry with the trigger replay path. - runTags: Array.isArray(tags) && tags.length > 0 ? tags : undefined, - oneTimeUseToken, - parentTaskRunId, - rootTaskRunId, - replayedFromTaskRunFriendlyId, - batchId: batch?.id, - resumeParentOnCompletion, - depth, - metadata, - metadataType, - seedMetadata, - seedMetadataType, - maxDurationInSeconds, - machinePreset: machine, - scheduleId, - scheduleInstanceId, - createdAt, - bulkActionGroupIds: bulkActionId ? [bulkActionId] : undefined, - planType, - realtimeStreamsVersion, - streamBasinName, - debounce: debounce - ? { - key: debounce.key, - delay: debounce.delay, - createdAt: new Date(), - } - : undefined, - annotations, - executionSnapshots: { - create: { - engine: "V2", - executionStatus: delayUntil ? "DELAYED" : "RUN_CREATED", - description: delayUntil ? 
"Run is delayed" : "Run was created", - runStatus: status, - environmentId: environment.id, - environmentType: environment.type, - projectId: environment.project.id, - organizationId: environment.organization.id, - workerId, - runnerId, - }, + taskRun = await this.runStore.createRun( + { + data: { + id: taskRunId, + engine: "V2", + status, + friendlyId, + runtimeEnvironmentId: environment.id, + environmentType: environment.type, + organizationId: environment.organization.id, + projectId: environment.project.id, + idempotencyKey, + idempotencyKeyExpiresAt, + idempotencyKeyOptions, + taskIdentifier, + payload, + payloadType, + context, + traceContext, + traceId, + spanId, + parentSpanId, + lockedToVersionId, + taskVersion, + sdkVersion, + cliVersion, + concurrencyKey, + queue, + lockedQueueId, + workerQueue, + isTest, + delayUntil, + queuedAt, + maxAttempts, + taskEventStore, + priorityMs, + queueTimestamp: queueTimestamp ?? delayUntil ?? new Date(), + ttl: resolvedTtl, + // Defensive: when the mollifier drainer replays a buffered + // snapshot whose payload was rewritten by a buffer-side Lua + // mutate (e.g. append_tags clears an empty list), cjson + // encodes an empty Lua table as `{}` rather than `[]`. JS + // parses that back as an empty object, and `{}.length` is + // undefined — the original `tags.length === 0` check would + // pass `{}` straight to Prisma's `String[]` column. Mirror + // the same Array.isArray guard that `createCancelledRun` + // uses for symmetry with the trigger replay path. + runTags: Array.isArray(tags) && tags.length > 0 ? tags : undefined, + oneTimeUseToken, + parentTaskRunId, + rootTaskRunId, + replayedFromTaskRunFriendlyId, + batchId: batch?.id, + resumeParentOnCompletion, + depth, + metadata, + metadataType, + seedMetadata, + seedMetadataType, + maxDurationInSeconds, + machinePreset: machine, + scheduleId, + scheduleInstanceId, + createdAt, + bulkActionGroupIds: bulkActionId ? 
[bulkActionId] : undefined, + planType, + realtimeStreamsVersion, + streamBasinName, + debounce: debounce + ? { + key: debounce.key, + delay: debounce.delay, + createdAt: new Date(), + } + : undefined, + annotations, + }, + snapshot: { + engine: "V2", + executionStatus: delayUntil ? "DELAYED" : "RUN_CREATED", + description: delayUntil ? "Run is delayed" : "Run was created", + runStatus: status, + environmentId: environment.id, + environmentType: environment.type, + projectId: environment.project.id, + organizationId: environment.organization.id, + workerId, + runnerId, }, // Only create waitpoint if parent is waiting for this run to complete // For standalone triggers (no waiting parent), waitpoint is created lazily if needed later associatedWaitpoint: resumeParentOnCompletion && parentTaskRunId - ? { - create: this.waitpointSystem.buildRunAssociatedWaitpoint({ - projectId: environment.project.id, - environmentId: environment.id, - }), - } + ? this.waitpointSystem.buildRunAssociatedWaitpoint({ + projectId: environment.project.id, + environmentId: environment.id, + }) : undefined, }, - }); + prisma + ); } catch (error) { if (error instanceof Prisma.PrismaClientKnownRequestError) { this.logger.debug("engine.trigger(): Prisma transaction error", { @@ -1178,42 +1175,40 @@ export class RunEngine { // Create the run in terminal SYSTEM_FAILURE status. // No execution snapshot is needed: this run never gets dequeued, executed, // or heartbeated, so nothing will call getLatestExecutionSnapshot on it. - const taskRun = await this.prisma.taskRun.create({ - include: { - associatedWaitpoint: true, - }, - data: { - id: taskRunId, - engine: "V2", - status: "SYSTEM_FAILURE", - friendlyId, - runtimeEnvironmentId: environment.id, - environmentType: environment.type, - organizationId: environment.organization.id, - projectId: environment.project.id, - taskIdentifier, - payload: payload ?? "", - payloadType: payloadType ?? 
"application/json", - context: {}, - traceContext: (traceContext ?? {}) as Record, - traceId: traceId ?? "", - spanId: spanId ?? "", - queue: queueOverride ?? `task/${taskIdentifier}`, - lockedQueueId: lockedQueueIdOverride, - isTest: false, - completedAt: new Date(), - error: error as unknown as Prisma.InputJsonObject, - parentTaskRunId, - rootTaskRunId, - depth: depth ?? 0, - batchId: batch?.id, - resumeParentOnCompletion, - taskEventStore, - associatedWaitpoint: waitpointData - ? { create: waitpointData } - : undefined, + const taskRun = await this.runStore.createFailedRun( + { + data: { + id: taskRunId, + engine: "V2", + status: "SYSTEM_FAILURE", + friendlyId, + runtimeEnvironmentId: environment.id, + environmentType: environment.type, + organizationId: environment.organization.id, + projectId: environment.project.id, + taskIdentifier, + payload: payload ?? "", + payloadType: payloadType ?? "application/json", + context: {}, + traceContext: (traceContext ?? {}) as Record, + traceId: traceId ?? "", + spanId: spanId ?? "", + queue: queueOverride ?? `task/${taskIdentifier}`, + lockedQueueId: lockedQueueIdOverride, + isTest: false, + completedAt: new Date(), + error: error as unknown as Prisma.InputJsonObject, + parentTaskRunId, + rootTaskRunId, + depth: depth ?? 
0, + batchId: batch?.id, + resumeParentOnCompletion, + taskEventStore, + }, + associatedWaitpoint: waitpointData, }, - }); + this.prisma + ); span.setAttribute("runId", taskRun.id); From 48261171fe6cb4a4b1cbff45c7db8aac16eaec97 Mon Sep 17 00:00:00 2001 From: Dan Sutton Date: Wed, 17 Jun 2026 14:39:54 +0100 Subject: [PATCH 13/83] fix(run-store): allow optional machinePreset in recordRetryOutcome (leave-unchanged semantics) --- internal-packages/run-store/src/PostgresRunStore.ts | 2 +- internal-packages/run-store/src/types.ts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/internal-packages/run-store/src/PostgresRunStore.ts b/internal-packages/run-store/src/PostgresRunStore.ts index ee6ad9e0666..76f726db317 100644 --- a/internal-packages/run-store/src/PostgresRunStore.ts +++ b/internal-packages/run-store/src/PostgresRunStore.ts @@ -191,7 +191,7 @@ export class PostgresRunStore implements RunStore { async recordRetryOutcome( runId: string, - data: { machinePreset: string; usageDurationMs: number; costInCents: number }, + data: { machinePreset?: string; usageDurationMs: number; costInCents: number }, args: { include: I }, tx?: PrismaClientOrTransaction ): Promise> { diff --git a/internal-packages/run-store/src/types.ts b/internal-packages/run-store/src/types.ts index 6e1e2846066..c284868a37d 100644 --- a/internal-packages/run-store/src/types.ts +++ b/internal-packages/run-store/src/types.ts @@ -243,7 +243,7 @@ export interface RunStore { ): Promise>; recordRetryOutcome( runId: string, - data: { machinePreset: string; usageDurationMs: number; costInCents: number }, + data: { machinePreset?: string; usageDurationMs: number; costInCents: number }, args: { include: I }, tx?: PrismaClientOrTransaction ): Promise>; From 8650e406cfe8a9ededc5af0518a40d3ba1091184 Mon Sep 17 00:00:00 2001 From: Dan Sutton Date: Wed, 17 Jun 2026 14:41:10 +0100 Subject: [PATCH 14/83] refactor(run-engine): route attempt lifecycle, cancel, and fail writes through RunStore --- 
.../src/engine/systems/runAttemptSystem.ts | 379 +++++++++--------- 1 file changed, 185 insertions(+), 194 deletions(-) diff --git a/internal-packages/run-engine/src/engine/systems/runAttemptSystem.ts b/internal-packages/run-engine/src/engine/systems/runAttemptSystem.ts index 02fd83a7a25..1aa1738f3b0 100644 --- a/internal-packages/run-engine/src/engine/systems/runAttemptSystem.ts +++ b/internal-packages/run-engine/src/engine/systems/runAttemptSystem.ts @@ -397,67 +397,67 @@ export class RunAttemptSystem { const result = await $transaction( prisma, async (tx) => { - const run = await tx.taskRun.update({ - where: { - id: taskRun.id, - }, - data: { - status: "EXECUTING", + const run = await this.$.runStore.startAttempt( + taskRun.id, + { attemptNumber: nextAttemptNumber, executedAt: taskRun.attemptNumber === null ? new Date() : undefined, isWarmStart: isWarmStart ?? false, }, - select: { - id: true, - createdAt: true, - updatedAt: true, - executedAt: true, - baseCostInCents: true, - projectId: true, - organizationId: true, - friendlyId: true, - lockedById: true, - lockedQueueId: true, - queue: true, - attemptNumber: true, - status: true, - ttl: true, - metadata: true, - metadataType: true, - machinePreset: true, - payload: true, - payloadType: true, - runTags: true, - isTest: true, - replayedFromTaskRunFriendlyId: true, - idempotencyKey: true, - idempotencyKeyOptions: true, - startedAt: true, - maxAttempts: true, - taskVersion: true, - maxDurationInSeconds: true, - usageDurationMs: true, - costInCents: true, - traceContext: true, - priorityMs: true, - batchId: true, - realtimeStreamsVersion: true, - runtimeEnvironment: { - select: { - id: true, - slug: true, - type: true, - branchName: true, - git: true, - organizationId: true, + { + select: { + id: true, + createdAt: true, + updatedAt: true, + executedAt: true, + baseCostInCents: true, + projectId: true, + organizationId: true, + friendlyId: true, + lockedById: true, + lockedQueueId: true, + queue: true, + 
attemptNumber: true, + status: true, + ttl: true, + metadata: true, + metadataType: true, + machinePreset: true, + payload: true, + payloadType: true, + runTags: true, + isTest: true, + replayedFromTaskRunFriendlyId: true, + idempotencyKey: true, + idempotencyKeyOptions: true, + startedAt: true, + maxAttempts: true, + taskVersion: true, + maxDurationInSeconds: true, + usageDurationMs: true, + costInCents: true, + traceContext: true, + priorityMs: true, + batchId: true, + realtimeStreamsVersion: true, + runtimeEnvironment: { + select: { + id: true, + slug: true, + type: true, + branchName: true, + git: true, + organizationId: true, + }, }, + parentTaskRunId: true, + rootTaskRunId: true, + workerQueue: true, + taskEventStore: true, }, - parentTaskRunId: true, - rootTaskRunId: true, - workerQueue: true, - taskEventStore: true, }, - }); + tx + ); const newSnapshot = await this.executionSnapshotSystem.createExecutionSnapshot(tx, { run, @@ -740,58 +740,58 @@ export class RunAttemptSystem { environmentType: latestSnapshot.environmentType, }); - const run = await prisma.taskRun.update({ - where: { id: runId }, - data: { - status: "COMPLETED_SUCCESSFULLY", + const run = await this.$.runStore.completeAttemptSuccess( + runId, + { completedAt, output: completion.output, outputType: completion.outputType, usageDurationMs: updatedUsage.usageDurationMs, costInCents: updatedUsage.costInCents, - executionSnapshots: { - create: { - executionStatus: "FINISHED", - description: "Task completed successfully", - runStatus: "COMPLETED_SUCCESSFULLY", - attemptNumber: latestSnapshot.attemptNumber, - environmentId: latestSnapshot.environmentId, - environmentType: latestSnapshot.environmentType, - projectId: latestSnapshot.projectId, - organizationId: latestSnapshot.organizationId, - workerId, - runnerId, - }, + snapshot: { + executionStatus: "FINISHED", + description: "Task completed successfully", + runStatus: "COMPLETED_SUCCESSFULLY", + attemptNumber: latestSnapshot.attemptNumber, + 
environmentId: latestSnapshot.environmentId, + environmentType: latestSnapshot.environmentType, + projectId: latestSnapshot.projectId, + organizationId: latestSnapshot.organizationId, + workerId, + runnerId, }, }, - select: { - id: true, - friendlyId: true, - status: true, - attemptNumber: true, - spanId: true, - updatedAt: true, - associatedWaitpoint: { - select: { - id: true, + { + select: { + id: true, + friendlyId: true, + status: true, + attemptNumber: true, + spanId: true, + updatedAt: true, + associatedWaitpoint: { + select: { + id: true, + }, }, - }, - project: { - select: { - organizationId: true, + project: { + select: { + organizationId: true, + }, }, + batchId: true, + createdAt: true, + completedAt: true, + taskEventStore: true, + parentTaskRunId: true, + usageDurationMs: true, + costInCents: true, + runtimeEnvironmentId: true, + projectId: true, }, - batchId: true, - createdAt: true, - completedAt: true, - taskEventStore: true, - parentTaskRunId: true, - usageDurationMs: true, - costInCents: true, - runtimeEnvironmentId: true, - projectId: true, }, - }); + prisma + ); const newSnapshot = await getLatestExecutionSnapshot(prisma, runId); await this.$.runQueue.acknowledgeMessage(run.project.organizationId, runId); @@ -997,25 +997,26 @@ export class RunAttemptSystem { environmentType: latestSnapshot.environmentType, }); - const run = await prisma.taskRun.update({ - where: { - id: runId, - }, - data: { + const run = await this.$.runStore.recordRetryOutcome( + runId, + { machinePreset: retryResult.machine, usageDurationMs: updatedUsage.usageDurationMs, costInCents: updatedUsage.costInCents, }, - include: { - runtimeEnvironment: { - include: { - project: true, - organization: true, - orgMember: true, + { + include: { + runtimeEnvironment: { + include: { + project: true, + organization: true, + orgMember: true, + }, }, }, }, - }); + this.$.prisma + ); const nextAttemptNumber = latestSnapshot.attemptNumber === null ? 
1 : latestSnapshot.attemptNumber + 1; @@ -1250,19 +1251,17 @@ export class RunAttemptSystem { return { wasRequeued: false, ...result }; } - const requeuedRun = await prisma.taskRun.update({ - where: { - id: run.id, - }, - data: { - status: "PENDING", - }, - select: { - id: true, - status: true, - attemptNumber: true, + const requeuedRun = await this.$.runStore.requeueRun( + run.id, + { + select: { + id: true, + status: true, + attemptNumber: true, + }, }, - }); + prisma + ); const newSnapshot = await this.executionSnapshotSystem.createExecutionSnapshot(prisma, { run: requeuedRun, @@ -1338,14 +1337,7 @@ export class RunAttemptSystem { //already finished, do nothing if (latestSnapshot.executionStatus === "FINISHED") { if (bulkActionId) { - await prisma.taskRun.update({ - where: { id: runId }, - data: { - bulkActionGroupIds: { - push: bulkActionId, - }, - }, - }); + await this.$.runStore.recordBulkActionMembership(runId, bulkActionId, prisma); } return { alreadyFinished: true, @@ -1398,52 +1390,50 @@ export class RunAttemptSystem { }); } - const run = await prisma.taskRun.update({ - where: { id: runId }, - data: { - status: "CANCELED", + const run = await this.$.runStore.cancelRun( + runId, + { completedAt: finalizeRun ? completedAt ?? new Date() : completedAt, error, - bulkActionGroupIds: bulkActionId - ? 
{ - push: bulkActionId, - } - : undefined, + ...(bulkActionId && { bulkActionId }), ...(usageUpdate && { usageDurationMs: usageUpdate.usageDurationMs, costInCents: usageUpdate.costInCents, }), }, - select: { - id: true, - friendlyId: true, - status: true, - attemptNumber: true, - spanId: true, - batchId: true, - createdAt: true, - completedAt: true, - taskEventStore: true, - parentTaskRunId: true, - delayUntil: true, - updatedAt: true, - runtimeEnvironment: { - select: { - organizationId: true, + { + select: { + id: true, + friendlyId: true, + status: true, + attemptNumber: true, + spanId: true, + batchId: true, + createdAt: true, + completedAt: true, + taskEventStore: true, + parentTaskRunId: true, + delayUntil: true, + updatedAt: true, + runtimeEnvironment: { + select: { + organizationId: true, + }, }, - }, - associatedWaitpoint: { - select: { - id: true, + associatedWaitpoint: { + select: { + id: true, + }, }, - }, - childRuns: { - select: { - id: true, + childRuns: { + select: { + id: true, + }, }, }, }, - }); + prisma + ); //if the run is delayed and hasn't started yet, we need to prevent it being added to the queue in future if (isInitialState(latestSnapshot.executionStatus) && run.delayUntil) { @@ -1612,51 +1602,52 @@ export class RunAttemptSystem { }); //run permanently failed - const run = await prisma.taskRun.update({ - where: { - id: runId, - }, - data: { + const run = await this.$.runStore.failRunPermanently( + runId, + { status, completedAt: failedAt, error: truncatedError, usageDurationMs: updatedUsage.usageDurationMs, costInCents: updatedUsage.costInCents, }, - select: { - id: true, - friendlyId: true, - status: true, - attemptNumber: true, - spanId: true, - batchId: true, - parentTaskRunId: true, - updatedAt: true, - usageDurationMs: true, - costInCents: true, - associatedWaitpoint: { - select: { - id: true, + { + select: { + id: true, + friendlyId: true, + status: true, + attemptNumber: true, + spanId: true, + batchId: true, + parentTaskRunId: 
true, + updatedAt: true, + usageDurationMs: true, + costInCents: true, + associatedWaitpoint: { + select: { + id: true, + }, }, - }, - runtimeEnvironment: { - select: { - id: true, - type: true, - organizationId: true, - project: { - select: { - id: true, - organizationId: true, + runtimeEnvironment: { + select: { + id: true, + type: true, + organizationId: true, + project: { + select: { + id: true, + organizationId: true, + }, }, }, }, + taskEventStore: true, + createdAt: true, + completedAt: true, }, - taskEventStore: true, - createdAt: true, - completedAt: true, }, - }); + this.$.prisma + ); const newSnapshot = await this.executionSnapshotSystem.createExecutionSnapshot(prisma, { run, From d530eb14bf8d521ba9f9492691ca3b4b471d709f Mon Sep 17 00:00:00 2001 From: Dan Sutton Date: Wed, 17 Jun 2026 14:48:15 +0100 Subject: [PATCH 15/83] refactor(run-engine): route expiry and dequeue-lock writes through RunStore --- .../src/engine/systems/dequeueSystem.ts | 98 ++++++++----------- .../src/engine/systems/ttlSystem.ts | 86 ++++++++-------- 2 files changed, 82 insertions(+), 102 deletions(-) diff --git a/internal-packages/run-engine/src/engine/systems/dequeueSystem.ts b/internal-packages/run-engine/src/engine/systems/dequeueSystem.ts index 7c811ebfdfc..26ea7866a67 100644 --- a/internal-packages/run-engine/src/engine/systems/dequeueSystem.ts +++ b/internal-packages/run-engine/src/engine/systems/dequeueSystem.ts @@ -419,17 +419,14 @@ export class DequeueSystem { // Pre-generate snapshot ID so we can construct the result without an extra read const snapshotId = generateInternalId(); - const lockedTaskRun = await prisma.taskRun.update({ - where: { - id: runId, - }, - data: { + const lockedTaskRun = await this.$.runStore.lockRunToWorker( + runId, + { lockedAt, lockedById: result.task.id, lockedToVersionId: result.worker.id, lockedQueueId: result.queue.id, lockedRetryConfig: lockedRetryConfig ?? 
undefined, - status: "DEQUEUED", startedAt, baseCostInCents: this.options.machines.baseCostInCents, machinePreset: machinePreset.name, @@ -438,38 +435,27 @@ export class DequeueSystem { cliVersion: result.worker.cliVersion, maxDurationInSeconds, maxAttempts: maxAttempts ?? undefined, - executionSnapshots: { - create: { - id: snapshotId, - engine: "V2", - executionStatus: "PENDING_EXECUTING", - description: "Run was dequeued for execution", - // Map DEQUEUED -> PENDING for backwards compatibility with older runners - runStatus: "PENDING", - attemptNumber: result.run.attemptNumber ?? undefined, - previousSnapshotId: snapshot.id, - environmentId: snapshot.environmentId, - environmentType: snapshot.environmentType, - projectId: snapshot.projectId, - organizationId: snapshot.organizationId, - checkpointId: snapshot.checkpointId ?? undefined, - batchId: snapshot.batchId ?? undefined, - completedWaitpoints: { - connect: snapshot.completedWaitpoints.map((w) => ({ id: w.id })), - }, - completedWaitpointOrder: snapshot.completedWaitpoints - .filter((c) => c.index !== undefined) - .sort((a, b) => a.index! - b.index!) - .map((w) => w.id), - workerId, - runnerId, - }, + snapshot: { + id: snapshotId, + previousSnapshotId: snapshot.id, + attemptNumber: result.run.attemptNumber ?? undefined, + environmentId: snapshot.environmentId, + environmentType: snapshot.environmentType, + projectId: snapshot.projectId, + organizationId: snapshot.organizationId, + checkpointId: snapshot.checkpointId ?? undefined, + batchId: snapshot.batchId ?? undefined, + completedWaitpointIds: snapshot.completedWaitpoints.map((w) => w.id), + completedWaitpointOrder: snapshot.completedWaitpoints + .filter((c) => c.index !== undefined) + .sort((a, b) => a.index! - b.index!) 
+ .map((w) => w.id), + workerId, + runnerId, }, }, - include: { - runtimeEnvironment: true, - }, - }); + prisma + ); this.$.eventBus.emit("runLocked", { time: new Date(), @@ -741,30 +727,32 @@ export class DequeueSystem { }); //mark run as waiting for deploy - const run = await prisma.taskRun.update({ - where: { id: runId }, - data: { - status: "PENDING_VERSION", + const run = await this.$.runStore.parkPendingVersion( + runId, + { statusReason, }, - select: { - id: true, - status: true, - attemptNumber: true, - updatedAt: true, - createdAt: true, - runTags: true, - batchId: true, - runtimeEnvironment: { - select: { - id: true, - type: true, - projectId: true, - project: { select: { id: true, organizationId: true } }, + { + select: { + id: true, + status: true, + attemptNumber: true, + updatedAt: true, + createdAt: true, + runTags: true, + batchId: true, + runtimeEnvironment: { + select: { + id: true, + type: true, + projectId: true, + project: { select: { id: true, organizationId: true } }, + }, }, }, }, - }); + prisma + ); this.$.logger.debug("RunEngine.dequeueFromWorkerQueue(): Pending version", { runId, diff --git a/internal-packages/run-engine/src/engine/systems/ttlSystem.ts b/internal-packages/run-engine/src/engine/systems/ttlSystem.ts index 8d078c88890..ebd1cbdd80b 100644 --- a/internal-packages/run-engine/src/engine/systems/ttlSystem.ts +++ b/internal-packages/run-engine/src/engine/systems/ttlSystem.ts @@ -1,6 +1,6 @@ import { parseNaturalLanguageDuration } from "@trigger.dev/core/v3/isomorphic"; import { TaskRunError } from "@trigger.dev/core/v3/schemas"; -import { Prisma, PrismaClientOrTransaction, TaskRunStatus } from "@trigger.dev/database"; +import { PrismaClientOrTransaction, TaskRunStatus } from "@trigger.dev/database"; import { isExecuting } from "../statuses.js"; import { getLatestExecutionSnapshot } from "./executionSnapshotSystem.js"; import { SystemResources } from "./systems.js"; @@ -61,51 +61,51 @@ export class TtlSystem { raw: `Run expired 
because the TTL (${run.ttl}) was reached`, }; - const updatedRun = await prisma.taskRun.update({ - where: { id: runId }, - data: { - status: "EXPIRED", + const updatedRun = await this.$.runStore.expireRun( + runId, + { + error, completedAt: new Date(), expiredAt: new Date(), - error, - executionSnapshots: { - create: { - engine: "V2", - executionStatus: "FINISHED", - description: "Run was expired because the TTL was reached", - runStatus: "EXPIRED", - environmentId: snapshot.environmentId, - environmentType: snapshot.environmentType, - projectId: snapshot.projectId, - organizationId: snapshot.organizationId, - }, + snapshot: { + engine: "V2", + executionStatus: "FINISHED", + description: "Run was expired because the TTL was reached", + runStatus: "EXPIRED", + environmentId: snapshot.environmentId, + environmentType: snapshot.environmentType, + projectId: snapshot.projectId, + organizationId: snapshot.organizationId, }, }, - select: { - id: true, - spanId: true, - ttl: true, - updatedAt: true, - associatedWaitpoint: { - select: { - id: true, + { + select: { + id: true, + spanId: true, + ttl: true, + updatedAt: true, + associatedWaitpoint: { + select: { + id: true, + }, }, - }, - runtimeEnvironment: { - select: { - organizationId: true, - projectId: true, - id: true, + runtimeEnvironment: { + select: { + organizationId: true, + projectId: true, + id: true, + }, }, + createdAt: true, + completedAt: true, + taskEventStore: true, + parentTaskRunId: true, + expiredAt: true, + status: true, }, - createdAt: true, - completedAt: true, - taskEventStore: true, - parentTaskRunId: true, - expiredAt: true, - status: true, }, - }); + prisma + ); await this.$.runQueue.acknowledgeMessage( updatedRun.runtimeEnvironment.organizationId, @@ -228,15 +228,7 @@ export class TtlSystem { raw: "Run expired because the TTL was reached", }; - await this.$.prisma.$executeRaw` - UPDATE "TaskRun" - SET "status" = 'EXPIRED'::"TaskRunStatus", - "completedAt" = ${now}, - "expiredAt" = ${now}, - 
"updatedAt" = ${now}, - "error" = ${JSON.stringify(error)}::jsonb - WHERE "id" IN (${Prisma.join(runIdsToExpire)}) - `; + await this.$.runStore.expireRunsBatch(runIdsToExpire, { error, now }, this.$.prisma); // Process each run: enqueue waitpoint completion jobs and emit events await pMap( From 4ec5aab7a43eecb88b4499db2674cfcdcbb6c1b1 Mon Sep 17 00:00:00 2001 From: Dan Sutton Date: Wed, 17 Jun 2026 14:49:11 +0100 Subject: [PATCH 16/83] fix(run-store): allow undefined maxDurationInSeconds in lockRunToWorker input --- internal-packages/run-store/src/types.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal-packages/run-store/src/types.ts b/internal-packages/run-store/src/types.ts index c284868a37d..ccadf984803 100644 --- a/internal-packages/run-store/src/types.ts +++ b/internal-packages/run-store/src/types.ts @@ -199,7 +199,7 @@ export type LockRunData = { taskVersion: string; sdkVersion: string | null; cliVersion: string | null; - maxDurationInSeconds: number | null; + maxDurationInSeconds: number | null | undefined; maxAttempts?: number; snapshot: LockSnapshotInput; }; From 109c6a76117b478bf26e7f685f6ccb49c64a2e03 Mon Sep 17 00:00:00 2001 From: Dan Sutton Date: Wed, 17 Jun 2026 14:54:09 +0100 Subject: [PATCH 17/83] refactor(run-engine): route checkpoint, delayed, pending-version, and debounce writes through RunStore --- .../src/engine/systems/checkpointSystem.ts | 60 +++++++++---------- .../src/engine/systems/debounceSystem.ts | 8 +-- .../src/engine/systems/delayedRunSystem.ts | 37 +++++------- .../engine/systems/pendingVersionSystem.ts | 5 +- 4 files changed, 45 insertions(+), 65 deletions(-) diff --git a/internal-packages/run-engine/src/engine/systems/checkpointSystem.ts b/internal-packages/run-engine/src/engine/systems/checkpointSystem.ts index 6c66591e288..b956a0f01aa 100644 --- a/internal-packages/run-engine/src/engine/systems/checkpointSystem.ts +++ b/internal-packages/run-engine/src/engine/systems/checkpointSystem.ts @@ -115,22 
+115,20 @@ export class CheckpointSystem { } // Get the run and update the status - const run = await this.$.prisma.taskRun.update({ - where: { - id: runId, - }, - data: { - status: "WAITING_TO_RESUME", - }, - include: { - runtimeEnvironment: { - include: { - project: true, - organization: true, + const run = await this.$.runStore.suspendForCheckpoint( + runId, + { + include: { + runtimeEnvironment: { + include: { + project: true, + organization: true, + }, }, }, }, - }); + this.$.prisma + ); if (!run) { this.$.logger.error("Run not found for createCheckpoint", { @@ -294,26 +292,24 @@ export class CheckpointSystem { } // Get the run and update the status - const run = await this.$.prisma.taskRun.update({ - where: { - id: runId, - }, - data: { - status: "EXECUTING", - }, - select: { - id: true, - status: true, - attemptNumber: true, - organizationId: true, - runtimeEnvironmentId: true, - projectId: true, - updatedAt: true, - createdAt: true, - runTags: true, - batchId: true, + const run = await this.$.runStore.resumeFromCheckpoint( + runId, + { + select: { + id: true, + status: true, + attemptNumber: true, + organizationId: true, + runtimeEnvironmentId: true, + projectId: true, + updatedAt: true, + createdAt: true, + runTags: true, + batchId: true, + }, }, - }); + this.$.prisma + ); if (!run) { this.$.logger.error("Run not found for createCheckpoint", { diff --git a/internal-packages/run-engine/src/engine/systems/debounceSystem.ts b/internal-packages/run-engine/src/engine/systems/debounceSystem.ts index 0e59d1d69df..5b9d851d0f2 100644 --- a/internal-packages/run-engine/src/engine/systems/debounceSystem.ts +++ b/internal-packages/run-engine/src/engine/systems/debounceSystem.ts @@ -1160,13 +1160,7 @@ return 0 updatePayload.runTags = updateData.tags; } - const updatedRun = await prisma.taskRun.update({ - where: { id: runId }, - data: updatePayload, - include: { - associatedWaitpoint: true, - }, - }); + const updatedRun = await this.$.runStore.rewriteDebouncedRun(runId, 
updatePayload, prisma); return updatedRun; } diff --git a/internal-packages/run-engine/src/engine/systems/delayedRunSystem.ts b/internal-packages/run-engine/src/engine/systems/delayedRunSystem.ts index 10c965741cf..cff29a75a4f 100644 --- a/internal-packages/run-engine/src/engine/systems/delayedRunSystem.ts +++ b/internal-packages/run-engine/src/engine/systems/delayedRunSystem.ts @@ -48,26 +48,19 @@ export class DelayedRunSystem { throw new ServiceValidationError("Cannot reschedule a run that is not delayed"); } - const updatedRun = await prisma.taskRun.update({ - where: { - id: runId, - }, - data: { + const updatedRun = await this.$.runStore.rescheduleRun( + runId, + { delayUntil: delayUntil, - executionSnapshots: { - create: { - engine: "V2", - executionStatus: "DELAYED", - description: "Delayed run was rescheduled to a future date", - runStatus: "DELAYED", - environmentId: snapshot.environmentId, - environmentType: snapshot.environmentType, - projectId: snapshot.projectId, - organizationId: snapshot.organizationId, - }, + snapshot: { + environmentId: snapshot.environmentId, + environmentType: snapshot.environmentType, + projectId: snapshot.projectId, + organizationId: snapshot.organizationId, }, }, - }); + prisma + ); await this.$.worker.reschedule(`enqueueDelayedRun:${updatedRun.id}`, delayUntil); @@ -178,13 +171,13 @@ export class DelayedRunSystem { const queuedAt = new Date(); - const updatedRun = await this.$.prisma.taskRun.update({ - where: { id: runId }, - data: { - status: "PENDING", + const updatedRun = await this.$.runStore.enqueueDelayedRun( + runId, + { queuedAt, }, - }); + this.$.prisma + ); this.$.eventBus.emit("runEnqueuedAfterDelay", { time: new Date(), diff --git a/internal-packages/run-engine/src/engine/systems/pendingVersionSystem.ts b/internal-packages/run-engine/src/engine/systems/pendingVersionSystem.ts index b46b857f02a..59d72c4c461 100644 --- a/internal-packages/run-engine/src/engine/systems/pendingVersionSystem.ts +++ 
b/internal-packages/run-engine/src/engine/systems/pendingVersionSystem.ts @@ -129,10 +129,7 @@ export class PendingVersionSystem { // Idempotency guard: only flips PENDING_VERSION → PENDING. If another // worker already promoted this run between our findMany and the // update, count is 0 and we skip the enqueue. - const updateResult = await tx.taskRun.updateMany({ - where: { id: run.id, status: "PENDING_VERSION" }, - data: { status: "PENDING" }, - }); + const updateResult = await this.$.runStore.promotePendingVersionRuns(run.id, tx); if (updateResult.count === 0) { return false; From 2fbdc5d0429d6454efc8dc8a5dc95a039ce6e188 Mon Sep 17 00:00:00 2001 From: Dan Sutton Date: Wed, 17 Jun 2026 15:01:27 +0100 Subject: [PATCH 18/83] refactor(webapp): route run metadata, idempotency-key, and reschedule writes through RunStore --- .../concerns/idempotencyKeys.server.ts | 17 ++++--- .../metadata/updateMetadata.server.ts | 49 ++++++++++--------- .../app/v3/services/batchTriggerV3.server.ts | 8 +-- .../v3/services/rescheduleTaskRun.server.ts | 11 ++--- .../v3/services/resetIdempotencyKey.server.ts | 38 +++++++------- 5 files changed, 62 insertions(+), 61 deletions(-) diff --git a/apps/webapp/app/runEngine/concerns/idempotencyKeys.server.ts b/apps/webapp/app/runEngine/concerns/idempotencyKeys.server.ts index 493c5c1ce4b..2bdf95eb9a6 100644 --- a/apps/webapp/app/runEngine/concerns/idempotencyKeys.server.ts +++ b/apps/webapp/app/runEngine/concerns/idempotencyKeys.server.ts @@ -10,6 +10,7 @@ import { getMollifierBuffer } from "~/v3/mollifier/mollifierBuffer.server"; import { findRunByIdWithMollifierFallback } from "~/v3/mollifier/readFallback.server"; import { claimOrAwait } from "~/v3/mollifier/idempotencyClaim.server"; import { makeResolveMollifierFlag } from "~/v3/mollifier/mollifierGate.server"; +import { runStore } from "~/v3/runStore.server"; import type { TraceEventConcern, TriggerTaskRequest } from "../types"; // In-memory per-org mollifier-enabled check, shared with 
`evaluateGate` @@ -190,10 +191,10 @@ export class IdempotencyKeyConcern { }); // Update the existing run to remove the idempotency key - await this.prisma.taskRun.updateMany({ - where: { id: existingRun.id, idempotencyKey }, - data: { idempotencyKey: null, idempotencyKeyExpiresAt: null }, - }); + await runStore.clearIdempotencyKey( + { byId: { runId: existingRun.id, idempotencyKey } }, + this.prisma + ); return { isCached: false, idempotencyKey, idempotencyKeyExpiresAt }; } @@ -207,10 +208,10 @@ export class IdempotencyKeyConcern { }); // Update the existing run to remove the idempotency key - await this.prisma.taskRun.updateMany({ - where: { id: existingRun.id, idempotencyKey }, - data: { idempotencyKey: null, idempotencyKeyExpiresAt: null }, - }); + await runStore.clearIdempotencyKey( + { byId: { runId: existingRun.id, idempotencyKey } }, + this.prisma + ); return { isCached: false, idempotencyKey, idempotencyKeyExpiresAt }; } diff --git a/apps/webapp/app/services/metadata/updateMetadata.server.ts b/apps/webapp/app/services/metadata/updateMetadata.server.ts index 7b87034a301..e85c756ae92 100644 --- a/apps/webapp/app/services/metadata/updateMetadata.server.ts +++ b/apps/webapp/app/services/metadata/updateMetadata.server.ts @@ -13,6 +13,8 @@ import { Effect, Schedule, Duration, Fiber } from "effect"; import { type RuntimeFiber } from "effect/Fiber"; import { setTimeout } from "timers/promises"; import { Logger, LogLevel } from "@trigger.dev/core/logger"; +import type { RunStore } from "@internal/run-store"; +import { runStore as defaultRunStore } from "~/v3/runStore.server"; const RUN_UPDATABLE_WINDOW_MS = 60 * 60 * 1000; // 1 hour @@ -24,6 +26,7 @@ type BufferedRunMetadataChangeOperation = { export type UpdateMetadataServiceOptions = { prisma: PrismaClientOrTransaction; + runStore?: RunStore; flushIntervalMs?: number; flushEnabled?: boolean; flushLoggingEnabled?: boolean; @@ -49,6 +52,7 @@ export class UpdateMetadataService { private _bufferedOperations: Map = new 
Map(); private _flushFiber: RuntimeFiber | null = null; private readonly _prisma: PrismaClientOrTransaction; + private readonly _runStore: RunStore; private readonly flushIntervalMs: number; private readonly flushEnabled: boolean; private readonly flushLoggingEnabled: boolean; @@ -57,6 +61,7 @@ export class UpdateMetadataService { constructor(private readonly options: UpdateMetadataServiceOptions) { this._prisma = options.prisma; + this._runStore = options.runStore ?? defaultRunStore; this.flushIntervalMs = options.flushIntervalMs ?? 5000; this.flushEnabled = options.flushEnabled ?? true; this.flushLoggingEnabled = options.flushLoggingEnabled ?? true; @@ -260,17 +265,16 @@ export class UpdateMetadataService { const writeTime = new Date(); const result = yield* _( Effect.tryPromise(() => - this._prisma.taskRun.updateMany({ - where: { - id: runId, - metadataVersion: run.metadataVersion, - }, - data: { - metadata: newMetadataPacket.data, + this._runStore.updateMetadata( + runId, + { + metadata: newMetadataPacket.data!, metadataVersion: { increment: 1 }, updatedAt: writeTime, }, - }) + { expectedMetadataVersion: run.metadataVersion }, + this._prisma + ) ) ); @@ -469,20 +473,19 @@ export class UpdateMetadataService { // Update with optimistic locking; updatedAt stamped explicitly so the caller can // publish the exact committed watermark without a follow-up read. 
const writeTime = new Date(); - const result = await this._prisma.taskRun.updateMany({ - where: { - id: runId, - metadataVersion: run.metadataVersion, - }, - data: { - metadata: newMetadataPacket.data, + const result = await this._runStore.updateMetadata( + runId, + { + metadata: newMetadataPacket.data!, metadataType: newMetadataPacket.dataType, metadataVersion: { increment: 1, }, updatedAt: writeTime, }, - }); + { expectedMetadataVersion: run.metadataVersion }, + this._prisma + ); if (result.count === 0) { if (this.flushLoggingEnabled) { @@ -564,19 +567,19 @@ export class UpdateMetadataService { // Update the metadata without version check; updatedAt stamped explicitly so the // caller can publish the exact committed watermark. const writeTime = new Date(); - await this._prisma.taskRun.update({ - where: { - id: runId, - }, - data: { - metadata: metadataPacket?.data, + await this._runStore.updateMetadata( + runId, + { + metadata: metadataPacket?.data!, metadataType: metadataPacket?.dataType, metadataVersion: { increment: 1, }, updatedAt: writeTime, }, - }); + {}, + this._prisma + ); updatedAtMs = writeTime.getTime(); } diff --git a/apps/webapp/app/v3/services/batchTriggerV3.server.ts b/apps/webapp/app/v3/services/batchTriggerV3.server.ts index 22aa64b5e16..33036871599 100644 --- a/apps/webapp/app/v3/services/batchTriggerV3.server.ts +++ b/apps/webapp/app/v3/services/batchTriggerV3.server.ts @@ -408,10 +408,10 @@ export class BatchTriggerV3Service extends BaseService { // Expire the cached runs that are no longer valid if (expiredRunIds.size) { - await this._prisma.taskRun.updateMany({ - where: { friendlyId: { in: Array.from(expiredRunIds) } }, - data: { idempotencyKey: null }, - }); + await this.runStore.clearIdempotencyKey( + { byFriendlyIds: Array.from(expiredRunIds) }, + this._prisma + ); } return runs; diff --git a/apps/webapp/app/v3/services/rescheduleTaskRun.server.ts b/apps/webapp/app/v3/services/rescheduleTaskRun.server.ts index 43163fb4fbe..707473167ea 
100644 --- a/apps/webapp/app/v3/services/rescheduleTaskRun.server.ts +++ b/apps/webapp/app/v3/services/rescheduleTaskRun.server.ts @@ -17,15 +17,14 @@ export class RescheduleTaskRunService extends BaseService { throw new ServiceValidationError(`Invalid delay: ${body.delay}`); } - const updatedRun = await this._prisma.taskRun.update({ - where: { - id: taskRun.id, - }, - data: { + const updatedRun = await this.runStore.rescheduleRun( + taskRun.id, + { delayUntil: delay, queueTimestamp: delay, }, - }); + this._prisma + ); if (updatedRun.engine === "V1") { await EnqueueDelayedRunService.reschedule(taskRun.id, delay); diff --git a/apps/webapp/app/v3/services/resetIdempotencyKey.server.ts b/apps/webapp/app/v3/services/resetIdempotencyKey.server.ts index 8273d8c9d97..0aa44e94662 100644 --- a/apps/webapp/app/v3/services/resetIdempotencyKey.server.ts +++ b/apps/webapp/app/v3/services/resetIdempotencyKey.server.ts @@ -9,17 +9,16 @@ export class ResetIdempotencyKeyService extends BaseService { taskIdentifier: string, authenticatedEnv: AuthenticatedEnvironment ): Promise<{ id: string }> { - const { count: pgCount } = await this._prisma.taskRun.updateMany({ - where: { - idempotencyKey, - taskIdentifier, - runtimeEnvironmentId: authenticatedEnv.id, - }, - data: { - idempotencyKey: null, - idempotencyKeyExpiresAt: null, + const { count: pgCount } = await this.runStore.clearIdempotencyKey( + { + byPredicate: { + idempotencyKey, + taskIdentifier, + runtimeEnvironmentId: authenticatedEnv.id, + }, }, - }); + this._prisma + ); // Buffer-side reset: the key may belong to a buffered run that // hasn't materialised yet. The PG updateMany above can't see it. @@ -75,17 +74,16 @@ export class ResetIdempotencyKeyService extends BaseService { // lookup against the writer when there's nothing to find; // otherwise the exact write the customer asked for (i.e., not // duplicative — without it the reset is silently lost). 
- const { count: handoffPgCount } = await this._prisma.taskRun.updateMany({ - where: { - idempotencyKey, - taskIdentifier, - runtimeEnvironmentId: authenticatedEnv.id, - }, - data: { - idempotencyKey: null, - idempotencyKeyExpiresAt: null, + const { count: handoffPgCount } = await this.runStore.clearIdempotencyKey( + { + byPredicate: { + idempotencyKey, + taskIdentifier, + runtimeEnvironmentId: authenticatedEnv.id, + }, }, - }); + this._prisma + ); if (handoffPgCount > 0) { logger.info( `Reset idempotency key via handoff re-check: ${idempotencyKey} for task: ${taskIdentifier} in env: ${authenticatedEnv.id}, affected ${handoffPgCount} run(s)` From 1a5ccdcfdf19147dd3c736172959f14da0259a33 Mon Sep 17 00:00:00 2001 From: Dan Sutton Date: Wed, 17 Jun 2026 15:04:16 +0100 Subject: [PATCH 19/83] refactor(webapp): route tag and realtime-stream appends through RunStore --- apps/webapp/app/routes/api.v1.runs.$runId.tags.ts | 10 ++-------- ...ime.v1.streams.$runId.$target.$streamId.append.ts | 12 ++---------- .../realtime.v1.streams.$runId.$target.$streamId.ts | 8 ++------ 3 files changed, 6 insertions(+), 24 deletions(-) diff --git a/apps/webapp/app/routes/api.v1.runs.$runId.tags.ts b/apps/webapp/app/routes/api.v1.runs.$runId.tags.ts index f984562eb3d..c3a99fcec4e 100644 --- a/apps/webapp/app/routes/api.v1.runs.$runId.tags.ts +++ b/apps/webapp/app/routes/api.v1.runs.$runId.tags.ts @@ -9,6 +9,7 @@ import { getRequestAbortSignal } from "~/services/httpAsyncStorage.server"; import { logger } from "~/services/logger.server"; import { publishChangeRecord } from "~/services/realtime/runChangeNotifierInstance.server"; import { mutateWithFallback } from "~/v3/mollifier/mutateWithFallback.server"; +import { runStore } from "~/v3/runStore.server"; // Pull the existing tags out of a buffer entry's serialised payload so // the buffer-path response can dedup against them, matching the @@ -84,14 +85,7 @@ export async function action({ request, params }: ActionFunctionArgs) { if 
(newTags.length === 0) { return json({ message: "No new tags to add" }, { status: 200 }); } - const updated = await prisma.taskRun.update({ - where: { - id: taskRun.id, - runtimeEnvironmentId: env.id, - }, - data: { runTags: { push: newTags } }, - select: { updatedAt: true }, - }); + const updated = await runStore.pushTags(taskRun.id, newTags, { runtimeEnvironmentId: env.id }, prisma); // Publish a run-changed record with the NEW tag set so tag feeds reindex // (no-op unless enabled). updatedAt is the read-your-writes watermark. publishChangeRecord({ diff --git a/apps/webapp/app/routes/realtime.v1.streams.$runId.$target.$streamId.append.ts b/apps/webapp/app/routes/realtime.v1.streams.$runId.$target.$streamId.append.ts index ec5800c1f9f..11074840a38 100644 --- a/apps/webapp/app/routes/realtime.v1.streams.$runId.$target.$streamId.append.ts +++ b/apps/webapp/app/routes/realtime.v1.streams.$runId.$target.$streamId.append.ts @@ -6,6 +6,7 @@ import { $replica, prisma } from "~/db.server"; import { getRealtimeStreamInstance } from "~/services/realtime/v1StreamsGlobal.server"; import { createActionApiRoute } from "~/services/routeBuilders/apiBuilder.server"; import { ServiceValidationError } from "~/v3/services/common.server"; +import { runStore } from "~/v3/runStore.server"; const ParamsSchema = z.object({ runId: z.string(), @@ -87,16 +88,7 @@ const { action } = createActionApiRoute( } if (!targetRun.realtimeStreams.includes(params.streamId)) { - await prisma.taskRun.update({ - where: { - id: targetRun.id, - }, - data: { - realtimeStreams: { - push: params.streamId, - }, - }, - }); + await runStore.pushRealtimeStream(targetRun.id, params.streamId, prisma); } const part = await request.text(); diff --git a/apps/webapp/app/routes/realtime.v1.streams.$runId.$target.$streamId.ts b/apps/webapp/app/routes/realtime.v1.streams.$runId.$target.$streamId.ts index dd3d3bf31dd..cdee9567b79 100644 --- a/apps/webapp/app/routes/realtime.v1.streams.$runId.$target.$streamId.ts +++ 
b/apps/webapp/app/routes/realtime.v1.streams.$runId.$target.$streamId.ts @@ -6,6 +6,7 @@ import { createActionApiRoute, createLoaderApiRoute, } from "~/services/routeBuilders/apiBuilder.server"; +import { runStore } from "~/v3/runStore.server"; const ParamsSchema = z.object({ runId: z.string(), @@ -86,12 +87,7 @@ const { action } = createActionApiRoute( } if (!target.realtimeStreams.includes(params.streamId)) { - await prisma.taskRun.update({ - where: { id: target.id }, - data: { - realtimeStreams: { push: params.streamId }, - }, - }); + await runStore.pushRealtimeStream(target.id, params.streamId, prisma); } const realtimeStream = getRealtimeStreamInstance( From 60565cf0f9487deaf2f6c347041bbe02eb0a0c4d Mon Sep 17 00:00:00 2001 From: Dan Sutton Date: Thu, 18 Jun 2026 09:45:55 +0100 Subject: [PATCH 20/83] fix(run-store): short-circuit expireRunsBatch on an empty runIds array --- .../run-store/src/PostgresRunStore.test.ts | 47 +++++++++++++++++++ .../run-store/src/PostgresRunStore.ts | 6 +++ 2 files changed, 53 insertions(+) diff --git a/internal-packages/run-store/src/PostgresRunStore.test.ts b/internal-packages/run-store/src/PostgresRunStore.test.ts index b9301bd70c6..f2fb2969e6c 100644 --- a/internal-packages/run-store/src/PostgresRunStore.test.ts +++ b/internal-packages/run-store/src/PostgresRunStore.test.ts @@ -670,6 +670,53 @@ describe("PostgresRunStore", () => { } ); + postgresTest( + "expireRunsBatch returns 0 and writes nothing when runIds is empty", + async ({ prisma }) => { + const { organization, project, environment } = await seedEnvironment(prisma); + + const store = new PostgresRunStore({ prisma, readOnlyPrisma: prisma }); + + const runId = "run_expire_batch_empty"; + await prisma.taskRun.create({ + data: { + id: runId, + engine: "V2", + status: "PENDING", + friendlyId: "run_expire_batch_empty_friendly", + runtimeEnvironmentId: environment.id, + environmentType: "DEVELOPMENT", + organizationId: organization.id, + projectId: project.id, + 
taskIdentifier: "my-task", + payload: "{}", + payloadType: "application/json", + traceContext: {}, + traceId: "trace_empty", + spanId: "span_empty", + queue: "task/my-task", + isTest: false, + taskEventStore: "taskEvent", + depth: 0, + }, + }); + + const error = { type: "STRING_ERROR" as const, raw: "unused" }; + + // Must not throw (Prisma.join([]) would build an invalid `IN ()` clause). + const count = await store.expireRunsBatch([], { error, now: new Date() }); + + expect(count).toBe(0); + + const row = await prisma.taskRun.findUniqueOrThrow({ + where: { id: runId }, + select: { status: true, expiredAt: true }, + }); + expect(row.status).toBe("PENDING"); + expect(row.expiredAt).toBeNull(); + } + ); + postgresTest( "lockRunToWorker sets status to DEQUEUED with lock columns, includes runtimeEnvironment, and creates one PENDING_EXECUTING snapshot", async ({ prisma }) => { diff --git a/internal-packages/run-store/src/PostgresRunStore.ts b/internal-packages/run-store/src/PostgresRunStore.ts index 76f726db317..925a39425b6 100644 --- a/internal-packages/run-store/src/PostgresRunStore.ts +++ b/internal-packages/run-store/src/PostgresRunStore.ts @@ -335,6 +335,12 @@ export class PostgresRunStore implements RunStore { ): Promise { const prisma = tx ?? this.prisma; + // Nothing to do for an empty set, and Prisma.join would build an invalid + // `IN ()` clause, so short-circuit before touching the database. + if (runIds.length === 0) { + return 0; + } + return prisma.$executeRaw` UPDATE "TaskRun" SET "status" = 'EXPIRED'::"TaskRunStatus", From 76f349420b1fc5670ba0d83c42d7333fc190d083 Mon Sep 17 00:00:00 2001 From: Dan Sutton Date: Thu, 18 Jun 2026 11:47:46 +0100 Subject: [PATCH 21/83] fix(webapp): inject runStore into UpdateMetadataService The service statically imported the db.server-backed runStore singleton, which dragged the Prisma client into otherwise-light test module graphs and opened an eager connection to DATABASE_URL on import. 
The metadata service test then threw an unhandled connection error whenever no database was reachable at the configured address. Make runStore a required constructor option, pass the singleton at the production construction site, and inject a testcontainer-backed store in the tests. --- .../app/services/metadata/updateMetadata.server.ts | 5 ++--- .../metadata/updateMetadataInstance.server.ts | 2 ++ apps/webapp/test/updateMetadata.test.ts | 11 +++++++++++ 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/apps/webapp/app/services/metadata/updateMetadata.server.ts b/apps/webapp/app/services/metadata/updateMetadata.server.ts index e85c756ae92..2cc057f10f2 100644 --- a/apps/webapp/app/services/metadata/updateMetadata.server.ts +++ b/apps/webapp/app/services/metadata/updateMetadata.server.ts @@ -14,7 +14,6 @@ import { type RuntimeFiber } from "effect/Fiber"; import { setTimeout } from "timers/promises"; import { Logger, LogLevel } from "@trigger.dev/core/logger"; import type { RunStore } from "@internal/run-store"; -import { runStore as defaultRunStore } from "~/v3/runStore.server"; const RUN_UPDATABLE_WINDOW_MS = 60 * 60 * 1000; // 1 hour @@ -26,7 +25,7 @@ type BufferedRunMetadataChangeOperation = { export type UpdateMetadataServiceOptions = { prisma: PrismaClientOrTransaction; - runStore?: RunStore; + runStore: RunStore; flushIntervalMs?: number; flushEnabled?: boolean; flushLoggingEnabled?: boolean; @@ -61,7 +60,7 @@ export class UpdateMetadataService { constructor(private readonly options: UpdateMetadataServiceOptions) { this._prisma = options.prisma; - this._runStore = options.runStore ?? defaultRunStore; + this._runStore = options.runStore; this.flushIntervalMs = options.flushIntervalMs ?? 5000; this.flushEnabled = options.flushEnabled ?? true; this.flushLoggingEnabled = options.flushLoggingEnabled ?? 
true; diff --git a/apps/webapp/app/services/metadata/updateMetadataInstance.server.ts b/apps/webapp/app/services/metadata/updateMetadataInstance.server.ts index 9f1818e5ed3..147df2bca2f 100644 --- a/apps/webapp/app/services/metadata/updateMetadataInstance.server.ts +++ b/apps/webapp/app/services/metadata/updateMetadataInstance.server.ts @@ -2,6 +2,7 @@ import { singleton } from "~/utils/singleton"; import { env } from "~/env.server"; import { UpdateMetadataService } from "./updateMetadata.server"; import { prisma } from "~/db.server"; +import { runStore } from "~/v3/runStore.server"; import { publishChangeRecord } from "~/services/realtime/runChangeNotifierInstance.server"; export const updateMetadataService = singleton( @@ -9,6 +10,7 @@ export const updateMetadataService = singleton( () => new UpdateMetadataService({ prisma, + runStore, flushIntervalMs: env.BATCH_METADATA_OPERATIONS_FLUSH_INTERVAL_MS, flushEnabled: env.BATCH_METADATA_OPERATIONS_FLUSH_ENABLED === "1", flushLoggingEnabled: env.BATCH_METADATA_OPERATIONS_FLUSH_LOGGING_ENABLED === "1", diff --git a/apps/webapp/test/updateMetadata.test.ts b/apps/webapp/test/updateMetadata.test.ts index 6fa2605272d..b78a1a50a9f 100644 --- a/apps/webapp/test/updateMetadata.test.ts +++ b/apps/webapp/test/updateMetadata.test.ts @@ -2,6 +2,7 @@ import { containerTest } from "@internal/testcontainers"; import { parsePacket } from "@trigger.dev/core/v3"; import { setTimeout } from "timers/promises"; import { describe } from "vitest"; +import { PostgresRunStore } from "@internal/run-store"; import { UpdateMetadataService } from "~/services/metadata/updateMetadata.server"; import { MetadataTooLargeError } from "~/utils/packets"; @@ -13,6 +14,7 @@ describe("UpdateMetadataService.call", () => { async ({ prisma, redisOptions }) => { const service = new UpdateMetadataService({ prisma, + runStore: new PostgresRunStore({ prisma, readOnlyPrisma: prisma }), flushIntervalMs: 100, flushEnabled: true, flushLoggingEnabled: true, @@ -112,6 
+114,7 @@ describe("UpdateMetadataService.call", () => { async ({ prisma, redisOptions }) => { const service = new UpdateMetadataService({ prisma, + runStore: new PostgresRunStore({ prisma, readOnlyPrisma: prisma }), flushIntervalMs: 100, flushEnabled: true, flushLoggingEnabled: true, @@ -280,6 +283,7 @@ describe("UpdateMetadataService.call", () => { async ({ prisma, redisOptions }) => { const service = new UpdateMetadataService({ prisma, + runStore: new PostgresRunStore({ prisma, readOnlyPrisma: prisma }), flushIntervalMs: 100, flushEnabled: true, flushLoggingEnabled: true, @@ -395,6 +399,7 @@ describe("UpdateMetadataService.call", () => { async ({ prisma, redisOptions }) => { const service = new UpdateMetadataService({ prisma, + runStore: new PostgresRunStore({ prisma, readOnlyPrisma: prisma }), flushIntervalMs: 100, flushEnabled: true, flushLoggingEnabled: true, @@ -587,6 +592,7 @@ describe("UpdateMetadataService.call", () => { async ({ prisma, redisOptions }) => { const service = new UpdateMetadataService({ prisma, + runStore: new PostgresRunStore({ prisma, readOnlyPrisma: prisma }), flushIntervalMs: 100, flushEnabled: true, flushLoggingEnabled: true, @@ -785,6 +791,7 @@ describe("UpdateMetadataService.call", () => { const service = new UpdateMetadataService({ prisma, + runStore: new PostgresRunStore({ prisma, readOnlyPrisma: prisma }), flushIntervalMs: 100000, // Very long interval so we can control flushing flushEnabled: true, flushLoggingEnabled: true, @@ -893,6 +900,7 @@ describe("UpdateMetadataService.call", () => { const service = new UpdateMetadataService({ prisma, + runStore: new PostgresRunStore({ prisma, readOnlyPrisma: prisma }), flushIntervalMs: 100, flushEnabled: true, flushLoggingEnabled: true, @@ -1004,6 +1012,7 @@ describe("UpdateMetadataService.call", () => { async ({ prisma, redisOptions }) => { const service = new UpdateMetadataService({ prisma, + runStore: new PostgresRunStore({ prisma, readOnlyPrisma: prisma }), flushIntervalMs: 100000, // 
Very long interval so we can control flushing flushEnabled: true, flushLoggingEnabled: true, @@ -1134,6 +1143,7 @@ describe("UpdateMetadataService.call", () => { async ({ prisma, redisOptions }) => { const service = new UpdateMetadataService({ prisma, + runStore: new PostgresRunStore({ prisma, readOnlyPrisma: prisma }), flushIntervalMs: 100, flushEnabled: true, flushLoggingEnabled: true, @@ -1209,6 +1219,7 @@ describe("UpdateMetadataService.call", () => { async ({ prisma, redisOptions }) => { const service = new UpdateMetadataService({ prisma, + runStore: new PostgresRunStore({ prisma, readOnlyPrisma: prisma }), flushIntervalMs: 100, flushEnabled: true, flushLoggingEnabled: true, From c5226a2dc079eff0e1ce8a3a4c2277659810ebd4 Mon Sep 17 00:00:00 2001 From: Dan Sutton Date: Thu, 18 Jun 2026 14:47:26 +0100 Subject: [PATCH 22/83] feat(run-store): add TaskRun read methods to the run store Add findRun, findRunOrThrow and findRuns to RunStore, mirroring the existing write methods. They pass where/select/include through the same Prisma generics and default to the read replica, while letting the caller pass the writer or a transaction client when needed. This lets Postgres reads of TaskRun be routed through the store the same way writes already are. Additive only; no call sites change yet. 
--- .../run-store/src/NoopRunStore.ts | 3 + .../run-store/src/PostgresRunStore.test.ts | 156 ++++++++++++++++++ .../run-store/src/PostgresRunStore.ts | 85 ++++++++++ internal-packages/run-store/src/types.ts | 46 ++++++ 4 files changed, 290 insertions(+) diff --git a/internal-packages/run-store/src/NoopRunStore.ts b/internal-packages/run-store/src/NoopRunStore.ts index 3b4fb0a36fe..e27080c9af6 100644 --- a/internal-packages/run-store/src/NoopRunStore.ts +++ b/internal-packages/run-store/src/NoopRunStore.ts @@ -29,4 +29,7 @@ export class NoopRunStore implements RunStore { clearIdempotencyKey(): never { return this.fail("clearIdempotencyKey"); } pushTags(): never { return this.fail("pushTags"); } pushRealtimeStream(): never { return this.fail("pushRealtimeStream"); } + findRun(): never { return this.fail("findRun"); } + findRunOrThrow(): never { return this.fail("findRunOrThrow"); } + findRuns(): never { return this.fail("findRuns"); } } diff --git a/internal-packages/run-store/src/PostgresRunStore.test.ts b/internal-packages/run-store/src/PostgresRunStore.test.ts index f2fb2969e6c..8540912c99e 100644 --- a/internal-packages/run-store/src/PostgresRunStore.test.ts +++ b/internal-packages/run-store/src/PostgresRunStore.test.ts @@ -1528,3 +1528,159 @@ describe("PostgresRunStore — delayed / debounce / metadata / idempotency / arr } ); }); + +describe("PostgresRunStore — read", () => { + postgresTest("findRun by id with select returns the projected row", async ({ prisma }) => { + const { organization, project, environment } = await seedEnvironment(prisma); + + const store = new PostgresRunStore({ prisma, readOnlyPrisma: prisma }); + const runId = "run_find_select_id_1"; + + await store.createRun( + buildCreateRunInput({ + runId, + organizationId: organization.id, + projectId: project.id, + runtimeEnvironmentId: environment.id, + }) + ); + + const run = await store.findRun({ id: runId }, { select: { friendlyId: true } }); + + expect(run).toEqual({ friendlyId: 
"run_friendly_1" }); + }); + + postgresTest("findRun by friendlyId with select returns the matching row", async ({ prisma }) => { + const { organization, project, environment } = await seedEnvironment(prisma); + + const store = new PostgresRunStore({ prisma, readOnlyPrisma: prisma }); + const runId = "run_find_select_friendly_1"; + + await store.createRun( + buildCreateRunInput({ + runId, + organizationId: organization.id, + projectId: project.id, + runtimeEnvironmentId: environment.id, + }) + ); + + const run = await store.findRun({ friendlyId: "run_friendly_1" }, { select: { id: true } }); + + expect(run?.id).toBe(runId); + }); + + postgresTest("findRun returns null when no row matches", async ({ prisma }) => { + await seedEnvironment(prisma); + + const store = new PostgresRunStore({ prisma, readOnlyPrisma: prisma }); + + const run = await store.findRun({ id: "missing" }, { select: { id: true } }); + + expect(run).toBeNull(); + }); + + postgresTest("findRunOrThrow throws when no row matches", async ({ prisma }) => { + await seedEnvironment(prisma); + + const store = new PostgresRunStore({ prisma, readOnlyPrisma: prisma }); + + await expect(store.findRunOrThrow({ id: "missing" }, { select: { id: true } })).rejects.toThrow(); + }); + + postgresTest("findRun with include hydrates the relation", async ({ prisma }) => { + const { organization, project, environment } = await seedEnvironment(prisma); + + const store = new PostgresRunStore({ prisma, readOnlyPrisma: prisma }); + const runId = "run_find_include_1"; + + await store.createRun( + buildCreateRunInput({ + runId, + organizationId: organization.id, + projectId: project.id, + runtimeEnvironmentId: environment.id, + }) + ); + + const run = await store.findRun({ id: runId }, { include: { runtimeEnvironment: true } }); + + expect(run?.id).toBe(runId); + expect(run?.runtimeEnvironment).toBeDefined(); + expect(run?.runtimeEnvironment.id).toBe(environment.id); + }); + + postgresTest("findRuns applies where/orderBy/take 
and returns ordered, limited rows", async ({ prisma }) => { + const { organization, project, environment } = await seedEnvironment(prisma); + + const store = new PostgresRunStore({ prisma, readOnlyPrisma: prisma }); + + const earliest = new Date("2026-06-01T00:00:00.000Z"); + const middle = new Date("2026-06-02T00:00:00.000Z"); + const latest = new Date("2026-06-03T00:00:00.000Z"); + + const rows: Array<{ id: string; createdAt: Date }> = [ + { id: "run_find_many_earliest", createdAt: earliest }, + { id: "run_find_many_middle", createdAt: middle }, + { id: "run_find_many_latest", createdAt: latest }, + ]; + + for (const row of rows) { + await prisma.taskRun.create({ + data: { + id: row.id, + engine: "V2", + status: "PENDING", + friendlyId: `${row.id}_friendly`, + runtimeEnvironmentId: environment.id, + environmentType: "DEVELOPMENT", + organizationId: organization.id, + projectId: project.id, + taskIdentifier: "my-task", + payload: "{}", + payloadType: "application/json", + traceContext: {}, + traceId: `trace_${row.id}`, + spanId: `span_${row.id}`, + queue: "task/my-task", + isTest: false, + taskEventStore: "taskEvent", + depth: 0, + createdAt: row.createdAt, + }, + }); + } + + const found = await store.findRuns({ + where: { projectId: project.id }, + select: { id: true }, + orderBy: { createdAt: "desc" }, + take: 2, + }); + + expect(found).toEqual([{ id: "run_find_many_latest" }, { id: "run_find_many_middle" }]); + }); + + postgresTest("findRun reads a just-written row when passed the writer client", async ({ prisma }) => { + const { organization, project, environment } = await seedEnvironment(prisma); + + // Use a NoopRunStore-style read replica that must NOT be hit: pass the writer + // (prisma) explicitly so reads go through it for read-after-write consistency. 
+ const store = new PostgresRunStore({ prisma, readOnlyPrisma: prisma }); + const runId = "run_find_read_after_write_1"; + + await store.createRun( + buildCreateRunInput({ + runId, + organizationId: organization.id, + projectId: project.id, + runtimeEnvironmentId: environment.id, + }) + ); + + const run = await store.findRun({ id: runId }, { select: { id: true, status: true } }, prisma); + + expect(run?.id).toBe(runId); + expect(run?.status).toBe("PENDING"); + }); +}); diff --git a/internal-packages/run-store/src/PostgresRunStore.ts b/internal-packages/run-store/src/PostgresRunStore.ts index 925a39425b6..21514ea44de 100644 --- a/internal-packages/run-store/src/PostgresRunStore.ts +++ b/internal-packages/run-store/src/PostgresRunStore.ts @@ -617,4 +617,89 @@ export class PostgresRunStore implements RunStore { data: { realtimeStreams: { push: streamId } }, }); } + + findRun( + where: Prisma.TaskRunWhereInput, + args: { select: S }, + client?: PrismaClientOrTransaction + ): Promise | null>; + findRun( + where: Prisma.TaskRunWhereInput, + args: { include: I }, + client?: PrismaClientOrTransaction + ): Promise | null>; + async findRun( + where: Prisma.TaskRunWhereInput, + args: { select?: Prisma.TaskRunSelect; include?: Prisma.TaskRunInclude }, + client?: PrismaClientOrTransaction + ): Promise { + const prisma = client ?? this.readOnlyPrisma; + + return prisma.taskRun.findFirst({ + where, + ...args, + }); + } + + findRunOrThrow( + where: Prisma.TaskRunWhereInput, + args: { select: S }, + client?: PrismaClientOrTransaction + ): Promise>; + findRunOrThrow( + where: Prisma.TaskRunWhereInput, + args: { include: I }, + client?: PrismaClientOrTransaction + ): Promise>; + async findRunOrThrow( + where: Prisma.TaskRunWhereInput, + args: { select?: Prisma.TaskRunSelect; include?: Prisma.TaskRunInclude }, + client?: PrismaClientOrTransaction + ): Promise { + const prisma = client ?? 
this.readOnlyPrisma; + + return prisma.taskRun.findFirstOrThrow({ + where, + ...args, + }); + } + + findRuns( + args: { + where: Prisma.TaskRunWhereInput; + select: S; + orderBy?: Prisma.TaskRunOrderByWithRelationInput | Prisma.TaskRunOrderByWithRelationInput[]; + take?: number; + skip?: number; + cursor?: Prisma.TaskRunWhereUniqueInput; + }, + client?: PrismaClientOrTransaction + ): Promise[]>; + findRuns( + args: { + where: Prisma.TaskRunWhereInput; + include: I; + orderBy?: Prisma.TaskRunOrderByWithRelationInput | Prisma.TaskRunOrderByWithRelationInput[]; + take?: number; + skip?: number; + cursor?: Prisma.TaskRunWhereUniqueInput; + }, + client?: PrismaClientOrTransaction + ): Promise[]>; + async findRuns( + args: { + where: Prisma.TaskRunWhereInput; + select?: Prisma.TaskRunSelect; + include?: Prisma.TaskRunInclude; + orderBy?: Prisma.TaskRunOrderByWithRelationInput | Prisma.TaskRunOrderByWithRelationInput[]; + take?: number; + skip?: number; + cursor?: Prisma.TaskRunWhereUniqueInput; + }, + client?: PrismaClientOrTransaction + ): Promise { + const prisma = client ?? 
this.readOnlyPrisma; + + return prisma.taskRun.findMany(args); + } } diff --git a/internal-packages/run-store/src/types.ts b/internal-packages/run-store/src/types.ts index e680f254633..35d4d8f91a2 100644 --- a/internal-packages/run-store/src/types.ts +++ b/internal-packages/run-store/src/types.ts @@ -319,4 +319,50 @@ export interface RunStore { clearIdempotencyKey(params: ClearIdempotencyKeyInput, tx?: PrismaClientOrTransaction): Promise<{ count: number }>; pushTags(runId: string, tags: string[], where: { runtimeEnvironmentId: string }, tx?: PrismaClientOrTransaction): Promise<{ updatedAt: Date }>; pushRealtimeStream(runId: string, streamId: string, tx?: PrismaClientOrTransaction): Promise; + + // Read + findRun( + where: Prisma.TaskRunWhereInput, + args: { select: S }, + client?: PrismaClientOrTransaction + ): Promise | null>; + findRun( + where: Prisma.TaskRunWhereInput, + args: { include: I }, + client?: PrismaClientOrTransaction + ): Promise | null>; + + findRunOrThrow( + where: Prisma.TaskRunWhereInput, + args: { select: S }, + client?: PrismaClientOrTransaction + ): Promise>; + findRunOrThrow( + where: Prisma.TaskRunWhereInput, + args: { include: I }, + client?: PrismaClientOrTransaction + ): Promise>; + + findRuns( + args: { + where: Prisma.TaskRunWhereInput; + select: S; + orderBy?: Prisma.TaskRunOrderByWithRelationInput | Prisma.TaskRunOrderByWithRelationInput[]; + take?: number; + skip?: number; + cursor?: Prisma.TaskRunWhereUniqueInput; + }, + client?: PrismaClientOrTransaction + ): Promise[]>; + findRuns( + args: { + where: Prisma.TaskRunWhereInput; + include: I; + orderBy?: Prisma.TaskRunOrderByWithRelationInput | Prisma.TaskRunOrderByWithRelationInput[]; + take?: number; + skip?: number; + cursor?: Prisma.TaskRunWhereUniqueInput; + }, + client?: PrismaClientOrTransaction + ): Promise[]>; } From 13d53648b1885938480384c8689651f8c418d822 Mon Sep 17 00:00:00 2001 From: Dan Sutton Date: Thu, 18 Jun 2026 15:31:09 +0100 Subject: [PATCH 23/83] 
feat(run-store): add full-row read overload to the run store Add a no-args overload to findRun, findRunOrThrow and findRuns that returns the whole TaskRun row, for callers that read a run without a select or include. --- .../run-store/src/PostgresRunStore.test.ts | 88 +++++++++++++++++++ .../run-store/src/PostgresRunStore.ts | 63 ++++++++++++- internal-packages/run-store/src/types.ts | 12 +++ 3 files changed, 159 insertions(+), 4 deletions(-) diff --git a/internal-packages/run-store/src/PostgresRunStore.test.ts b/internal-packages/run-store/src/PostgresRunStore.test.ts index 8540912c99e..47876b70c8d 100644 --- a/internal-packages/run-store/src/PostgresRunStore.test.ts +++ b/internal-packages/run-store/src/PostgresRunStore.test.ts @@ -1683,4 +1683,92 @@ describe("PostgresRunStore — read", () => { expect(run?.id).toBe(runId); expect(run?.status).toBe("PENDING"); }); + + postgresTest("findRun by id with no projection returns the whole row", async ({ prisma }) => { + const { organization, project, environment } = await seedEnvironment(prisma); + + const store = new PostgresRunStore({ prisma, readOnlyPrisma: prisma }); + const runId = "run_find_full_row_1"; + + await store.createRun( + buildCreateRunInput({ + runId, + organizationId: organization.id, + projectId: project.id, + runtimeEnvironmentId: environment.id, + }) + ); + + const run = await store.findRun({ id: runId }); + + expect(run?.id).toBe(runId); + expect(run?.friendlyId).toBe("run_friendly_1"); + expect(run?.status).toBe("PENDING"); + expect(run?.taskIdentifier).toBe("my-task"); + // The whole-row variant returns the full scalar set, not a projection. 
+ expect(run?.payload).toBe("{}"); + expect(run?.payloadType).toBe("application/json"); + }); + + postgresTest("findRunOrThrow with no projection throws when no row matches", async ({ prisma }) => { + await seedEnvironment(prisma); + + const store = new PostgresRunStore({ prisma, readOnlyPrisma: prisma }); + + await expect(store.findRunOrThrow({ id: "missing" })).rejects.toThrow(); + }); + + postgresTest("findRuns with no projection returns whole rows", async ({ prisma }) => { + const { organization, project, environment } = await seedEnvironment(prisma); + + const store = new PostgresRunStore({ prisma, readOnlyPrisma: prisma }); + + const earliest = new Date("2026-07-01T00:00:00.000Z"); + const latest = new Date("2026-07-02T00:00:00.000Z"); + + const rows: Array<{ id: string; createdAt: Date }> = [ + { id: "run_find_full_many_earliest", createdAt: earliest }, + { id: "run_find_full_many_latest", createdAt: latest }, + ]; + + for (const row of rows) { + await prisma.taskRun.create({ + data: { + id: row.id, + engine: "V2", + status: "PENDING", + friendlyId: `${row.id}_friendly`, + runtimeEnvironmentId: environment.id, + environmentType: "DEVELOPMENT", + organizationId: organization.id, + projectId: project.id, + taskIdentifier: "my-task", + payload: "{}", + payloadType: "application/json", + traceContext: {}, + traceId: `trace_${row.id}`, + spanId: `span_${row.id}`, + queue: "task/my-task", + isTest: false, + taskEventStore: "taskEvent", + depth: 0, + createdAt: row.createdAt, + }, + }); + } + + const found = await store.findRuns({ + where: { projectId: project.id }, + orderBy: { createdAt: "desc" }, + }); + + expect(found).toHaveLength(2); + expect(found.map((r) => r.id)).toEqual([ + "run_find_full_many_latest", + "run_find_full_many_earliest", + ]); + // Whole rows include full scalar columns. 
+ expect(found[0]?.taskIdentifier).toBe("my-task"); + expect(found[0]?.payloadType).toBe("application/json"); + }); }); diff --git a/internal-packages/run-store/src/PostgresRunStore.ts b/internal-packages/run-store/src/PostgresRunStore.ts index 21514ea44de..fcc53c00266 100644 --- a/internal-packages/run-store/src/PostgresRunStore.ts +++ b/internal-packages/run-store/src/PostgresRunStore.ts @@ -628,12 +628,16 @@ export class PostgresRunStore implements RunStore { args: { include: I }, client?: PrismaClientOrTransaction ): Promise | null>; + findRun( + where: Prisma.TaskRunWhereInput, + client?: PrismaClientOrTransaction + ): Promise; async findRun( where: Prisma.TaskRunWhereInput, - args: { select?: Prisma.TaskRunSelect; include?: Prisma.TaskRunInclude }, + argsOrClient?: { select?: Prisma.TaskRunSelect; include?: Prisma.TaskRunInclude } | PrismaClientOrTransaction, client?: PrismaClientOrTransaction ): Promise { - const prisma = client ?? this.readOnlyPrisma; + const { args, prisma } = this.#resolveReadArgs(argsOrClient, client); return prisma.taskRun.findFirst({ where, @@ -651,12 +655,16 @@ export class PostgresRunStore implements RunStore { args: { include: I }, client?: PrismaClientOrTransaction ): Promise>; + findRunOrThrow( + where: Prisma.TaskRunWhereInput, + client?: PrismaClientOrTransaction + ): Promise; async findRunOrThrow( where: Prisma.TaskRunWhereInput, - args: { select?: Prisma.TaskRunSelect; include?: Prisma.TaskRunInclude }, + argsOrClient?: { select?: Prisma.TaskRunSelect; include?: Prisma.TaskRunInclude } | PrismaClientOrTransaction, client?: PrismaClientOrTransaction ): Promise { - const prisma = client ?? 
this.readOnlyPrisma; + const { args, prisma } = this.#resolveReadArgs(argsOrClient, client); return prisma.taskRun.findFirstOrThrow({ where, @@ -686,6 +694,16 @@ export class PostgresRunStore implements RunStore { }, client?: PrismaClientOrTransaction ): Promise[]>; + findRuns( + args: { + where: Prisma.TaskRunWhereInput; + orderBy?: Prisma.TaskRunOrderByWithRelationInput | Prisma.TaskRunOrderByWithRelationInput[]; + take?: number; + skip?: number; + cursor?: Prisma.TaskRunWhereUniqueInput; + }, + client?: PrismaClientOrTransaction + ): Promise; async findRuns( args: { where: Prisma.TaskRunWhereInput; @@ -702,4 +720,41 @@ export class PostgresRunStore implements RunStore { return prisma.taskRun.findMany(args); } + + /** + * The single-row read methods (`findRun`, `findRunOrThrow`) accept either + * `(where, { select | include }, client?)` or the full-row `(where, client?)`. + * Disambiguate the second positional arg: a `{ select }` / `{ include }` + * projection object vs. a Prisma client. A projection object always carries a + * `select` or `include` key; a Prisma client never does. Anything else (e.g. + * `undefined`) is treated as "no projection, no explicit client". + */ + #resolveReadArgs( + argsOrClient: + | { select?: Prisma.TaskRunSelect; include?: Prisma.TaskRunInclude } + | PrismaClientOrTransaction + | undefined, + client: PrismaClientOrTransaction | undefined + ): { + args: { select?: Prisma.TaskRunSelect; include?: Prisma.TaskRunInclude }; + prisma: PrismaClientOrTransaction | PrismaReplicaClient; + } { + const isProjection = + typeof argsOrClient === "object" && + argsOrClient !== null && + ("select" in argsOrClient || "include" in argsOrClient); + + if (isProjection) { + return { + args: argsOrClient as { select?: Prisma.TaskRunSelect; include?: Prisma.TaskRunInclude }, + prisma: client ?? this.readOnlyPrisma, + }; + } + + // No projection: the second positional arg, when present, is the client. 
+ return { + args: {}, + prisma: (argsOrClient as PrismaClientOrTransaction | undefined) ?? this.readOnlyPrisma, + }; + } } diff --git a/internal-packages/run-store/src/types.ts b/internal-packages/run-store/src/types.ts index 35d4d8f91a2..4c2d9d554aa 100644 --- a/internal-packages/run-store/src/types.ts +++ b/internal-packages/run-store/src/types.ts @@ -331,6 +331,7 @@ export interface RunStore { args: { include: I }, client?: PrismaClientOrTransaction ): Promise | null>; + findRun(where: Prisma.TaskRunWhereInput, client?: PrismaClientOrTransaction): Promise; findRunOrThrow( where: Prisma.TaskRunWhereInput, @@ -342,6 +343,7 @@ export interface RunStore { args: { include: I }, client?: PrismaClientOrTransaction ): Promise>; + findRunOrThrow(where: Prisma.TaskRunWhereInput, client?: PrismaClientOrTransaction): Promise; findRuns( args: { @@ -365,4 +367,14 @@ export interface RunStore { }, client?: PrismaClientOrTransaction ): Promise[]>; + findRuns( + args: { + where: Prisma.TaskRunWhereInput; + orderBy?: Prisma.TaskRunOrderByWithRelationInput | Prisma.TaskRunOrderByWithRelationInput[]; + take?: number; + skip?: number; + cursor?: Prisma.TaskRunWhereUniqueInput; + }, + client?: PrismaClientOrTransaction + ): Promise; } From cfa90521ecf119bd7ab64c10e43589b8cc8a9e0e Mon Sep 17 00:00:00 2001 From: Dan Sutton Date: Thu, 18 Jun 2026 15:31:09 +0100 Subject: [PATCH 24/83] refactor(run-engine): route TaskRun reads through the run store Relocate the direct TaskRun reads in the engine and its systems to the RunStore read methods, preserving the exact client (writer, replica, or transaction) at each site. Behavior-preserving; the engine test suite is unchanged. 
--- .../run-engine/src/engine/index.ts | 34 +-- .../run-engine/src/engine/retrying.ts | 45 ++-- .../src/engine/systems/batchSystem.ts | 21 +- .../src/engine/systems/debounceSystem.ts | 40 ++-- .../src/engine/systems/delayedRunSystem.ts | 19 +- .../src/engine/systems/dequeueSystem.ts | 44 ++-- .../engine/systems/pendingVersionSystem.ts | 21 +- .../src/engine/systems/runAttemptSystem.ts | 226 ++++++++++-------- .../src/engine/systems/ttlSystem.ts | 4 +- .../src/engine/systems/waitpointSystem.ts | 45 ++-- 10 files changed, 278 insertions(+), 221 deletions(-) diff --git a/internal-packages/run-engine/src/engine/index.ts b/internal-packages/run-engine/src/engine/index.ts index 8d1f4c9c1f8..a6a20b5b9fd 100644 --- a/internal-packages/run-engine/src/engine/index.ts +++ b/internal-packages/run-engine/src/engine/index.ts @@ -650,7 +650,7 @@ export class RunEngine { "createCancelledRun: row already exists, returning existing (idempotent)", { friendlyId: snapshot.friendlyId }, ); - const existing = await prisma.taskRun.findFirst({ where: { id } }); + const existing = await this.runStore.findRun({ id }, prisma); if (existing) { // Only treat the conflict as idempotent when the existing // row is ALREADY canceled. 
If a non-canceled row landed @@ -2325,16 +2325,19 @@ export class RunEngine { }); //the run didn't start executing, we need to requeue it - const run = await prisma.taskRun.findFirst({ - where: { id: runId }, - include: { - runtimeEnvironment: { - include: { - organization: true, + const run = await this.runStore.findRun( + { id: runId }, + { + include: { + runtimeEnvironment: { + include: { + organization: true, + }, }, }, }, - }); + prisma + ); if (!run) { this.logger.error( @@ -2629,12 +2632,15 @@ export class RunEngine { snapshotId, }); - const taskRun = await this.prisma.taskRun.findFirst({ - where: { id: runId }, - select: { - queue: true, + const taskRun = await this.runStore.findRun( + { id: runId }, + { + select: { + queue: true, + }, }, - }); + this.prisma + ); if (!taskRun) { this.logger.error( @@ -2708,7 +2714,7 @@ export class RunEngine { runIds: string[], completedAtOffsetMs: number = 1000 * 60 * 10 ): Promise> { - const runs = await this.readOnlyPrisma.taskRun.findMany({ + const runs = await this.runStore.findRuns({ where: { id: { in: runIds }, completedAt: { diff --git a/internal-packages/run-engine/src/engine/retrying.ts b/internal-packages/run-engine/src/engine/retrying.ts index 6099d5b649b..a64dfb796e1 100644 --- a/internal-packages/run-engine/src/engine/retrying.ts +++ b/internal-packages/run-engine/src/engine/retrying.ts @@ -10,6 +10,7 @@ import { TaskRunExecutionRetry, } from "@trigger.dev/core/v3"; import { PrismaClientOrTransaction } from "@trigger.dev/database"; +import { RunStore } from "@internal/run-store"; import { MAX_TASK_RUN_ATTEMPTS } from "./consts.js"; import { ServiceValidationError } from "./errors.js"; @@ -45,6 +46,7 @@ export type RetryOutcome = export async function retryOutcomeFromCompletion( prisma: PrismaClientOrTransaction, + runStore: RunStore, { runId, attemptNumber, error, retryUsingQueue, retrySettings }: Params ): Promise { // Canceled @@ -56,7 +58,7 @@ export async function retryOutcomeFromCompletion( // OOM error 
(retry on a larger machine or fail) if (isOOMRunError(error)) { - const oomResult = await retryOOMOnMachine(prisma, runId); + const oomResult = await retryOOMOnMachine(prisma, runStore, runId); if (!oomResult) { return { outcome: "fail_run", sanitizedError, wasOOMError: true }; } @@ -95,18 +97,21 @@ export async function retryOutcomeFromCompletion( } // Get the run settings and current usage values - const run = await prisma.taskRun.findFirst({ - where: { + const run = await runStore.findRun( + { id: runId, }, - select: { - maxAttempts: true, - lockedRetryConfig: true, - usageDurationMs: true, - costInCents: true, - machinePreset: true, + { + select: { + maxAttempts: true, + lockedRetryConfig: true, + usageDurationMs: true, + costInCents: true, + machinePreset: true, + }, }, - }); + prisma + ); if (!run) { throw new ServiceValidationError("Run not found", 404); @@ -179,6 +184,7 @@ export async function retryOutcomeFromCompletion( async function retryOOMOnMachine( prisma: PrismaClientOrTransaction, + runStore: RunStore, runId: string ): Promise<{ machine: string; @@ -188,17 +194,20 @@ async function retryOOMOnMachine( machinePreset: string | null; } | undefined> { try { - const run = await prisma.taskRun.findFirst({ - where: { + const run = await runStore.findRun( + { id: runId, }, - select: { - machinePreset: true, - lockedRetryConfig: true, - usageDurationMs: true, - costInCents: true, + { + select: { + machinePreset: true, + lockedRetryConfig: true, + usageDurationMs: true, + costInCents: true, + }, }, - }); + prisma + ); if (!run || !run.lockedRetryConfig || !run.machinePreset) { return; diff --git a/internal-packages/run-engine/src/engine/systems/batchSystem.ts b/internal-packages/run-engine/src/engine/systems/batchSystem.ts index 9933a715162..a3d44507a46 100644 --- a/internal-packages/run-engine/src/engine/systems/batchSystem.ts +++ b/internal-packages/run-engine/src/engine/systems/batchSystem.ts @@ -87,16 +87,19 @@ export class BatchSystem { return; } - const 
runs = await this.$.prisma.taskRun.findMany({ - select: { - id: true, - status: true, - }, - where: { - batchId, - runtimeEnvironmentId: batch.runtimeEnvironmentId, + const runs = await this.$.runStore.findRuns( + { + select: { + id: true, + status: true, + }, + where: { + batchId, + runtimeEnvironmentId: batch.runtimeEnvironmentId, + }, }, - }); + this.$.prisma + ); if (runs.every((r) => isFinalRunStatus(r.status))) { this.$.logger.debug("#tryCompleteBatch: All runs are completed", { batchId }); diff --git a/internal-packages/run-engine/src/engine/systems/debounceSystem.ts b/internal-packages/run-engine/src/engine/systems/debounceSystem.ts index 5b9d851d0f2..bf4b3e68bb4 100644 --- a/internal-packages/run-engine/src/engine/systems/debounceSystem.ts +++ b/internal-packages/run-engine/src/engine/systems/debounceSystem.ts @@ -606,10 +606,11 @@ return 0 return null; } - const probe = await prisma.taskRun.findFirst({ - where: { id: existingRunId }, - select: { status: true, delayUntil: true, createdAt: true }, - }); + const probe = await this.$.runStore.findRun( + { id: existingRunId }, + { select: { status: true, delayUntil: true, createdAt: true } }, + prisma + ); if (!probe || probe.status !== "DELAYED" || !probe.delayUntil) { return null; } @@ -632,10 +633,11 @@ return 0 return null; } - const fullRun = await prisma.taskRun.findFirst({ - where: { id: existingRunId }, - include: { associatedWaitpoint: true }, - }); + const fullRun = await this.$.runStore.findRun( + { id: existingRunId }, + { include: { associatedWaitpoint: true } }, + prisma + ); if (!fullRun || fullRun.status !== "DELAYED") { return null; } @@ -665,10 +667,11 @@ return 0 error: unknown; prisma: PrismaClientOrTransaction; }): Promise { - const fullRun = await prisma.taskRun.findFirst({ - where: { id: existingRunId }, - include: { associatedWaitpoint: true }, - }); + const fullRun = await this.$.runStore.findRun( + { id: existingRunId }, + { include: { associatedWaitpoint: true } }, + prisma + ); if 
(!fullRun || fullRun.status !== "DELAYED") { // The run is no longer in a state we can safely return as "existing" - @@ -775,12 +778,15 @@ return 0 } // Get the run to check debounce metadata and createdAt - const existingRun = await prisma.taskRun.findFirst({ - where: { id: existingRunId }, - include: { - associatedWaitpoint: true, + const existingRun = await this.$.runStore.findRun( + { id: existingRunId }, + { + include: { + associatedWaitpoint: true, + }, }, - }); + prisma + ); if (!existingRun) { this.$.logger.debug("handleExistingRun: existing run not found in database", { diff --git a/internal-packages/run-engine/src/engine/systems/delayedRunSystem.ts b/internal-packages/run-engine/src/engine/systems/delayedRunSystem.ts index cff29a75a4f..a77e60d05e7 100644 --- a/internal-packages/run-engine/src/engine/systems/delayedRunSystem.ts +++ b/internal-packages/run-engine/src/engine/systems/delayedRunSystem.ts @@ -110,17 +110,20 @@ export class DelayedRunSystem { return; } - const run = await this.$.prisma.taskRun.findFirst({ - where: { id: runId }, - include: { - runtimeEnvironment: { - include: { - project: true, - organization: true, + const run = await this.$.runStore.findRun( + { id: runId }, + { + include: { + runtimeEnvironment: { + include: { + project: true, + organization: true, + }, }, }, }, - }); + this.$.prisma + ); if (!run) { throw new Error(`#enqueueDelayedRun: run not found: ${runId}`); diff --git a/internal-packages/run-engine/src/engine/systems/dequeueSystem.ts b/internal-packages/run-engine/src/engine/systems/dequeueSystem.ts index 26ea7866a67..8791dc1bd12 100644 --- a/internal-packages/run-engine/src/engine/systems/dequeueSystem.ts +++ b/internal-packages/run-engine/src/engine/systems/dequeueSystem.ts @@ -641,12 +641,15 @@ export class DequeueSystem { // Wrap the Prisma call with tryCatch - if DB is unavailable, we still want to nack via Redis const [findError, run] = await tryCatch( - prisma.taskRun.findFirst({ - where: { id: runId }, - 
include: { - runtimeEnvironment: true, + this.$.runStore.findRun( + { id: runId }, + { + include: { + runtimeEnvironment: true, + }, }, - }) + prisma + ) ); // If DB is unavailable or run not found, just nack directly via Redis @@ -808,26 +811,29 @@ export class DequeueSystem { return startSpan(this.$.tracer, "getRunWithBackgroundWorkerTasks", async (span) => { span.setAttribute("run_id", runId); - const run = await prisma.taskRun.findFirst({ - where: { + const run = await this.$.runStore.findRun( + { id: runId, }, - include: { - runtimeEnvironment: { - select: { - id: true, - type: true, - archivedAt: true, + { + include: { + runtimeEnvironment: { + select: { + id: true, + type: true, + archivedAt: true, + }, }, - }, - lockedToVersion: { - include: { - deployment: true, - tasks: true, + lockedToVersion: { + include: { + deployment: true, + tasks: true, + }, }, }, }, - }); + prisma + ); if (!run) { span.setAttribute("result", "NO_RUN"); diff --git a/internal-packages/run-engine/src/engine/systems/pendingVersionSystem.ts b/internal-packages/run-engine/src/engine/systems/pendingVersionSystem.ts index 59d72c4c461..741ad8a14f6 100644 --- a/internal-packages/run-engine/src/engine/systems/pendingVersionSystem.ts +++ b/internal-packages/run-engine/src/engine/systems/pendingVersionSystem.ts @@ -93,15 +93,18 @@ export class PendingVersionSystem { // is dropped. The planner uses the PK for `id IN (…)`; the status // predicate is a residual filter and does NOT require the status // index. 
- const pendingRuns = await this.$.prisma.taskRun.findMany({ - where: { - id: { in: candidateIds }, - status: "PENDING_VERSION", - }, - orderBy: { - createdAt: "asc", + const pendingRuns = await this.$.runStore.findRuns( + { + where: { + id: { in: candidateIds }, + status: "PENDING_VERSION", + }, + orderBy: { + createdAt: "asc", + }, }, - }); + this.$.prisma + ); if (!pendingRuns.length) { // CH returned candidates but all of them have already moved past @@ -135,7 +138,7 @@ export class PendingVersionSystem { return false; } - const updatedRun = await tx.taskRun.findFirstOrThrow({ where: { id: run.id } }); + const updatedRun = await this.$.runStore.findRunOrThrow({ id: run.id }, tx); await this.enqueueSystem.enqueueRun({ run: updatedRun, diff --git a/internal-packages/run-engine/src/engine/systems/runAttemptSystem.ts b/internal-packages/run-engine/src/engine/systems/runAttemptSystem.ts index 1aa1738f3b0..977c94a8e83 100644 --- a/internal-packages/run-engine/src/engine/systems/runAttemptSystem.ts +++ b/internal-packages/run-engine/src/engine/systems/runAttemptSystem.ts @@ -175,56 +175,58 @@ export class RunAttemptSystem { } public async resolveTaskRunContext(runId: string): Promise { - const run = await this.$.readOnlyPrisma.taskRun.findFirst({ - where: { + const run = await this.$.runStore.findRun( + { id: runId, }, - select: { - id: true, - createdAt: true, - updatedAt: true, - executedAt: true, - baseCostInCents: true, - projectId: true, - organizationId: true, - friendlyId: true, - lockedById: true, - lockedQueueId: true, - queue: true, - attemptNumber: true, - status: true, - ttl: true, - machinePreset: true, - runTags: true, - isTest: true, - replayedFromTaskRunFriendlyId: true, - idempotencyKey: true, - idempotencyKeyOptions: true, - startedAt: true, - maxAttempts: true, - taskVersion: true, - maxDurationInSeconds: true, - usageDurationMs: true, - costInCents: true, - traceContext: true, - priorityMs: true, - taskIdentifier: true, - runtimeEnvironment: { - 
select: { - id: true, - slug: true, - type: true, - branchName: true, - git: true, - organizationId: true, + { + select: { + id: true, + createdAt: true, + updatedAt: true, + executedAt: true, + baseCostInCents: true, + projectId: true, + organizationId: true, + friendlyId: true, + lockedById: true, + lockedQueueId: true, + queue: true, + attemptNumber: true, + status: true, + ttl: true, + machinePreset: true, + runTags: true, + isTest: true, + replayedFromTaskRunFriendlyId: true, + idempotencyKey: true, + idempotencyKeyOptions: true, + startedAt: true, + maxAttempts: true, + taskVersion: true, + maxDurationInSeconds: true, + usageDurationMs: true, + costInCents: true, + traceContext: true, + priorityMs: true, + taskIdentifier: true, + runtimeEnvironment: { + select: { + id: true, + slug: true, + type: true, + branchName: true, + git: true, + organizationId: true, + }, }, + parentTaskRunId: true, + rootTaskRunId: true, + batchId: true, + workerQueue: true, }, - parentTaskRunId: true, - rootTaskRunId: true, - batchId: true, - workerQueue: true, - }, - }); + } + ); if (!run) { throw new ServiceValidationError("Task run not found", 404); @@ -338,21 +340,23 @@ export class RunAttemptSystem { }); } - const taskRun = await this.$.readOnlyPrisma.taskRun.findFirst({ - where: { + const taskRun = await this.$.runStore.findRun( + { id: runId, }, - select: { - id: true, - friendlyId: true, - attemptNumber: true, - projectId: true, - runtimeEnvironmentId: true, - status: true, - lockedById: true, - ttl: true, - }, - }); + { + select: { + id: true, + friendlyId: true, + attemptNumber: true, + projectId: true, + runtimeEnvironmentId: true, + status: true, + lockedById: true, + ttl: true, + }, + } + ); this.$.logger.debug("Creating a task run attempt", { taskRun }); @@ -717,14 +721,16 @@ export class RunAttemptSystem { const completedAt = new Date(); // Read current usage values to calculate new totals (safe under runLock) - const currentRun = await 
this.$.readOnlyPrisma.taskRun.findFirst({ - where: { id: runId }, - select: { - usageDurationMs: true, - costInCents: true, - machinePreset: true, - }, - }); + const currentRun = await this.$.runStore.findRun( + { id: runId }, + { + select: { + usageDurationMs: true, + costInCents: true, + machinePreset: true, + }, + } + ); if (!currentRun) { throw new ServiceValidationError("Run not found", 404); @@ -904,35 +910,41 @@ export class RunAttemptSystem { const failedAt = new Date(); - const retryResult = await retryOutcomeFromCompletion(this.$.readOnlyPrisma, { - runId, - error: completion.error, - retryUsingQueue: forceRequeue ?? false, - retrySettings: completion.retry, - attemptNumber: latestSnapshot.attemptNumber, - }); + const retryResult = await retryOutcomeFromCompletion( + this.$.readOnlyPrisma, + this.$.runStore, + { + runId, + error: completion.error, + retryUsingQueue: forceRequeue ?? false, + retrySettings: completion.retry, + attemptNumber: latestSnapshot.attemptNumber, + } + ); // Force requeue means it was crashed so the attempt span needs to be closed if (forceRequeue) { - const minimalRun = await this.$.readOnlyPrisma.taskRun.findFirst({ - where: { + const minimalRun = await this.$.runStore.findRun( + { id: runId, }, - select: { - status: true, - spanId: true, - maxAttempts: true, - runtimeEnvironment: { - select: { - organizationId: true, + { + select: { + status: true, + spanId: true, + maxAttempts: true, + runtimeEnvironment: { + select: { + organizationId: true, + }, }, + taskEventStore: true, + createdAt: true, + completedAt: true, + updatedAt: true, }, - taskEventStore: true, - createdAt: true, - completedAt: true, - updatedAt: true, - }, - }); + } + ); if (!minimalRun) { throw new ServiceValidationError("Run not found", 404); @@ -1367,14 +1379,16 @@ export class RunAttemptSystem { // Calculate updated usage if we have attempt duration data let usageUpdate: { usageDurationMs: number; costInCents: number } | undefined; if (attemptDurationMs !== 
undefined) { - const currentRun = await this.$.readOnlyPrisma.taskRun.findFirst({ - where: { id: runId }, - select: { - usageDurationMs: true, - costInCents: true, - machinePreset: true, - }, - }); + const currentRun = await this.$.runStore.findRun( + { id: runId }, + { + select: { + usageDurationMs: true, + costInCents: true, + machinePreset: true, + }, + } + ); if (!currentRun) { throw new ServiceValidationError("Run not found", 404); @@ -1578,14 +1592,16 @@ export class RunAttemptSystem { const truncatedError = this.#truncateTaskRunError(error); // Read current usage values to calculate new totals - const currentRun = await this.$.readOnlyPrisma.taskRun.findFirst({ - where: { id: runId }, - select: { - usageDurationMs: true, - costInCents: true, - machinePreset: true, - }, - }); + const currentRun = await this.$.runStore.findRun( + { id: runId }, + { + select: { + usageDurationMs: true, + costInCents: true, + machinePreset: true, + }, + } + ); if (!currentRun) { throw new ServiceValidationError("Run not found", 404); diff --git a/internal-packages/run-engine/src/engine/systems/ttlSystem.ts b/internal-packages/run-engine/src/engine/systems/ttlSystem.ts index ebd1cbdd80b..faffa2c59e5 100644 --- a/internal-packages/run-engine/src/engine/systems/ttlSystem.ts +++ b/internal-packages/run-engine/src/engine/systems/ttlSystem.ts @@ -33,7 +33,7 @@ export class TtlSystem { } //only expire "PENDING" runs - const run = await prisma.taskRun.findFirst({ where: { id: runId } }); + const run = await this.$.runStore.findRun({ id: runId }, prisma); if (!run) { this.$.logger.debug("Could not find enqueued run to expire", { @@ -171,7 +171,7 @@ export class TtlSystem { const skipped: { runId: string; reason: string }[] = []; // Fetch all runs in a single query (no snapshot data needed) - const runs = await this.$.readOnlyPrisma.taskRun.findMany({ + const runs = await this.$.runStore.findRuns({ where: { id: { in: runIds } }, select: { id: true, diff --git 
a/internal-packages/run-engine/src/engine/systems/waitpointSystem.ts b/internal-packages/run-engine/src/engine/systems/waitpointSystem.ts index 8b8d4f82fcf..29eba297be5 100644 --- a/internal-packages/run-engine/src/engine/systems/waitpointSystem.ts +++ b/internal-packages/run-engine/src/engine/systems/waitpointSystem.ts @@ -679,23 +679,26 @@ export class WaitpointSystem { } // 3. Get the run with environment - const run = await this.$.prisma.taskRun.findFirst({ - where: { + const run = await this.$.runStore.findRun( + { id: runId, }, - include: { - runtimeEnvironment: { - select: { - id: true, - type: true, - maximumConcurrencyLimit: true, - concurrencyLimitBurstFactor: true, - project: { select: { id: true } }, - organization: { select: { id: true } }, + { + include: { + runtimeEnvironment: { + select: { + id: true, + type: true, + maximumConcurrencyLimit: true, + concurrencyLimitBurstFactor: true, + project: { select: { id: true } }, + organization: { select: { id: true } }, + }, }, }, }, - }); + this.$.prisma + ); if (!run) { this.$.logger.error(`continueRunIfUnblocked: run not found`, { @@ -972,10 +975,11 @@ export class WaitpointSystem { environmentId: string; }): Promise { // Fast path: check if waitpoint already exists - const run = await this.$.prisma.taskRun.findFirst({ - where: { id: runId }, - include: { associatedWaitpoint: true }, - }); + const run = await this.$.runStore.findRun( + { id: runId }, + { include: { associatedWaitpoint: true } }, + this.$.prisma + ); if (!run) { throw new Error(`Run not found: ${runId}`); @@ -990,10 +994,11 @@ export class WaitpointSystem { const prisma = this.$.prisma; // Double-check after acquiring lock - const runAfterLock = await prisma.taskRun.findFirst({ - where: { id: runId }, - include: { associatedWaitpoint: true }, - }); + const runAfterLock = await this.$.runStore.findRun( + { id: runId }, + { include: { associatedWaitpoint: true } }, + prisma + ); if (!runAfterLock) { throw new Error(`Run not found: 
${runId}`); From 5b74b48435bc3854d6a235b55945ad284e144eea Mon Sep 17 00:00:00 2001 From: Dan Sutton Date: Thu, 18 Jun 2026 15:57:41 +0100 Subject: [PATCH 25/83] refactor(webapp): route service-layer TaskRun reads through the run store Relocate the direct TaskRun reads in webapp services, run-engine concerns, realtime, mollifier and metadata to the RunStore read methods, preserving the exact client (writer, replica, or transaction) at each site. The run hydrator now receives the store by injection. Behavior-preserving. --- .../app/models/runtimeEnvironment.server.ts | 14 +- .../concerns/idempotencyKeys.server.ts | 22 +- .../services/triggerFailedTask.server.ts | 15 +- .../runEngine/services/triggerTask.server.ts | 8 +- .../metadata/updateMetadata.server.ts | 81 +++-- .../nativeRealtimeClientInstance.server.ts | 2 + .../app/services/realtime/runReader.server.ts | 29 +- .../realtime/sessionRunManager.server.ts | 39 ++- .../app/services/realtime/sessions.server.ts | 27 +- .../shadowRealtimeClientInstance.server.ts | 3 +- .../app/services/runsBackfiller.server.ts | 30 +- .../clickhouseRunsRepository.server.ts | 105 +++--- .../app/v3/eventRepository/index.server.ts | 38 ++- apps/webapp/app/v3/failedTaskRun.server.ts | 16 +- .../v3/mollifier/mutateWithFallback.server.ts | 9 +- .../mollifier/resolveRunForMutation.server.ts | 20 +- .../webapp/app/v3/runEngineHandlers.server.ts | 314 ++++++++++-------- .../alerts/performTaskRunAlerts.server.ts | 19 +- .../app/v3/services/batchTriggerV3.server.ts | 27 +- .../v3/services/bulk/BulkActionV2.server.ts | 44 +-- .../services/cancelDevSessionRuns.server.ts | 8 +- .../app/v3/services/completeAttempt.server.ts | 13 +- .../app/v3/services/crashTaskRun.server.ts | 6 +- .../createCheckpointRestoreEvent.server.ts | 19 +- .../services/createTaskRunAttempt.server.ts | 71 ++-- .../v3/services/enqueueDelayedRun.server.ts | 43 +-- .../services/executeTasksWaitingForDeploy.ts | 45 +-- .../v3/services/expireEnqueuedRun.server.ts | 19 +- 
.../app/v3/services/finalizeTaskRun.server.ts | 27 +- .../app/v3/services/retryAttempt.server.ts | 6 +- .../v3/services/updateFatalRunError.server.ts | 6 +- .../app/v3/taskRunHeartbeatFailed.server.ts | 41 +-- .../test/realtime/runReaderProjection.test.ts | 4 +- 33 files changed, 642 insertions(+), 528 deletions(-) diff --git a/apps/webapp/app/models/runtimeEnvironment.server.ts b/apps/webapp/app/models/runtimeEnvironment.server.ts index be05adaa8a7..9135872417c 100644 --- a/apps/webapp/app/models/runtimeEnvironment.server.ts +++ b/apps/webapp/app/models/runtimeEnvironment.server.ts @@ -1,6 +1,7 @@ import type { AuthenticatedEnvironment } from "@internal/run-engine"; import type { Prisma, PrismaClientOrTransaction, RuntimeEnvironment } from "@trigger.dev/database"; import { $replica, prisma } from "~/db.server"; +import { runStore } from "~/v3/runStore.server"; import { logger } from "~/services/logger.server"; import { getUsername } from "~/utils/username"; import { sanitizeBranchName } from "@trigger.dev/core/v3/utils/gitBranch"; @@ -251,14 +252,17 @@ export async function findEnvironmentFromRun( ): Promise { // The include (no select) already pulls every taskRun scalar, so runTags/batchId // ride along for free — no extra query for the realtime publish to send a full record. - const taskRun = await (tx ?? $replica).taskRun.findFirst({ - where: { + const taskRun = await runStore.findRun( + { id: runId, }, - include: { - runtimeEnvironment: { include: authIncludeBase }, + { + include: { + runtimeEnvironment: { include: authIncludeBase }, + }, }, - }); + tx ?? 
$replica + ); if (!taskRun?.runtimeEnvironment) { return null; } diff --git a/apps/webapp/app/runEngine/concerns/idempotencyKeys.server.ts b/apps/webapp/app/runEngine/concerns/idempotencyKeys.server.ts index 2bdf95eb9a6..02d0ec957f2 100644 --- a/apps/webapp/app/runEngine/concerns/idempotencyKeys.server.ts +++ b/apps/webapp/app/runEngine/concerns/idempotencyKeys.server.ts @@ -151,16 +151,19 @@ export class IdempotencyKeyConcern { } const existingRun = idempotencyKey - ? await this.prisma.taskRun.findFirst({ - where: { + ? await runStore.findRun( + { runtimeEnvironmentId: request.environment.id, idempotencyKey, taskIdentifier: request.taskId, }, - include: { - associatedWaitpoint: true, + { + include: { + associatedWaitpoint: true, + }, }, - }) + this.prisma + ) : undefined; // Buffer fallback per the mollifier-idempotency design. PG missed — @@ -329,14 +332,15 @@ export class IdempotencyKeyConcern { // Another concurrent trigger committed first. Re-resolve via the // existing checks: writer-side PG findFirst first (defeats // replica lag), then buffer fallback for the buffered case. 
- const writerRun = await this.prisma.taskRun.findFirst({ - where: { + const writerRun = await runStore.findRun( + { runtimeEnvironmentId: request.environment.id, idempotencyKey, taskIdentifier: request.taskId, }, - include: { associatedWaitpoint: true }, - }); + { include: { associatedWaitpoint: true } }, + this.prisma + ); if (writerRun) { return { isCached: true, run: writerRun }; } diff --git a/apps/webapp/app/runEngine/services/triggerFailedTask.server.ts b/apps/webapp/app/runEngine/services/triggerFailedTask.server.ts index a8a7cbf0f3b..031411844b4 100644 --- a/apps/webapp/app/runEngine/services/triggerFailedTask.server.ts +++ b/apps/webapp/app/runEngine/services/triggerFailedTask.server.ts @@ -9,6 +9,7 @@ import { getEventRepository } from "~/v3/eventRepository/index.server"; import { PerformTaskRunAlertsService } from "~/v3/services/alerts/performTaskRunAlerts.server"; import { DefaultQueueManager } from "../concerns/queues.server"; import type { TriggerTaskRequest } from "../types"; +import { runStore } from "~/v3/runStore.server"; export type TriggerFailedTaskRequest = { /** The task identifier (e.g. "my-task") */ @@ -82,12 +83,13 @@ export class TriggerFailedTaskService { // Resolve parent run for rootTaskRunId and depth (same as triggerTask.server.ts) const parentRun = request.parentRunId - ? await this.prisma.taskRun.findFirst({ - where: { + ? await runStore.findRun( + { id: RunId.fromFriendlyId(request.parentRunId), runtimeEnvironmentId: request.environment.id, }, - }) + this.prisma + ) : undefined; const depth = parentRun ? 
parentRun.depth + 1 : 0; @@ -275,12 +277,13 @@ export class TriggerFailedTaskService { let depth = 0; if (opts.parentRunId) { - const parentRun = await this.prisma.taskRun.findFirst({ - where: { + const parentRun = await runStore.findRun( + { id: RunId.fromFriendlyId(opts.parentRunId), runtimeEnvironmentId: opts.environmentId, }, - }); + this.prisma + ); if (parentRun) { parentTaskRunId = parentRun.id; diff --git a/apps/webapp/app/runEngine/services/triggerTask.server.ts b/apps/webapp/app/runEngine/services/triggerTask.server.ts index 78455f9b686..89a938da8bf 100644 --- a/apps/webapp/app/runEngine/services/triggerTask.server.ts +++ b/apps/webapp/app/runEngine/services/triggerTask.server.ts @@ -67,6 +67,7 @@ import { import { mollifyTrigger } from "~/v3/mollifier/mollifierMollify.server"; import { type MollifierBuffer } from "@trigger.dev/redis-worker"; import { QueueSizeLimitExceededError, ServiceValidationError } from "~/v3/services/common.server"; +import { runStore } from "~/v3/runStore.server"; class NoopTriggerRacepointSystem implements TriggerRacepointSystem { async waitForRacepoint(options: { racepoint: TriggerRacepoints; id: string }): Promise { @@ -241,12 +242,13 @@ export class RunEngineTriggerTaskService { // Get parent run if specified const parentRun = body.options?.parentRunId - ? await this.prisma.taskRun.findFirst({ - where: { + ? 
await runStore.findRun( + { id: RunId.fromFriendlyId(body.options.parentRunId), runtimeEnvironmentId: environment.id, }, - }) + this.prisma + ) : undefined; // Validate parent run diff --git a/apps/webapp/app/services/metadata/updateMetadata.server.ts b/apps/webapp/app/services/metadata/updateMetadata.server.ts index 2cc057f10f2..2af44d747bd 100644 --- a/apps/webapp/app/services/metadata/updateMetadata.server.ts +++ b/apps/webapp/app/services/metadata/updateMetadata.server.ts @@ -189,18 +189,21 @@ export class UpdateMetadataService { // Fetch current run (+ the realtime membership keys, so a flush can publish) const run = yield* _( Effect.tryPromise(() => - this._prisma.taskRun.findFirst({ - where: { id: runId }, - select: { - id: true, - metadata: true, - metadataType: true, - metadataVersion: true, - runtimeEnvironmentId: true, - runTags: true, - batchId: true, + this._runStore.findRun( + { id: runId }, + { + select: { + id: true, + metadata: true, + metadataType: true, + metadataVersion: true, + runtimeEnvironmentId: true, + runTags: true, + batchId: true, + }, }, - }) + this._prisma + ) ) ); @@ -332,8 +335,8 @@ export class UpdateMetadataService { ) { const runIdType = runId.startsWith("run_") ? "friendly" : "internal"; - const taskRun = await this._prisma.taskRun.findFirst({ - where: environment + const taskRun = await this._runStore.findRun( + environment ? { runtimeEnvironmentId: environment.id, ...(runIdType === "internal" ? { id: runId } : { friendlyId: runId }), @@ -341,29 +344,32 @@ export class UpdateMetadataService { : { ...(runIdType === "internal" ? 
{ id: runId } : { friendlyId: runId }), }, - select: { - id: true, - batchId: true, - runTags: true, - completedAt: true, - status: true, - metadata: true, - metadataType: true, - metadataVersion: true, - parentTaskRun: { - select: { - id: true, - status: true, + { + select: { + id: true, + batchId: true, + runTags: true, + completedAt: true, + status: true, + metadata: true, + metadataType: true, + metadataVersion: true, + parentTaskRun: { + select: { + id: true, + status: true, + }, }, - }, - rootTaskRun: { - select: { - id: true, - status: true, + rootTaskRun: { + select: { + id: true, + status: true, + }, }, }, }, - }); + this._prisma + ); if (!taskRun) { return; @@ -427,10 +433,13 @@ export class UpdateMetadataService { while (attempts <= MAX_RETRIES) { // Fetch the latest run data - const run = await this._prisma.taskRun.findFirst({ - where: { id: runId }, - select: { metadata: true, metadataType: true, metadataVersion: true }, - }); + const run = await this._runStore.findRun( + { id: runId }, + { + select: { metadata: true, metadataType: true, metadataVersion: true }, + }, + this._prisma + ); if (!run) { throw new Error(`Run ${runId} not found`); diff --git a/apps/webapp/app/services/realtime/nativeRealtimeClientInstance.server.ts b/apps/webapp/app/services/realtime/nativeRealtimeClientInstance.server.ts index 012c28c08fc..3f29f3faa47 100644 --- a/apps/webapp/app/services/realtime/nativeRealtimeClientInstance.server.ts +++ b/apps/webapp/app/services/realtime/nativeRealtimeClientInstance.server.ts @@ -1,5 +1,6 @@ import { getMeter } from "@internal/tracing"; import { $replica } from "~/db.server"; +import { runStore } from "~/v3/runStore.server"; import { env } from "~/env.server"; import { singleton } from "~/utils/singleton"; import { getCachedLimit } from "../platform.v3.server"; @@ -122,6 +123,7 @@ function initializeNativeRealtimeClient(): NativeRealtimeClient { // One RunHydrator shared by the router and the client, so its single-flight + short-TTL 
cache covers both. const runReader = new RunHydrator({ replica: $replica, + runStore, cacheTtlMs: env.REALTIME_BACKEND_NATIVE_RUN_CACHE_TTL_MS, maxCacheEntries: env.REALTIME_BACKEND_NATIVE_RUN_CACHE_MAX_ENTRIES, }); diff --git a/apps/webapp/app/services/realtime/runReader.server.ts b/apps/webapp/app/services/realtime/runReader.server.ts index e8509d73de4..98ce4dc35ff 100644 --- a/apps/webapp/app/services/realtime/runReader.server.ts +++ b/apps/webapp/app/services/realtime/runReader.server.ts @@ -1,4 +1,5 @@ -import { type Prisma, type PrismaClient } from "@trigger.dev/database"; +import { type Prisma, type PrismaClient, type PrismaClientOrTransaction } from "@trigger.dev/database"; +import type { RunStore } from "@internal/run-store"; import { BoundedTtlCache } from "./boundedTtlCache"; import { RESERVED_COLUMNS, type RealtimeRunRow } from "./electricStreamProtocol.server"; @@ -79,6 +80,8 @@ export interface RunListResolver { export type RunHydratorOptions = { /** A read-replica Prisma client (`$replica`). Always Postgres. */ replica: Pick; + /** RunStore the reads are routed through; `replica` is passed as the read client. */ + runStore: RunStore; /** Read-through cache TTL (ms) collapsing duplicate refetches for the same run. Set 0 to disable. Defaults to 250ms. */ cacheTtlMs?: number; /** Hard cap on cache entries before expired entries are swept. 
*/ @@ -139,24 +142,28 @@ export class RunHydrator { if (ids.length === 0) { return []; } - const rows = await this.options.replica.taskRun.findMany({ - where: { - runtimeEnvironmentId: environmentId, - id: { in: ids }, + const rows = await this.options.runStore.findRuns( + { + where: { + runtimeEnvironmentId: environmentId, + id: { in: ids }, + }, + select: buildHydratorSelect(skipColumns), }, - select: buildHydratorSelect(skipColumns), - }); + this.options.replica as PrismaClientOrTransaction + ); return rows as unknown as RealtimeRunRow[]; } async #fetch(environmentId: string, runId: string): Promise { - const run = await this.options.replica.taskRun.findFirst({ - where: { + const run = await this.options.runStore.findRun( + { id: runId, runtimeEnvironmentId: environmentId, }, - select: RUN_HYDRATOR_SELECT, - }); + { select: RUN_HYDRATOR_SELECT }, + this.options.replica as PrismaClientOrTransaction + ); return (run ?? null) as RealtimeRunRow | null; } diff --git a/apps/webapp/app/services/realtime/sessionRunManager.server.ts b/apps/webapp/app/services/realtime/sessionRunManager.server.ts index 1ad5174d1c6..b227f382c7b 100644 --- a/apps/webapp/app/services/realtime/sessionRunManager.server.ts +++ b/apps/webapp/app/services/realtime/sessionRunManager.server.ts @@ -2,6 +2,7 @@ import type { Session, TaskRunStatus } from "@trigger.dev/database"; import { SessionTriggerConfig as SessionTriggerConfigZod } from "@trigger.dev/core/v3"; import { z } from "zod"; import { prisma, $replica } from "~/db.server"; +import { runStore } from "~/v3/runStore.server"; import type { AuthenticatedEnvironment } from "~/services/apiAuth.server"; import { logger } from "~/services/logger.server"; import { CancelTaskRunService } from "~/v3/services/cancelTaskRun.server"; @@ -119,10 +120,11 @@ export async function ensureRunForSession( // replica as "row vanished" double-triggers the session (a fast // first append after session create races the replica apply delay // and spawns a second 
live run consuming the same `.in`). - probe = await prisma.taskRun.findFirst({ - where: { id: session.currentRunId }, - select: { status: true, friendlyId: true }, - }); + probe = await runStore.findRun( + { id: session.currentRunId }, + { select: { status: true, friendlyId: true } }, + prisma + ); } if (probe && !isFinalRunStatus(probe.status)) { return { runId: session.currentRunId, triggered: false }; @@ -251,10 +253,11 @@ export async function ensureRunForSession( // just wrote `currentRunId` on the writer, so probe the writer too — // the replica may not have the run row yet, and a missed probe forces // another trigger+recurse until `ENSURE_RUN_FOR_SESSION_MAX_ATTEMPTS`. - const probe = await prisma.taskRun.findFirst({ - where: { id: fresh.currentRunId }, - select: { status: true, friendlyId: true }, - }); + const probe = await runStore.findRun( + { id: fresh.currentRunId }, + { select: { status: true, friendlyId: true } }, + prisma + ); if (probe && !isFinalRunStatus(probe.status)) { return { runId: fresh.currentRunId, triggered: false }; } @@ -494,10 +497,11 @@ async function getRunStatusAndFriendlyId( // `payload.previousRunId` without a second read. `Session.currentRunId` // stores the internal cuid; the agent's wire / customer hooks expose // the friendlyId via `ctx.run.id`, so consistency matters. - const row = await $replica.taskRun.findFirst({ - where: { id: runId }, - select: { status: true, friendlyId: true }, - }); + const row = await runStore.findRun( + { id: runId }, + { select: { status: true, friendlyId: true } }, + $replica + ); return row ?? null; } @@ -511,10 +515,11 @@ async function getRunStatusAndFriendlyId( * acceptable degraded behavior. 
*/ async function resolveRunFriendlyId(runId: string): Promise { - const row = await $replica.taskRun.findFirst({ - where: { id: runId }, - select: { friendlyId: true }, - }); + const row = await runStore.findRun( + { id: runId }, + { select: { friendlyId: true } }, + $replica + ); return row?.friendlyId ?? runId; } @@ -526,7 +531,7 @@ async function cancelLostRaceRun( // Read-after-write: the run was just triggered on the writer, so go // through `prisma`. A `$replica` miss here would silently no-op the // cancel and leak an orphan run that no session is going to claim. - const run = await prisma.taskRun.findFirst({ where: { id: runId } }); + const run = await runStore.findRun({ id: runId }, prisma); if (!run) return; await service.call(run, { reason: "Lost session-run claim race" }); } diff --git a/apps/webapp/app/services/realtime/sessions.server.ts b/apps/webapp/app/services/realtime/sessions.server.ts index 55b969e7e55..a523111b5b2 100644 --- a/apps/webapp/app/services/realtime/sessions.server.ts +++ b/apps/webapp/app/services/realtime/sessions.server.ts @@ -1,6 +1,7 @@ import type { PrismaClient, Session } from "@trigger.dev/database"; import type { SessionItem } from "@trigger.dev/core/v3"; import { $replica, prisma } from "~/db.server"; +import { runStore } from "~/v3/runStore.server"; /** * Prefix that {@link SessionId.generate} attaches to every Session friendlyId. 
@@ -131,10 +132,11 @@ export async function serializeSessionWithFriendlyRunId( const base = serializeSession(session); if (!session.currentRunId) return base; - const run = await $replica.taskRun.findFirst({ - where: { id: session.currentRunId }, - select: { friendlyId: true }, - }); + const run = await runStore.findRun( + { id: session.currentRunId }, + { select: { friendlyId: true } }, + $replica + ); return { ...base, @@ -158,14 +160,17 @@ export async function serializeSessionsWithFriendlyRunIds( // `currentRunId` is a plain string pointer (no FK), so scope the lookup to // the caller's tenant — a stale value must not resolve a run in another env. const runs = runIds.length - ? await $replica.taskRun.findMany({ - where: { - id: { in: runIds }, - projectId: scope.projectId, - runtimeEnvironmentId: scope.runtimeEnvironmentId, + ? await runStore.findRuns( + { + where: { + id: { in: runIds }, + projectId: scope.projectId, + runtimeEnvironmentId: scope.runtimeEnvironmentId, + }, + select: { id: true, friendlyId: true }, }, - select: { id: true, friendlyId: true }, - }) + $replica + ) : []; const friendlyIdByRunId = new Map(runs.map((run) => [run.id, run.friendlyId])); diff --git a/apps/webapp/app/services/realtime/shadowRealtimeClientInstance.server.ts b/apps/webapp/app/services/realtime/shadowRealtimeClientInstance.server.ts index 8dbb5007c20..35333f9639b 100644 --- a/apps/webapp/app/services/realtime/shadowRealtimeClientInstance.server.ts +++ b/apps/webapp/app/services/realtime/shadowRealtimeClientInstance.server.ts @@ -1,5 +1,6 @@ import { getMeter } from "@internal/tracing"; import { $replica } from "~/db.server"; +import { runStore } from "~/v3/runStore.server"; import { env } from "~/env.server"; import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactoryInstance.server"; import { singleton } from "~/utils/singleton"; @@ -20,7 +21,7 @@ function initializeShadowRealtimeClient(): ShadowRealtimeClient { }); const comparator = new 
RealtimeShadowComparator({ - runReader: new RunHydrator({ replica: $replica }), + runReader: new RunHydrator({ replica: $replica, runStore }), runListResolver: new ClickHouseRunListResolver({ getClickhouse: (organizationId) => clickhouseFactory.getClickhouseForOrganization(organizationId, "realtime"), diff --git a/apps/webapp/app/services/runsBackfiller.server.ts b/apps/webapp/app/services/runsBackfiller.server.ts index 7fc824f3d39..50e041ee64b 100644 --- a/apps/webapp/app/services/runsBackfiller.server.ts +++ b/apps/webapp/app/services/runsBackfiller.server.ts @@ -1,6 +1,7 @@ import { Tracer } from "@opentelemetry/api"; import type { PrismaClientOrTransaction } from "@trigger.dev/database"; import { RunsReplicationService } from "~/services/runsReplicationService.server"; +import { runStore } from "~/v3/runStore.server"; import { startSpan } from "~/v3/tracing.server"; import { FINAL_RUN_STATUSES } from "../v3/taskStatus"; import { Logger } from "@trigger.dev/core/logger"; @@ -40,22 +41,25 @@ export class RunsBackfillerService { span.setAttribute("cursor", cursor ?? ""); span.setAttribute("batchSize", batchSize ?? 0); - const runs = await this.prisma.taskRun.findMany({ - where: { - createdAt: { - gte: from, - lte: to, + const runs = await runStore.findRuns( + { + where: { + createdAt: { + gte: from, + lte: to, + }, + status: { + in: FINAL_RUN_STATUSES, + }, + ...(cursor ? { id: { gt: cursor } } : {}), }, - status: { - in: FINAL_RUN_STATUSES, + orderBy: { + id: "asc", }, - ...(cursor ? 
{ id: { gt: cursor } } : {}), + take: batchSize, }, - orderBy: { - id: "asc", - }, - take: batchSize, - }); + this.prisma + ); if (runs.length === 0) { this.logger.info("No runs to backfill", { from, to, cursor }); diff --git a/apps/webapp/app/services/runsRepository/clickhouseRunsRepository.server.ts b/apps/webapp/app/services/runsRepository/clickhouseRunsRepository.server.ts index 88e792b4a40..d32652a0b3b 100644 --- a/apps/webapp/app/services/runsRepository/clickhouseRunsRepository.server.ts +++ b/apps/webapp/app/services/runsRepository/clickhouseRunsRepository.server.ts @@ -12,6 +12,7 @@ import { } from "./runsRepository.server"; import parseDuration from "parse-duration"; import { decodeRunsCursor, encodeRunsCursor } from "./runsCursor.server"; +import { runStore } from "~/v3/runStore.server"; type RunCursorRow = { runId: string; createdAt: number }; @@ -148,16 +149,19 @@ export class ClickHouseRunsRepository implements IRunsRepository { } // Then get friendly IDs from Prisma - const runs = await this.options.prisma.taskRun.findMany({ - where: { - id: { - in: runIds, + const runs = await runStore.findRuns( + { + where: { + id: { + in: runIds, + }, + }, + select: { + friendlyId: true, }, }, - select: { - friendlyId: true, - }, - }); + this.options.prisma + ); return runs.map((run) => run.friendlyId); } @@ -165,49 +169,52 @@ export class ClickHouseRunsRepository implements IRunsRepository { async listRuns(options: ListRunsOptions) { const { runIds, pagination } = await this.listRunIds(options); - let runs = await this.options.prisma.taskRun.findMany({ - where: { - id: { - in: runIds, + let runs = await runStore.findRuns( + { + where: { + id: { + in: runIds, + }, + }, + orderBy: { + id: "desc", + }, + select: { + id: true, + friendlyId: true, + taskIdentifier: true, + taskVersion: true, + runtimeEnvironmentId: true, + status: true, + createdAt: true, + startedAt: true, + lockedAt: true, + delayUntil: true, + updatedAt: true, + completedAt: true, + isTest: true, + 
spanId: true, + idempotencyKey: true, + ttl: true, + expiredAt: true, + costInCents: true, + baseCostInCents: true, + usageDurationMs: true, + runTags: true, + depth: true, + rootTaskRunId: true, + batchId: true, + metadata: true, + metadataType: true, + machinePreset: true, + queue: true, + workerQueue: true, + region: true, + annotations: true, }, }, - orderBy: { - id: "desc", - }, - select: { - id: true, - friendlyId: true, - taskIdentifier: true, - taskVersion: true, - runtimeEnvironmentId: true, - status: true, - createdAt: true, - startedAt: true, - lockedAt: true, - delayUntil: true, - updatedAt: true, - completedAt: true, - isTest: true, - spanId: true, - idempotencyKey: true, - ttl: true, - expiredAt: true, - costInCents: true, - baseCostInCents: true, - usageDurationMs: true, - runTags: true, - depth: true, - rootTaskRunId: true, - batchId: true, - metadata: true, - metadataType: true, - machinePreset: true, - queue: true, - workerQueue: true, - region: true, - annotations: true, - }, - }); + this.options.prisma + ); // ClickHouse is slightly delayed, so we're going to do in-memory status filtering too if (options.statuses && options.statuses.length > 0) { diff --git a/apps/webapp/app/v3/eventRepository/index.server.ts b/apps/webapp/app/v3/eventRepository/index.server.ts index 4be392535c3..c59be0f3f57 100644 --- a/apps/webapp/app/v3/eventRepository/index.server.ts +++ b/apps/webapp/app/v3/eventRepository/index.server.ts @@ -2,6 +2,7 @@ import { env } from "~/env.server"; import { eventRepository } from "./eventRepository.server"; import { type IEventRepository, type TraceEventOptions } from "./eventRepository.types"; import { prisma } from "~/db.server"; +import { runStore } from "../runStore.server"; import { logger } from "~/services/logger.server"; import { FEATURE_FLAG } from "../featureFlags"; import { flag } from "../featureFlags.server"; @@ -284,28 +285,31 @@ async function recordRunEvent( } async function findRunForEventCreation(runId: string) { - 
return prisma.taskRun.findFirst({ - where: { + return runStore.findRun( + { id: runId, }, - select: { - friendlyId: true, - taskIdentifier: true, - traceContext: true, - taskEventStore: true, - runtimeEnvironment: { - select: { - id: true, - type: true, - organizationId: true, - projectId: true, - project: { - select: { - externalRef: true, + { + select: { + friendlyId: true, + taskIdentifier: true, + traceContext: true, + taskEventStore: true, + runtimeEnvironment: { + select: { + id: true, + type: true, + organizationId: true, + projectId: true, + project: { + select: { + externalRef: true, + }, }, }, }, }, }, - }); + prisma + ); } diff --git a/apps/webapp/app/v3/failedTaskRun.server.ts b/apps/webapp/app/v3/failedTaskRun.server.ts index f4b3c92ea66..c2f58662491 100644 --- a/apps/webapp/app/v3/failedTaskRun.server.ts +++ b/apps/webapp/app/v3/failedTaskRun.server.ts @@ -37,12 +37,13 @@ export class FailedTaskRunService extends BaseService { const isFriendlyId = anyRunId.startsWith("run_"); - const taskRun = await this._prisma.taskRun.findFirst({ - where: { + const taskRun = await this.runStore.findRun( + { friendlyId: isFriendlyId ? anyRunId : undefined, id: !isFriendlyId ? 
anyRunId : undefined, }, - }); + this._prisma + ); if (!taskRun) { logger.error("[FailedTaskRunService] Task run not found", { @@ -90,12 +91,13 @@ export class FailedTaskRunRetryHelper extends BaseService { completion: TaskRunFailedExecutionResult; isCrash?: boolean; }) { - const taskRun = await this._prisma.taskRun.findFirst({ - where: { + const taskRun = await this.runStore.findRun( + { id: runId, }, - ...FailedTaskRunRetryGetPayload, - }); + FailedTaskRunRetryGetPayload, + this._prisma + ); if (!taskRun) { logger.error("[FailedTaskRunRetryHelper] Task run not found", { diff --git a/apps/webapp/app/v3/mollifier/mutateWithFallback.server.ts b/apps/webapp/app/v3/mollifier/mutateWithFallback.server.ts index e6deff5dbee..91c877c8133 100644 --- a/apps/webapp/app/v3/mollifier/mutateWithFallback.server.ts +++ b/apps/webapp/app/v3/mollifier/mutateWithFallback.server.ts @@ -5,7 +5,9 @@ import type { SnapshotPatch, } from "@trigger.dev/redis-worker"; import type { TaskRun } from "@trigger.dev/database"; +import type { PrismaClientOrTransaction } from "~/db.server"; import { prisma, $replica } from "~/db.server"; +import { runStore } from "~/v3/runStore.server"; import { logger } from "~/services/logger.server"; import { getMollifierBuffer } from "./mollifierBuffer.server"; @@ -238,9 +240,10 @@ async function findRunInPg( friendlyId: string, environmentId: string, ): Promise { - return client.taskRun.findFirst({ - where: { friendlyId, runtimeEnvironmentId: environmentId }, - }); + return runStore.findRun( + { friendlyId, runtimeEnvironmentId: environmentId }, + client as unknown as PrismaClientOrTransaction + ); } function defaultSleep(ms: number): Promise { diff --git a/apps/webapp/app/v3/mollifier/resolveRunForMutation.server.ts b/apps/webapp/app/v3/mollifier/resolveRunForMutation.server.ts index b3db81368b9..dac12768a75 100644 --- a/apps/webapp/app/v3/mollifier/resolveRunForMutation.server.ts +++ b/apps/webapp/app/v3/mollifier/resolveRunForMutation.server.ts @@ -1,5 +1,7 
@@ import type { MollifierBuffer } from "@trigger.dev/redis-worker"; +import type { PrismaClientOrTransaction } from "~/db.server"; import { $replica as defaultReplica, prisma as defaultWriter } from "~/db.server"; +import { runStore } from "~/v3/runStore.server"; import { getMollifierBuffer as defaultGetBuffer } from "./mollifierBuffer.server"; // Discriminated-union resolver used by mutation routes' `findResource`. @@ -41,10 +43,11 @@ export async function resolveRunForMutation(input: { const writer = input.deps?.prismaWriter ?? defaultWriter; const getBuffer = input.deps?.getBuffer ?? defaultGetBuffer; - const pgRun = await replica.taskRun.findFirst({ - where: { friendlyId: input.runParam, runtimeEnvironmentId: input.environmentId }, - select: { friendlyId: true }, - }); + const pgRun = await runStore.findRun( + { friendlyId: input.runParam, runtimeEnvironmentId: input.environmentId }, + { select: { friendlyId: true } }, + replica as PrismaClientOrTransaction + ); if (pgRun) return { source: "pg", friendlyId: pgRun.friendlyId }; const buffer = getBuffer(); @@ -72,10 +75,11 @@ export async function resolveRunForMutation(input: { // lookup-by-friendlyId timing). // Without this, the resolver returns null in degraded states that the // downstream mutateWithFallback flow would otherwise handle correctly. 
- const writerRun = await writer.taskRun.findFirst({ - where: { friendlyId: input.runParam, runtimeEnvironmentId: input.environmentId }, - select: { friendlyId: true }, - }); + const writerRun = await runStore.findRun( + { friendlyId: input.runParam, runtimeEnvironmentId: input.environmentId }, + { select: { friendlyId: true } }, + writer as PrismaClientOrTransaction + ); if (writerRun) return { source: "pg", friendlyId: writerRun.friendlyId }; return null; diff --git a/apps/webapp/app/v3/runEngineHandlers.server.ts b/apps/webapp/app/v3/runEngineHandlers.server.ts index 082974af388..e2285a4fecc 100644 --- a/apps/webapp/app/v3/runEngineHandlers.server.ts +++ b/apps/webapp/app/v3/runEngineHandlers.server.ts @@ -20,6 +20,7 @@ import { createExceptionPropertiesFromError } from "./eventRepository/common.ser import { getEventRepositoryForStore, recordRunDebugLog } from "./eventRepository/index.server"; import { roomFromFriendlyRunId, socketIo } from "./handleSocketIo.server"; import { engine } from "./runEngine.server"; +import { runStore } from "./runStore.server"; import { publishChangeRecord } from "~/services/realtime/runChangeNotifierInstance.server"; import { PerformTaskRunAlertsService } from "./services/alerts/performTaskRunAlerts.server"; import { TaskRunErrorCodes } from "@trigger.dev/core/v3"; @@ -27,32 +28,35 @@ import { TaskRunErrorCodes } from "@trigger.dev/core/v3"; export function registerRunEngineEventBusHandlers() { engine.eventBus.on("runSucceeded", async ({ time, run, organization, environment }) => { const [taskRunError, taskRun] = await tryCatch( - $replica.taskRun.findFirstOrThrow({ - where: { + runStore.findRunOrThrow( + { id: run.id, }, - select: { - id: true, - friendlyId: true, - traceId: true, - spanId: true, - parentSpanId: true, - createdAt: true, - completedAt: true, - taskIdentifier: true, - projectId: true, - runtimeEnvironmentId: true, - environmentType: true, - isTest: true, - organizationId: true, - taskEventStore: true, - // Piggyback 
the realtime run-changed publish on this existing read so the - // per-env channel carries the membership keys (no separate query). No-op when - // the native backend is disabled. - runTags: true, - batchId: true, + { + select: { + id: true, + friendlyId: true, + traceId: true, + spanId: true, + parentSpanId: true, + createdAt: true, + completedAt: true, + taskIdentifier: true, + projectId: true, + runtimeEnvironmentId: true, + environmentType: true, + isTest: true, + organizationId: true, + taskEventStore: true, + // Piggyback the realtime run-changed publish on this existing read so the + // per-env channel carries the membership keys (no separate query). No-op when + // the native backend is disabled. + runTags: true, + batchId: true, + }, }, - }) + $replica + ) ); if (taskRunError) { @@ -110,31 +114,34 @@ export function registerRunEngineEventBusHandlers() { const exception = createExceptionPropertiesFromError(sanitizedError); const [taskRunError, taskRun] = await tryCatch( - $replica.taskRun.findFirstOrThrow({ - where: { + runStore.findRunOrThrow( + { id: run.id, }, - select: { - id: true, - friendlyId: true, - traceId: true, - spanId: true, - parentSpanId: true, - createdAt: true, - completedAt: true, - taskIdentifier: true, - projectId: true, - runtimeEnvironmentId: true, - environmentType: true, - isTest: true, - organizationId: true, - taskEventStore: true, - // Piggyback the realtime run-changed publish on this existing read (no-op when - // the native backend is disabled). - runTags: true, - batchId: true, + { + select: { + id: true, + friendlyId: true, + traceId: true, + spanId: true, + parentSpanId: true, + createdAt: true, + completedAt: true, + taskIdentifier: true, + projectId: true, + runtimeEnvironmentId: true, + environmentType: true, + isTest: true, + organizationId: true, + taskEventStore: true, + // Piggyback the realtime run-changed publish on this existing read (no-op when + // the native backend is disabled). 
+ runTags: true, + batchId: true, + }, }, - }) + $replica + ) ); if (taskRunError) { @@ -179,31 +186,34 @@ export function registerRunEngineEventBusHandlers() { const exception = createExceptionPropertiesFromError(sanitizedError); const [taskRunError, taskRun] = await tryCatch( - $replica.taskRun.findFirstOrThrow({ - where: { + runStore.findRunOrThrow( + { id: run.id, }, - select: { - id: true, - friendlyId: true, - traceId: true, - spanId: true, - parentSpanId: true, - createdAt: true, - completedAt: true, - taskIdentifier: true, - projectId: true, - runtimeEnvironmentId: true, - environmentType: true, - isTest: true, - organizationId: true, - taskEventStore: true, - // Piggyback the realtime run-changed publish on this existing read (no-op when - // the native backend is disabled). - runTags: true, - batchId: true, + { + select: { + id: true, + friendlyId: true, + traceId: true, + spanId: true, + parentSpanId: true, + createdAt: true, + completedAt: true, + taskIdentifier: true, + projectId: true, + runtimeEnvironmentId: true, + environmentType: true, + isTest: true, + organizationId: true, + taskEventStore: true, + // Piggyback the realtime run-changed publish on this existing read (no-op when + // the native backend is disabled). 
+ runTags: true, + batchId: true, + }, }, - }) + $replica + ) ); if (taskRunError) { @@ -265,26 +275,29 @@ export function registerRunEngineEventBusHandlers() { } const [cachedRunError, cachedRun] = await tryCatch( - $replica.taskRun.findFirstOrThrow({ - where: { + runStore.findRunOrThrow( + { id: cachedRunId, }, - select: { - id: true, - friendlyId: true, - traceId: true, - spanId: true, - parentSpanId: true, - createdAt: true, - completedAt: true, - taskIdentifier: true, - projectId: true, - runtimeEnvironmentId: true, - environmentType: true, - isTest: true, - organizationId: true, + { + select: { + id: true, + friendlyId: true, + traceId: true, + spanId: true, + parentSpanId: true, + createdAt: true, + completedAt: true, + taskIdentifier: true, + projectId: true, + runtimeEnvironmentId: true, + environmentType: true, + isTest: true, + organizationId: true, + }, }, - }) + $replica + ) ); if (cachedRunError) { @@ -296,27 +309,30 @@ export function registerRunEngineEventBusHandlers() { } const [blockedRunError, blockedRun] = await tryCatch( - $replica.taskRun.findFirst({ - where: { + runStore.findRun( + { id: blockedRunId, }, - select: { - id: true, - friendlyId: true, - traceId: true, - spanId: true, - parentSpanId: true, - createdAt: true, - completedAt: true, - taskIdentifier: true, - projectId: true, - runtimeEnvironmentId: true, - environmentType: true, - isTest: true, - organizationId: true, - taskEventStore: true, + { + select: { + id: true, + friendlyId: true, + traceId: true, + spanId: true, + parentSpanId: true, + createdAt: true, + completedAt: true, + taskIdentifier: true, + projectId: true, + runtimeEnvironmentId: true, + environmentType: true, + isTest: true, + organizationId: true, + taskEventStore: true, + }, }, - }) + $replica + ) ); if (blockedRunError) { @@ -372,31 +388,34 @@ export function registerRunEngineEventBusHandlers() { } const [taskRunError, taskRun] = await tryCatch( - $replica.taskRun.findFirstOrThrow({ - where: { + 
runStore.findRunOrThrow( + { id: run.id, }, - select: { - id: true, - friendlyId: true, - traceId: true, - spanId: true, - parentSpanId: true, - createdAt: true, - completedAt: true, - taskIdentifier: true, - projectId: true, - runtimeEnvironmentId: true, - environmentType: true, - isTest: true, - organizationId: true, - taskEventStore: true, - // Piggyback the realtime run-changed publish on this existing read (no-op when - // the native backend is disabled). - runTags: true, - batchId: true, + { + select: { + id: true, + friendlyId: true, + traceId: true, + spanId: true, + parentSpanId: true, + createdAt: true, + completedAt: true, + taskIdentifier: true, + projectId: true, + runtimeEnvironmentId: true, + environmentType: true, + isTest: true, + organizationId: true, + taskEventStore: true, + // Piggyback the realtime run-changed publish on this existing read (no-op when + // the native backend is disabled). + runTags: true, + batchId: true, + }, }, - }) + $replica + ) ); if (taskRunError) { @@ -438,31 +457,34 @@ export function registerRunEngineEventBusHandlers() { engine.eventBus.on("runCancelled", async ({ time, run, organization, environment }) => { const [taskRunError, taskRun] = await tryCatch( - $replica.taskRun.findFirstOrThrow({ - where: { + runStore.findRunOrThrow( + { id: run.id, }, - select: { - id: true, - friendlyId: true, - traceId: true, - spanId: true, - parentSpanId: true, - createdAt: true, - completedAt: true, - taskIdentifier: true, - projectId: true, - runtimeEnvironmentId: true, - environmentType: true, - isTest: true, - organizationId: true, - taskEventStore: true, - // Piggyback the realtime run-changed publish on this existing read (no-op when - // the native backend is disabled). 
- runTags: true, - batchId: true, + { + select: { + id: true, + friendlyId: true, + traceId: true, + spanId: true, + parentSpanId: true, + createdAt: true, + completedAt: true, + taskIdentifier: true, + projectId: true, + runtimeEnvironmentId: true, + environmentType: true, + isTest: true, + organizationId: true, + taskEventStore: true, + // Piggyback the realtime run-changed publish on this existing read (no-op when + // the native backend is disabled). + runTags: true, + batchId: true, + }, }, - }) + $replica + ) ); if (taskRunError) { diff --git a/apps/webapp/app/v3/services/alerts/performTaskRunAlerts.server.ts b/apps/webapp/app/v3/services/alerts/performTaskRunAlerts.server.ts index 9c055346232..31912c39fd0 100644 --- a/apps/webapp/app/v3/services/alerts/performTaskRunAlerts.server.ts +++ b/apps/webapp/app/v3/services/alerts/performTaskRunAlerts.server.ts @@ -12,17 +12,20 @@ type FoundRun = Prisma.Result< export class PerformTaskRunAlertsService extends BaseService { public async call(runId: string) { - const run = await this._prisma.taskRun.findFirst({ - where: { id: runId }, - include: { - lockedBy: true, - runtimeEnvironment: { - include: { - parentEnvironment: true, + const run = await this.runStore.findRun( + { id: runId }, + { + include: { + lockedBy: true, + runtimeEnvironment: { + include: { + parentEnvironment: true, + }, }, }, }, - }); + this._prisma + ); if (!run) { return; diff --git a/apps/webapp/app/v3/services/batchTriggerV3.server.ts b/apps/webapp/app/v3/services/batchTriggerV3.server.ts index 33036871599..c001932baad 100644 --- a/apps/webapp/app/v3/services/batchTriggerV3.server.ts +++ b/apps/webapp/app/v3/services/batchTriggerV3.server.ts @@ -352,20 +352,23 @@ export class BatchTriggerV3Service extends BaseService { // Fetch cached runs for each task identifier separately to make use of the index const cachedRuns = await Promise.all( Object.entries(itemsByTask).map(([taskIdentifier, items]) => - this._prisma.taskRun.findMany({ - where: { - 
runtimeEnvironmentId: environment.id, - taskIdentifier, - idempotencyKey: { - in: items.map((i) => i.options?.idempotencyKey).filter(Boolean), + this.runStore.findRuns( + { + where: { + runtimeEnvironmentId: environment.id, + taskIdentifier, + idempotencyKey: { + in: items.map((i) => i.options?.idempotencyKey).filter(Boolean), + }, + }, + select: { + friendlyId: true, + idempotencyKey: true, + idempotencyKeyExpiresAt: true, }, }, - select: { - friendlyId: true, - idempotencyKey: true, - idempotencyKeyExpiresAt: true, - }, - }) + this._prisma + ) ) ).then((results) => results.flat()); diff --git a/apps/webapp/app/v3/services/bulk/BulkActionV2.server.ts b/apps/webapp/app/v3/services/bulk/BulkActionV2.server.ts index babdb02ca6a..76d550c7008 100644 --- a/apps/webapp/app/v3/services/bulk/BulkActionV2.server.ts +++ b/apps/webapp/app/v3/services/bulk/BulkActionV2.server.ts @@ -182,22 +182,25 @@ export class BulkActionService extends BaseService { case BulkActionType.CANCEL: { const cancelService = new CancelTaskRunService(this._prisma); - const runs = await this._replica.taskRun.findMany({ - where: { - id: { - in: runIdsToProcess, + const runs = await this.runStore.findRuns( + { + where: { + id: { + in: runIdsToProcess, + }, + }, + select: { + id: true, + engine: true, + friendlyId: true, + status: true, + createdAt: true, + completedAt: true, + taskEventStore: true, }, }, - select: { - id: true, - engine: true, - friendlyId: true, - status: true, - createdAt: true, - completedAt: true, - taskEventStore: true, - }, - }); + this._replica + ); await pMap( runs, @@ -233,13 +236,16 @@ export class BulkActionService extends BaseService { case BulkActionType.REPLAY: { const replayService = new ReplayTaskRunService(this._prisma); - const runs = await this._replica.taskRun.findMany({ - where: { - id: { - in: runIdsToProcess, + const runs = await this.runStore.findRuns( + { + where: { + id: { + in: runIdsToProcess, + }, }, }, - }); + this._replica + ); await pMap( runs, diff 
--git a/apps/webapp/app/v3/services/cancelDevSessionRuns.server.ts b/apps/webapp/app/v3/services/cancelDevSessionRuns.server.ts index f779d81641f..c1562275e58 100644 --- a/apps/webapp/app/v3/services/cancelDevSessionRuns.server.ts +++ b/apps/webapp/app/v3/services/cancelDevSessionRuns.server.ts @@ -68,12 +68,8 @@ export class CancelDevSessionRunsService extends BaseService { logger.debug("Cancelling in progress run", { runId }); const taskRun = runId.startsWith("run_") - ? await this._prisma.taskRun.findFirst({ - where: { friendlyId: runId }, - }) - : await this._prisma.taskRun.findFirst({ - where: { id: runId }, - }); + ? await this.runStore.findRun({ friendlyId: runId }, this._prisma) + : await this.runStore.findRun({ id: runId }, this._prisma); if (!taskRun) { return; diff --git a/apps/webapp/app/v3/services/completeAttempt.server.ts b/apps/webapp/app/v3/services/completeAttempt.server.ts index c4076648819..22a9047c3fe 100644 --- a/apps/webapp/app/v3/services/completeAttempt.server.ts +++ b/apps/webapp/app/v3/services/completeAttempt.server.ts @@ -70,14 +70,17 @@ export class CompleteAttemptService extends BaseService { id: execution.attempt.id, }); - const run = await this._prisma.taskRun.findFirst({ - where: { + const run = await this.runStore.findRun( + { friendlyId: execution.run.id, }, - select: { - id: true, + { + select: { + id: true, + }, }, - }); + this._prisma + ); if (!run) { logger.error("[CompleteAttemptService] Task run not found", { diff --git a/apps/webapp/app/v3/services/crashTaskRun.server.ts b/apps/webapp/app/v3/services/crashTaskRun.server.ts index cd55b9ec0f9..bff4b8d65b1 100644 --- a/apps/webapp/app/v3/services/crashTaskRun.server.ts +++ b/apps/webapp/app/v3/services/crashTaskRun.server.ts @@ -35,11 +35,7 @@ export class CrashTaskRunService extends BaseService { return; } - const taskRun = await this._prisma.taskRun.findFirst({ - where: { - id: runId, - }, - }); + const taskRun = await this.runStore.findRun({ id: runId }, this._prisma); if 
(!taskRun) { logger.error("[CrashTaskRunService] Task run not found", { runId }); diff --git a/apps/webapp/app/v3/services/createCheckpointRestoreEvent.server.ts b/apps/webapp/app/v3/services/createCheckpointRestoreEvent.server.ts index 63a8b6bb9aa..59c37947178 100644 --- a/apps/webapp/app/v3/services/createCheckpointRestoreEvent.server.ts +++ b/apps/webapp/app/v3/services/createCheckpointRestoreEvent.server.ts @@ -58,19 +58,22 @@ export class CreateCheckpointRestoreEventService extends BaseService { let taskRunDependencyId: string | undefined; if (params.dependencyFriendlyRunId) { - const run = await this._prisma.taskRun.findFirst({ - where: { + const run = await this.runStore.findRun( + { friendlyId: params.dependencyFriendlyRunId, }, - select: { - id: true, - dependency: { - select: { - id: true, + { + select: { + id: true, + dependency: { + select: { + id: true, + }, }, }, }, - }); + this._prisma + ); taskRunDependencyId = run?.dependency?.id; diff --git a/apps/webapp/app/v3/services/createTaskRunAttempt.server.ts b/apps/webapp/app/v3/services/createTaskRunAttempt.server.ts index 8be2b9557cc..dbc4c576b75 100644 --- a/apps/webapp/app/v3/services/createTaskRunAttempt.server.ts +++ b/apps/webapp/app/v3/services/createTaskRunAttempt.server.ts @@ -12,6 +12,7 @@ import { FINAL_RUN_STATUSES } from "../taskStatus"; import { BaseService, ServiceValidationError } from "./baseService.server"; import { CrashTaskRunService } from "./crashTaskRun.server"; import { ExpireEnqueuedRunService } from "./expireEnqueuedRun.server"; +import { runStore } from "../runStore.server"; export class CreateTaskRunAttemptService extends BaseService { public async call({ @@ -45,43 +46,46 @@ export class CreateTaskRunAttemptService extends BaseService { span.setAttribute("taskRunId", runId); } - const taskRun = await this._prisma.taskRun.findFirst({ - where: { + const taskRun = await this.runStore.findRun( + { id: !isFriendlyId ? runId : undefined, friendlyId: isFriendlyId ? 
runId : undefined, runtimeEnvironmentId: environment.id, }, - include: { - attempts: { - take: 1, - orderBy: { - number: "desc", + { + include: { + attempts: { + take: 1, + orderBy: { + number: "desc", + }, }, - }, - lockedBy: { - include: { - worker: { - select: { - id: true, - version: true, - sdkVersion: true, - cliVersion: true, - supportsLazyAttempts: true, + lockedBy: { + include: { + worker: { + select: { + id: true, + version: true, + sdkVersion: true, + cliVersion: true, + supportsLazyAttempts: true, + }, }, }, }, - }, - batchItems: { - include: { - batchTaskRun: { - select: { - friendlyId: true, + batchItems: { + include: { + batchTaskRun: { + select: { + friendlyId: true, + }, }, }, }, }, }, - }); + this._prisma + ); logger.debug("Creating a task run attempt", { taskRun }); @@ -263,20 +267,23 @@ async function getAuthenticatedEnvironmentFromRun( ) { const isFriendlyId = friendlyId.startsWith("run_"); - const taskRun = await (prismaClient ?? prisma).taskRun.findFirst({ - where: { + const taskRun = await runStore.findRun( + { id: !isFriendlyId ? friendlyId : undefined, friendlyId: isFriendlyId ? friendlyId : undefined, }, - include: { - runtimeEnvironment: { - include: { - organization: true, - project: true, + { + include: { + runtimeEnvironment: { + include: { + organization: true, + project: true, + }, }, }, }, - }); + prismaClient ?? 
prisma + ); if (!taskRun) { return; diff --git a/apps/webapp/app/v3/services/enqueueDelayedRun.server.ts b/apps/webapp/app/v3/services/enqueueDelayedRun.server.ts index 0b6149dfae6..79cb4fb0976 100644 --- a/apps/webapp/app/v3/services/enqueueDelayedRun.server.ts +++ b/apps/webapp/app/v3/services/enqueueDelayedRun.server.ts @@ -32,37 +32,40 @@ export class EnqueueDelayedRunService extends BaseService { } public async call(runId: string) { - const run = await this._prisma.taskRun.findFirst({ - where: { + const run = await this.runStore.findRun( + { id: runId, }, - include: { - runtimeEnvironment: { - include: { - organization: true, - project: true, + { + include: { + runtimeEnvironment: { + include: { + organization: true, + project: true, + }, }, - }, - dependency: { - include: { - dependentBatchRun: { - include: { - dependentTaskAttempt: { - include: { - taskRun: true, + dependency: { + include: { + dependentBatchRun: { + include: { + dependentTaskAttempt: { + include: { + taskRun: true, + }, }, }, }, - }, - dependentAttempt: { - include: { - taskRun: true, + dependentAttempt: { + include: { + taskRun: true, + }, }, }, }, }, }, - }); + this._prisma + ); if (!run) { logger.debug("Could not find delayed run to enqueue", { diff --git a/apps/webapp/app/v3/services/executeTasksWaitingForDeploy.ts b/apps/webapp/app/v3/services/executeTasksWaitingForDeploy.ts index fb519b43151..a77727c9242 100644 --- a/apps/webapp/app/v3/services/executeTasksWaitingForDeploy.ts +++ b/apps/webapp/app/v3/services/executeTasksWaitingForDeploy.ts @@ -39,29 +39,32 @@ export class ExecuteTasksWaitingForDeployService extends BaseService { const maxCount = env.LEGACY_RUN_ENGINE_WAITING_FOR_DEPLOY_BATCH_SIZE; - const runsWaitingForDeploy = await this._replica.taskRun.findMany({ - where: { - runtimeEnvironmentId: backgroundWorker.runtimeEnvironmentId, - projectId: backgroundWorker.projectId, - status: "WAITING_FOR_DEPLOY", - taskIdentifier: { - in: backgroundWorker.tasks.map((task) => task.slug), 
+ const runsWaitingForDeploy = await this.runStore.findRuns( + { + where: { + runtimeEnvironmentId: backgroundWorker.runtimeEnvironmentId, + projectId: backgroundWorker.projectId, + status: "WAITING_FOR_DEPLOY", + taskIdentifier: { + in: backgroundWorker.tasks.map((task) => task.slug), + }, }, + orderBy: { + createdAt: "asc", + }, + select: { + id: true, + status: true, + taskIdentifier: true, + concurrencyKey: true, + queue: true, + updatedAt: true, + createdAt: true, + }, + take: maxCount + 1, }, - orderBy: { - createdAt: "asc", - }, - select: { - id: true, - status: true, - taskIdentifier: true, - concurrencyKey: true, - queue: true, - updatedAt: true, - createdAt: true, - }, - take: maxCount + 1, - }); + this._replica + ); if (!runsWaitingForDeploy.length) { return; diff --git a/apps/webapp/app/v3/services/expireEnqueuedRun.server.ts b/apps/webapp/app/v3/services/expireEnqueuedRun.server.ts index 0409b6ed956..12ccddbf2e6 100644 --- a/apps/webapp/app/v3/services/expireEnqueuedRun.server.ts +++ b/apps/webapp/app/v3/services/expireEnqueuedRun.server.ts @@ -23,19 +23,22 @@ export class ExpireEnqueuedRunService extends BaseService { } public async call(runId: string) { - const run = await this._prisma.taskRun.findFirst({ - where: { + const run = await this.runStore.findRun( + { id: runId, }, - include: { - runtimeEnvironment: { - include: { - organization: true, - project: true, + { + include: { + runtimeEnvironment: { + include: { + organization: true, + project: true, + }, }, }, }, - }); + this._prisma + ); if (!run) { logger.debug("Could not find enqueued run to expire", { diff --git a/apps/webapp/app/v3/services/finalizeTaskRun.server.ts b/apps/webapp/app/v3/services/finalizeTaskRun.server.ts index ab51df5de60..b770ceef177 100644 --- a/apps/webapp/app/v3/services/finalizeTaskRun.server.ts +++ b/apps/webapp/app/v3/services/finalizeTaskRun.server.ts @@ -152,22 +152,25 @@ export class FinalizeTaskRunService extends BaseService { if (isFatalRunStatus(run.status)) { 
logger.warn("FinalizeTaskRunService: Fatal status", { runId: run.id, status: run.status }); - const extendedRun = await this._prisma.taskRun.findFirst({ - where: { id: run.id }, - select: { - id: true, - lockedToVersion: { - select: { - supportsLazyAttempts: true, + const extendedRun = await this.runStore.findRun( + { id: run.id }, + { + select: { + id: true, + lockedToVersion: { + select: { + supportsLazyAttempts: true, + }, }, - }, - runtimeEnvironment: { - select: { - type: true, + runtimeEnvironment: { + select: { + type: true, + }, }, }, }, - }); + this._prisma + ); if (extendedRun && extendedRun.runtimeEnvironment.type !== "DEVELOPMENT") { logger.warn("FinalizeTaskRunService: Fatal status, requesting worker exit", { diff --git a/apps/webapp/app/v3/services/retryAttempt.server.ts b/apps/webapp/app/v3/services/retryAttempt.server.ts index b4ab5235761..6ed83c10807 100644 --- a/apps/webapp/app/v3/services/retryAttempt.server.ts +++ b/apps/webapp/app/v3/services/retryAttempt.server.ts @@ -5,11 +5,7 @@ import { BaseService } from "./baseService.server"; export class RetryAttemptService extends BaseService { public async call(runId: string) { - const taskRun = await this._prisma.taskRun.findFirst({ - where: { - id: runId, - }, - }); + const taskRun = await this.runStore.findRun({ id: runId }, this._prisma); if (!taskRun) { logger.error("Task run not found", { runId }); diff --git a/apps/webapp/app/v3/services/updateFatalRunError.server.ts b/apps/webapp/app/v3/services/updateFatalRunError.server.ts index 2363d241c0c..dcf2488f273 100644 --- a/apps/webapp/app/v3/services/updateFatalRunError.server.ts +++ b/apps/webapp/app/v3/services/updateFatalRunError.server.ts @@ -20,11 +20,7 @@ export class UpdateFatalRunErrorService extends BaseService { logger.debug("UpdateFatalRunErrorService.call", { runId, opts }); - const taskRun = await this._prisma.taskRun.findFirst({ - where: { - id: runId, - }, - }); + const taskRun = await this.runStore.findRun({ id: runId }, 
this._prisma); if (!taskRun) { logger.error("[UpdateFatalRunErrorService] Task run not found", { runId }); diff --git a/apps/webapp/app/v3/taskRunHeartbeatFailed.server.ts b/apps/webapp/app/v3/taskRunHeartbeatFailed.server.ts index 8359cc4a4aa..c472bff53ee 100644 --- a/apps/webapp/app/v3/taskRunHeartbeatFailed.server.ts +++ b/apps/webapp/app/v3/taskRunHeartbeatFailed.server.ts @@ -11,32 +11,35 @@ import { TaskRunErrorCodes } from "@trigger.dev/core/v3"; export class TaskRunHeartbeatFailedService extends BaseService { public async call(runId: string) { - const taskRun = await this._prisma.taskRun.findFirst({ - where: { + const taskRun = await this.runStore.findRun( + { id: runId, }, - select: { - id: true, - friendlyId: true, - status: true, - lockedAt: true, - runtimeEnvironment: { - select: { - type: true, + { + select: { + id: true, + friendlyId: true, + status: true, + lockedAt: true, + runtimeEnvironment: { + select: { + type: true, + }, }, - }, - lockedToVersion: { - select: { - supportsLazyAttempts: true, + lockedToVersion: { + select: { + supportsLazyAttempts: true, + }, }, - }, - _count: { - select: { - attempts: true, + _count: { + select: { + attempts: true, + }, }, }, }, - }); + this._prisma + ); if (!taskRun) { logger.error("[TaskRunHeartbeatFailedService] Task run not found", { diff --git a/apps/webapp/test/realtime/runReaderProjection.test.ts b/apps/webapp/test/realtime/runReaderProjection.test.ts index 07aebf92589..ad6616f5464 100644 --- a/apps/webapp/test/realtime/runReaderProjection.test.ts +++ b/apps/webapp/test/realtime/runReaderProjection.test.ts @@ -1,4 +1,5 @@ import { describe, expect, it, vi } from "vitest"; +import { PostgresRunStore } from "@internal/run-store"; import { buildHydratorSelect, RunHydrator } from "~/services/realtime/runReader.server"; describe("buildHydratorSelect", () => { @@ -54,7 +55,8 @@ describe("RunHydrator.hydrateByIds column projection", () => { }), }, } as any; - return { hydrator: new RunHydrator({ replica }), 
getSelect: () => capturedSelect }; + const runStore = new PostgresRunStore({ prisma: replica, readOnlyPrisma: replica }); + return { hydrator: new RunHydrator({ replica, runStore }), getSelect: () => capturedSelect }; } it("projects the SELECT by skipColumns", async () => { From 5683952331df5bb7f79d1cc0afec66b323799c4e Mon Sep 17 00:00:00 2001 From: Dan Sutton Date: Thu, 18 Jun 2026 16:12:57 +0100 Subject: [PATCH 26/83] refactor(webapp): route presenter TaskRun reads through the run store Relocate the dashboard presenter TaskRun reads to the RunStore read methods, preserving the exact client per site. Behavior-preserving. --- .../v3/ApiRetrieveRunPresenter.server.ts | 56 ++++----- .../v3/ApiRunResultPresenter.server.ts | 18 +-- .../v3/NextRunListPresenter.server.ts | 8 +- .../app/presenters/v3/RunPresenter.server.ts | 108 +++++++++--------- .../v3/RunStreamPresenter.server.ts | 14 ++- .../v3/SessionListPresenter.server.ts | 18 +-- .../presenters/v3/SessionPresenter.server.ts | 23 ++-- .../app/presenters/v3/SpanPresenter.server.ts | 76 ++++++------ .../presenters/v3/TestTaskPresenter.server.ts | 62 +++++----- 9 files changed, 213 insertions(+), 170 deletions(-) diff --git a/apps/webapp/app/presenters/v3/ApiRetrieveRunPresenter.server.ts b/apps/webapp/app/presenters/v3/ApiRetrieveRunPresenter.server.ts index fec8dabdb0e..68e3643f9e9 100644 --- a/apps/webapp/app/presenters/v3/ApiRetrieveRunPresenter.server.ts +++ b/apps/webapp/app/presenters/v3/ApiRetrieveRunPresenter.server.ts @@ -22,6 +22,7 @@ import { type SyntheticRun, } from "~/v3/mollifier/readFallback.server"; import { generatePresignedUrl } from "~/v3/objectStore.server"; +import { runStore } from "~/v3/runStore.server"; import { tracer } from "~/v3/tracer.server"; import { startSpanWithEnv } from "~/v3/tracing.server"; @@ -110,38 +111,41 @@ export class ApiRetrieveRunPresenter { friendlyId: string, env: AuthenticatedEnvironment, ): Promise { - const pgRow = await $replica.taskRun.findFirst({ - where: { + 
const pgRow = await runStore.findRun( + { friendlyId, runtimeEnvironmentId: env.id, }, - select: { - ...commonRunSelect, - traceId: true, - payload: true, - payloadType: true, - output: true, - outputType: true, - error: true, - attempts: { - select: { - id: true, + { + select: { + ...commonRunSelect, + traceId: true, + payload: true, + payloadType: true, + output: true, + outputType: true, + error: true, + attempts: { + select: { + id: true, + }, + }, + attemptNumber: true, + engine: true, + taskEventStore: true, + parentTaskRun: { + select: commonRunSelect, + }, + rootTaskRun: { + select: commonRunSelect, + }, + childRuns: { + select: commonRunSelect, }, - }, - attemptNumber: true, - engine: true, - taskEventStore: true, - parentTaskRun: { - select: commonRunSelect, - }, - rootTaskRun: { - select: commonRunSelect, - }, - childRuns: { - select: commonRunSelect, }, }, - }); + $replica + ); if (pgRow) return { ...pgRow, isBuffered: false }; diff --git a/apps/webapp/app/presenters/v3/ApiRunResultPresenter.server.ts b/apps/webapp/app/presenters/v3/ApiRunResultPresenter.server.ts index c11a04a1581..7e0540674e8 100644 --- a/apps/webapp/app/presenters/v3/ApiRunResultPresenter.server.ts +++ b/apps/webapp/app/presenters/v3/ApiRunResultPresenter.server.ts @@ -1,6 +1,7 @@ import { TaskRunExecutionResult } from "@trigger.dev/core/v3"; import { executionResultForTaskRun } from "~/models/taskRun.server"; import { AuthenticatedEnvironment } from "~/services/apiAuth.server"; +import { runStore } from "~/v3/runStore.server"; import { BasePresenter } from "./basePresenter.server"; export class ApiRunResultPresenter extends BasePresenter { @@ -9,19 +10,22 @@ export class ApiRunResultPresenter extends BasePresenter { env: AuthenticatedEnvironment ): Promise { return this.traceWithEnv("call", env, async (span) => { - const taskRun = await this._prisma.taskRun.findFirst({ - where: { + const taskRun = await runStore.findRun( + { friendlyId, runtimeEnvironmentId: env.id, }, - include: { 
- attempts: { - orderBy: { - createdAt: "desc", + { + include: { + attempts: { + orderBy: { + createdAt: "desc", + }, }, }, }, - }); + this._prisma + ); if (!taskRun) { return undefined; diff --git a/apps/webapp/app/presenters/v3/NextRunListPresenter.server.ts b/apps/webapp/app/presenters/v3/NextRunListPresenter.server.ts index 3594aa71cea..2e587e8c4a7 100644 --- a/apps/webapp/app/presenters/v3/NextRunListPresenter.server.ts +++ b/apps/webapp/app/presenters/v3/NextRunListPresenter.server.ts @@ -13,6 +13,7 @@ import { getTaskIdentifiers } from "~/models/task.server"; import { RunsRepository } from "~/services/runsRepository/runsRepository.server"; import { regionForDisplay } from "~/runEngine/concerns/workerQueueSplit.server"; import { machinePresetFromRun } from "~/v3/machinePresets.server"; +import { runStore } from "~/v3/runStore.server"; import { ServiceValidationError } from "~/v3/services/baseService.server"; import { isCancellableRunStatus, isFinalRunStatus, isPendingRunStatus } from "~/v3/taskStatus"; @@ -206,11 +207,12 @@ export class NextRunListPresenter { let hasAnyRuns = runs.length > 0; if (!hasAnyRuns) { - const firstRun = await this.replica.taskRun.findFirst({ - where: { + const firstRun = await runStore.findRun( + { runtimeEnvironmentId: environmentId, }, - }); + this.replica + ); if (firstRun) { hasAnyRuns = true; diff --git a/apps/webapp/app/presenters/v3/RunPresenter.server.ts b/apps/webapp/app/presenters/v3/RunPresenter.server.ts index 1ff68e9b96f..c4c3ac88c48 100644 --- a/apps/webapp/app/presenters/v3/RunPresenter.server.ts +++ b/apps/webapp/app/presenters/v3/RunPresenter.server.ts @@ -8,6 +8,7 @@ import { getTaskEventStoreTableForRun } from "~/v3/taskEventStore.server"; import { isFinalRunStatus } from "~/v3/taskStatus"; import { env } from "~/env.server"; import { getEventRepositoryForStore } from "~/v3/eventRepository/index.server"; +import { runStore } from "~/v3/runStore.server"; type Result = Awaited>; export type Run = Result["run"]; @@ 
-62,57 +63,8 @@ export class RunPresenter { // buffer view. `findFirstOrThrow` would log a `PrismaClient error` // every tick of the page poll, masking real DB issues with synthetic // not-found noise. - const run = await this.#prismaClient.taskRun.findFirst({ - select: { - id: true, - createdAt: true, - taskEventStore: true, - taskIdentifier: true, - number: true, - traceId: true, - spanId: true, - parentSpanId: true, - friendlyId: true, - status: true, - startedAt: true, - completedAt: true, - logsDeletedAt: true, - annotations: true, - rootTaskRun: { - select: { - friendlyId: true, - spanId: true, - createdAt: true, - }, - }, - parentTaskRun: { - select: { - friendlyId: true, - spanId: true, - createdAt: true, - }, - }, - runtimeEnvironment: { - select: { - id: true, - type: true, - slug: true, - organizationId: true, - orgMember: { - select: { - user: { - select: { - id: true, - name: true, - displayName: true, - }, - }, - }, - }, - }, - }, - }, - where: { + const run = await runStore.findRun( + { friendlyId: runFriendlyId, project: { slug: projectSlug, @@ -125,7 +77,59 @@ export class RunPresenter { }, }, }, - }); + { + select: { + id: true, + createdAt: true, + taskEventStore: true, + taskIdentifier: true, + number: true, + traceId: true, + spanId: true, + parentSpanId: true, + friendlyId: true, + status: true, + startedAt: true, + completedAt: true, + logsDeletedAt: true, + annotations: true, + rootTaskRun: { + select: { + friendlyId: true, + spanId: true, + createdAt: true, + }, + }, + parentTaskRun: { + select: { + friendlyId: true, + spanId: true, + createdAt: true, + }, + }, + runtimeEnvironment: { + select: { + id: true, + type: true, + slug: true, + organizationId: true, + orgMember: { + select: { + user: { + select: { + id: true, + name: true, + displayName: true, + }, + }, + }, + }, + }, + }, + }, + }, + this.#prismaClient + ); if (!run) { throw new RunNotInPgError(runFriendlyId); diff --git 
a/apps/webapp/app/presenters/v3/RunStreamPresenter.server.ts b/apps/webapp/app/presenters/v3/RunStreamPresenter.server.ts index e0e88e4dd02..ab777d0b8e9 100644 --- a/apps/webapp/app/presenters/v3/RunStreamPresenter.server.ts +++ b/apps/webapp/app/presenters/v3/RunStreamPresenter.server.ts @@ -6,6 +6,7 @@ import { ABORT_REASON_SEND_ERROR, createSSELoader, SendFunction } from "~/utils/ import { throttle } from "~/utils/throttle"; import { getMollifierBuffer } from "~/v3/mollifier/mollifierBuffer.server"; import { deserialiseMollifierSnapshot } from "~/v3/mollifier/mollifierSnapshot.server"; +import { runStore } from "~/v3/runStore.server"; import { tracePubSub } from "~/v3/services/tracePubSub.server"; const PING_INTERVAL = 5_000; @@ -36,8 +37,8 @@ export class RunStreamPresenter { // Scope the lookup to organizations the requesting user is a member // of, matching RunPresenter's run lookup. Unauthorized and missing // runs are indistinguishable (both 404). - const run = await prismaClient.taskRun.findFirst({ - where: { + const run = await runStore.findRun( + { friendlyId: runFriendlyId, project: { organization: { @@ -49,10 +50,13 @@ export class RunStreamPresenter { }, }, }, - select: { - traceId: true, + { + select: { + traceId: true, + }, }, - }); + prismaClient + ); // Fall back to the mollifier buffer when the run isn't in PG yet. 
// The buffered run has no execution events to stream, but we still diff --git a/apps/webapp/app/presenters/v3/SessionListPresenter.server.ts b/apps/webapp/app/presenters/v3/SessionListPresenter.server.ts index 0586ab8eced..bff1bda0177 100644 --- a/apps/webapp/app/presenters/v3/SessionListPresenter.server.ts +++ b/apps/webapp/app/presenters/v3/SessionListPresenter.server.ts @@ -10,6 +10,7 @@ import { } from "~/services/sessionsRepository/sessionsRepository.server"; import { ServiceValidationError } from "~/v3/services/baseService.server"; import { findCurrentWorkerFromEnvironment } from "~/v3/models/workerDeployment.server"; +import { runStore } from "~/v3/runStore.server"; import { startActiveSpan } from "~/v3/tracer.server"; export type SessionListOptions = { @@ -189,14 +190,17 @@ export class SessionListPresenter { // pointer could surface another tenant's run. The list query above // is already env-scoped; the run lookup needs the same fence. return currentRunIds.length > 0 - ? this.replica.taskRun.findMany({ - where: { - id: { in: currentRunIds }, - projectId, - runtimeEnvironmentId: environmentId, + ? 
runStore.findRuns( + { + where: { + id: { in: currentRunIds }, + projectId, + runtimeEnvironmentId: environmentId, + }, + select: { id: true, friendlyId: true }, }, - select: { id: true, friendlyId: true }, - }) + this.replica + ) : []; } ); diff --git a/apps/webapp/app/presenters/v3/SessionPresenter.server.ts b/apps/webapp/app/presenters/v3/SessionPresenter.server.ts index c63f9e39a2a..36ef46d4b4e 100644 --- a/apps/webapp/app/presenters/v3/SessionPresenter.server.ts +++ b/apps/webapp/app/presenters/v3/SessionPresenter.server.ts @@ -6,6 +6,7 @@ import { chatSnapshotStorageKey } from "~/services/realtime/chatSnapshot.server" import { resolveSessionByIdOrExternalId } from "~/services/realtime/sessions.server"; import { logger } from "~/services/logger.server"; import { generatePresignedUrl } from "~/v3/objectStore.server"; +import { runStore } from "~/v3/runStore.server"; import { ServiceValidationError } from "~/v3/services/baseService.server"; import { startActiveSpan } from "~/v3/tracer.server"; @@ -96,10 +97,13 @@ export class SessionPresenter { async (span) => { span.setAttribute("runIds.count", runIds.length); return runIds.length > 0 - ? this.replica.taskRun.findMany({ - where: { id: { in: runIds } }, - select: { id: true, friendlyId: true, status: true }, - }) + ? runStore.findRuns( + { + where: { id: { in: runIds } }, + select: { id: true, friendlyId: true, status: true }, + }, + this.replica + ) : []; } ); @@ -110,10 +114,13 @@ export class SessionPresenter { (await startActiveSpan( "SessionPresenter.findCurrentRunFallback", () => - this.replica.taskRun.findFirst({ - where: { id: session.currentRunId! }, - select: { id: true, friendlyId: true, status: true }, - }) + runStore.findRun( + { id: session.currentRunId! 
}, + { + select: { id: true, friendlyId: true, status: true }, + }, + this.replica + ) )) : null; diff --git a/apps/webapp/app/presenters/v3/SpanPresenter.server.ts b/apps/webapp/app/presenters/v3/SpanPresenter.server.ts index 98ee75cda39..49d8f303560 100644 --- a/apps/webapp/app/presenters/v3/SpanPresenter.server.ts +++ b/apps/webapp/app/presenters/v3/SpanPresenter.server.ts @@ -35,6 +35,7 @@ import { import { getEventRepositoryForStore } from "~/v3/eventRepository/index.server"; import { findRunByIdWithMollifierFallback } from "~/v3/mollifier/readFallback.server"; import { buildSyntheticSpanRun } from "~/v3/mollifier/syntheticSpanRun.server"; +import { runStore } from "~/v3/runStore.server"; export type PromptSpanData = { slug: string; @@ -132,20 +133,23 @@ export class SpanPresenter extends BasePresenter { throw new Error("Project not found"); } - const parentRun = await this._prisma.taskRun.findFirst({ - select: { - traceId: true, - runtimeEnvironmentId: true, - projectId: true, - taskEventStore: true, - createdAt: true, - completedAt: true, - }, - where: { + const parentRun = await runStore.findRun( + { friendlyId: runFriendlyId, projectId: project.id, }, - }); + { + select: { + traceId: true, + runtimeEnvironmentId: true, + projectId: true, + taskEventStore: true, + createdAt: true, + completedAt: true, + }, + }, + this._prisma + ); if (!parentRun) { // PG miss → fall back to the mollifier buffer. Without this the @@ -494,7 +498,17 @@ export class SpanPresenter extends BasePresenter { spanId: string; environmentId: string; }) { - const run = await this._replica.taskRun.findFirst({ + const run = await runStore.findRun( + originalRunId + ? { + friendlyId: originalRunId, + runtimeEnvironmentId: environmentId, + } + : { + spanId, + runtimeEnvironmentId: environmentId, + }, + { select: { id: true, spanId: true, @@ -608,16 +622,9 @@ export class SpanPresenter extends BasePresenter { }, }, }, - where: originalRunId - ? 
{ - friendlyId: originalRunId, - runtimeEnvironmentId: environmentId, - } - : { - spanId, - runtimeEnvironmentId: environmentId, - }, - }); + }, + this._replica + ); return run; } @@ -655,18 +662,21 @@ export class SpanPresenter extends BasePresenter { return; } - const triggeredRuns = await this._replica.taskRun.findMany({ - select: { - friendlyId: true, - taskIdentifier: true, - spanId: true, - createdAt: true, - status: true, - }, - where: { - parentSpanId: spanId, + const triggeredRuns = await runStore.findRuns( + { + where: { + parentSpanId: spanId, + }, + select: { + friendlyId: true, + taskIdentifier: true, + spanId: true, + createdAt: true, + status: true, + }, }, - }); + this._replica + ); const data = { spanId: span.spanId, diff --git a/apps/webapp/app/presenters/v3/TestTaskPresenter.server.ts b/apps/webapp/app/presenters/v3/TestTaskPresenter.server.ts index a9381ab60d2..0ebf5054bb1 100644 --- a/apps/webapp/app/presenters/v3/TestTaskPresenter.server.ts +++ b/apps/webapp/app/presenters/v3/TestTaskPresenter.server.ts @@ -12,6 +12,7 @@ import { type PrismaClient } from "~/db.server"; import { RunsRepository } from "~/services/runsRepository/runsRepository.server"; import { getTimezones } from "~/utils/timezones.server"; import { findCurrentWorkerDeployment } from "~/v3/models/workerDeployment.server"; +import { runStore } from "~/v3/runStore.server"; import { queueTypeFromType } from "./QueueRetrievePresenter.server"; export type RunTemplate = TaskRunTemplate & { @@ -214,38 +215,41 @@ export class TestTaskPresenter { }, }); - const latestRuns = await this.replica.taskRun.findMany({ - select: { - id: true, - queue: true, - friendlyId: true, - taskIdentifier: true, - createdAt: true, - status: true, - payload: true, - payloadType: true, - seedMetadata: true, - seedMetadataType: true, - runtimeEnvironmentId: true, - concurrencyKey: true, - maxAttempts: true, - maxDurationInSeconds: true, - machinePreset: true, - ttl: true, - runTags: true, - }, - where: { - id: 
{ - in: runIds, + const latestRuns = await runStore.findRuns( + { + where: { + id: { + in: runIds, + }, + payloadType: { + in: ["application/json", "application/super+json"], + }, }, - payloadType: { - in: ["application/json", "application/super+json"], + select: { + id: true, + queue: true, + friendlyId: true, + taskIdentifier: true, + createdAt: true, + status: true, + payload: true, + payloadType: true, + seedMetadata: true, + seedMetadataType: true, + runtimeEnvironmentId: true, + concurrencyKey: true, + maxAttempts: true, + maxDurationInSeconds: true, + machinePreset: true, + ttl: true, + runTags: true, + }, + orderBy: { + createdAt: "desc", }, }, - orderBy: { - createdAt: "desc", - }, - }); + this.replica + ); // Infer schema from existing run payloads when no explicit schema is defined let inferredPayloadSchema: unknown | undefined; From 126b05fd3da0e370b34066b6dab02ef59a9f0976 Mon Sep 17 00:00:00 2001 From: Dan Sutton Date: Thu, 18 Jun 2026 16:12:57 +0100 Subject: [PATCH 27/83] refactor(webapp): route API and loader TaskRun reads through the run store Relocate the route and loader TaskRun reads to the RunStore read methods, preserving the exact client per site, including the replica-resolve then writer-recheck realtime paths. Behavior-preserving. 
--- apps/webapp/app/routes/@.runs.$runParam.ts | 34 ++-- .../admin.api.v1.runs-replication.backfill.ts | 16 +- ....runs.$runFriendlyId.input-streams.wait.ts | 20 +- ...uns.$runFriendlyId.session-streams.wait.ts | 18 +- .../app/routes/api.v1.runs.$runId.metadata.ts | 10 +- .../api.v1.runs.$runId.spans.$spanId.ts | 35 ++-- .../app/routes/api.v1.runs.$runId.trace.ts | 8 +- .../routes/api.v1.runs.$runParam.replay.ts | 8 +- ...i.v1.sessions.$session.end-and-continue.ts | 19 +- apps/webapp/app/routes/api.v1.sessions.ts | 10 +- .../routes/api.v1.tasks.$taskId.trigger.ts | 14 +- .../app/routes/engine.v1.dev.disconnect.ts | 32 ++-- ...s.$snapshotFriendlyId.attempts.complete.ts | 8 +- ...hots.$snapshotFriendlyId.attempts.start.ts | 8 +- ...snapshots.$snapshotFriendlyId.heartbeat.ts | 8 +- ...ev.runs.$runFriendlyId.snapshots.latest.ts | 8 +- ...ne.v1.runs.$runFriendlyId.wait.duration.ts | 8 +- ...g.projects.$projectParam.runs.$runParam.ts | 14 +- .../projects.v3.$projectRef.runs.$runParam.ts | 14 +- .../app/routes/realtime.v1.runs.$runId.ts | 18 +- .../realtime.v1.streams.$runId.$streamId.ts | 57 +++--- ...streams.$runId.$target.$streamId.append.ts | 50 ++--- ...ime.v1.streams.$runId.$target.$streamId.ts | 89 +++++---- ...ltime.v1.streams.$runId.input.$streamId.ts | 39 ++-- ...projectParam.env.$envParam.logs.$logId.tsx | 10 +- ...tParam.env.$envParam.playground.action.tsx | 10 +- ...am.runs.$runParam.idempotencyKey.reset.tsx | 20 +- ...ram.realtime.v1.sessions.$sessionId.$io.ts | 12 +- ...am.realtime.v1.streams.$runId.$streamId.ts | 20 +- ...ltime.v1.streams.$runId.input.$streamId.ts | 20 +- .../route.tsx | 22 ++- .../resources.runs.$runParam.logs.download.ts | 32 ++-- .../app/routes/resources.runs.$runParam.ts | 174 +++++++++--------- .../resources.taskruns.$runParam.cancel.ts | 8 +- .../resources.taskruns.$runParam.debug.ts | 48 ++--- .../resources.taskruns.$runParam.replay.ts | 121 ++++++------ apps/webapp/app/routes/runs.$runParam.ts | 34 ++-- 
.../app/routes/sync.traces.runs.$traceId.ts | 22 ++- 38 files changed, 621 insertions(+), 477 deletions(-) diff --git a/apps/webapp/app/routes/@.runs.$runParam.ts b/apps/webapp/app/routes/@.runs.$runParam.ts index a709191271e..cd1e1eade18 100644 --- a/apps/webapp/app/routes/@.runs.$runParam.ts +++ b/apps/webapp/app/routes/@.runs.$runParam.ts @@ -1,6 +1,7 @@ import { redirect, type LoaderFunctionArgs } from "@remix-run/server-runtime"; import { z } from "zod"; import { prisma } from "~/db.server"; +import { runStore } from "~/v3/runStore.server"; import { redirectWithErrorMessage } from "~/models/message.server"; import { requireUser } from "~/services/session.server"; import { impersonate, rootPath, v3RunPath, v3RunSpanPath } from "~/utils/pathBuilder"; @@ -28,29 +29,32 @@ export async function loader({ params, request }: LoaderFunctionArgs) { ); } - const run = await prisma.taskRun.findFirst({ - where: { + const run = await runStore.findRun( + { friendlyId: runParam, }, - select: { - spanId: true, - runtimeEnvironment: { - select: { - slug: true, + { + select: { + spanId: true, + runtimeEnvironment: { + select: { + slug: true, + }, }, - }, - project: { - select: { - slug: true, - organization: { - select: { - slug: true, + project: { + select: { + slug: true, + organization: { + select: { + slug: true, + }, }, }, }, }, }, - }); + prisma + ); if (!run) { // Admin impersonation route — bypass org membership so admins can diff --git a/apps/webapp/app/routes/admin.api.v1.runs-replication.backfill.ts b/apps/webapp/app/routes/admin.api.v1.runs-replication.backfill.ts index c4d17ba875d..af041353ada 100644 --- a/apps/webapp/app/routes/admin.api.v1.runs-replication.backfill.ts +++ b/apps/webapp/app/routes/admin.api.v1.runs-replication.backfill.ts @@ -2,6 +2,7 @@ import { type ActionFunctionArgs, json } from "@remix-run/server-runtime"; import { type TaskRun } from "@trigger.dev/database"; import { z } from "zod"; import { prisma } from "~/db.server"; +import { runStore } 
from "~/v3/runStore.server"; import { logger } from "~/services/logger.server"; import { requireAdminApiRequest } from "~/services/personalAccessToken.server"; import { runsReplicationInstance } from "~/services/runsReplicationInstance.server"; @@ -25,14 +26,17 @@ export async function action({ request }: ActionFunctionArgs) { const runs: TaskRun[] = []; for (let i = 0; i < runIds.length; i += MAX_BATCH_SIZE) { const batch = runIds.slice(i, i + MAX_BATCH_SIZE); - const batchRuns = await prisma.taskRun.findMany({ - where: { - id: { in: batch }, - status: { - in: FINAL_RUN_STATUSES, + const batchRuns = await runStore.findRuns( + { + where: { + id: { in: batch }, + status: { + in: FINAL_RUN_STATUSES, + }, }, }, - }); + prisma + ); runs.push(...batchRuns); } diff --git a/apps/webapp/app/routes/api.v1.runs.$runFriendlyId.input-streams.wait.ts b/apps/webapp/app/routes/api.v1.runs.$runFriendlyId.input-streams.wait.ts index 0924bf3fc91..091312a13b8 100644 --- a/apps/webapp/app/routes/api.v1.runs.$runFriendlyId.input-streams.wait.ts +++ b/apps/webapp/app/routes/api.v1.runs.$runFriendlyId.input-streams.wait.ts @@ -6,6 +6,7 @@ import { } from "@trigger.dev/core/v3"; import { WaitpointId } from "@trigger.dev/core/v3/isomorphic"; import { $replica } from "~/db.server"; +import { runStore } from "~/v3/runStore.server"; import { createWaitpointTag, MAX_TAGS_PER_WAITPOINT } from "~/models/waitpointTag.server"; import { deleteInputStreamWaitpoint, @@ -32,18 +33,21 @@ const { action, loader } = createActionApiRoute( }, async ({ authentication, body, params }) => { try { - const run = await $replica.taskRun.findFirst({ - where: { + const run = await runStore.findRun( + { friendlyId: params.runFriendlyId, runtimeEnvironmentId: authentication.environment.id, }, - select: { - id: true, - friendlyId: true, - realtimeStreamsVersion: true, - streamBasinName: true, + { + select: { + id: true, + friendlyId: true, + realtimeStreamsVersion: true, + streamBasinName: true, + }, }, - }); + 
$replica + ); if (!run) { return json({ error: "Run not found" }, { status: 404 }); diff --git a/apps/webapp/app/routes/api.v1.runs.$runFriendlyId.session-streams.wait.ts b/apps/webapp/app/routes/api.v1.runs.$runFriendlyId.session-streams.wait.ts index 39c30894416..cd88ef38281 100644 --- a/apps/webapp/app/routes/api.v1.runs.$runFriendlyId.session-streams.wait.ts +++ b/apps/webapp/app/routes/api.v1.runs.$runFriendlyId.session-streams.wait.ts @@ -6,6 +6,7 @@ import { import { WaitpointId } from "@trigger.dev/core/v3/isomorphic"; import { z } from "zod"; import { $replica } from "~/db.server"; +import { runStore } from "~/v3/runStore.server"; import { createWaitpointTag, MAX_TAGS_PER_WAITPOINT } from "~/models/waitpointTag.server"; import { canonicalSessionAddressingKey, @@ -38,17 +39,20 @@ const { action, loader } = createActionApiRoute( }, async ({ authentication, body, params }) => { try { - const run = await $replica.taskRun.findFirst({ - where: { + const run = await runStore.findRun( + { friendlyId: params.runFriendlyId, runtimeEnvironmentId: authentication.environment.id, }, - select: { - id: true, - friendlyId: true, - realtimeStreamsVersion: true, + { + select: { + id: true, + friendlyId: true, + realtimeStreamsVersion: true, + }, }, - }); + $replica + ); if (!run) { return json({ error: "Run not found" }, { status: 404 }); diff --git a/apps/webapp/app/routes/api.v1.runs.$runId.metadata.ts b/apps/webapp/app/routes/api.v1.runs.$runId.metadata.ts index 3f22929aca9..7ec10835c78 100644 --- a/apps/webapp/app/routes/api.v1.runs.$runId.metadata.ts +++ b/apps/webapp/app/routes/api.v1.runs.$runId.metadata.ts @@ -17,6 +17,7 @@ import { createActionApiRoute } from "~/services/routeBuilders/apiBuilder.server import { ServiceValidationError } from "~/v3/services/common.server"; import { applyMetadataMutationToBufferedRun } from "~/v3/mollifier/applyMetadataMutation.server"; import { findRunByIdWithMollifierFallback } from "~/v3/mollifier/readFallback.server"; +import { 
runStore } from "~/v3/runStore.server"; const ParamsSchema = z.object({ runId: z.string(), @@ -39,10 +40,11 @@ export async function loader({ request, params }: LoaderFunctionArgs) { const env = authenticationResult.environment; - const pgRun = await $replica.taskRun.findFirst({ - where: { friendlyId: parsed.data.runId, runtimeEnvironmentId: env.id }, - select: { metadata: true, metadataType: true }, - }); + const pgRun = await runStore.findRun( + { friendlyId: parsed.data.runId, runtimeEnvironmentId: env.id }, + { select: { metadata: true, metadataType: true } }, + $replica + ); if (pgRun) { return json({ metadata: pgRun.metadata, metadataType: pgRun.metadataType }, { status: 200 }); } diff --git a/apps/webapp/app/routes/api.v1.runs.$runId.spans.$spanId.ts b/apps/webapp/app/routes/api.v1.runs.$runId.spans.$spanId.ts index c38206473cb..061199f33e9 100644 --- a/apps/webapp/app/routes/api.v1.runs.$runId.spans.$spanId.ts +++ b/apps/webapp/app/routes/api.v1.runs.$runId.spans.$spanId.ts @@ -11,6 +11,7 @@ import { getEventRepositoryForStore } from "~/v3/eventRepository/index.server"; import { getTaskEventStoreTableForRun } from "~/v3/taskEventStore.server"; import { findRunByIdWithMollifierFallback } from "~/v3/mollifier/readFallback.server"; import { buildSyntheticSpanDetailBody } from "~/v3/mollifier/syntheticApiResponses.server"; +import { runStore } from "~/v3/runStore.server"; const ParamsSchema = z.object({ runId: z.string(), @@ -28,9 +29,10 @@ type ResolvedRun = | { source: "buffer"; run: NonNullable>> }; async function findPgRun(runId: string, environmentId: string) { - return $replica.taskRun.findFirst({ - where: { friendlyId: runId, runtimeEnvironmentId: environmentId }, - }); + return runStore.findRun( + { friendlyId: runId, runtimeEnvironmentId: environmentId }, + $replica + ); } export const loader = createLoaderApiRoute( @@ -121,19 +123,22 @@ export const loader = createLoaderApiRoute( ? 
extractAISpanData(span.properties as Record, durationMs) : undefined; - const triggeredRuns = await $replica.taskRun.findMany({ - take: 50, - select: { - friendlyId: true, - taskIdentifier: true, - status: true, - createdAt: true, - }, - where: { - runtimeEnvironmentId: authentication.environment.id, - parentSpanId: params.spanId, + const triggeredRuns = await runStore.findRuns( + { + take: 50, + select: { + friendlyId: true, + taskIdentifier: true, + status: true, + createdAt: true, + }, + where: { + runtimeEnvironmentId: authentication.environment.id, + parentSpanId: params.spanId, + }, }, - }); + $replica + ); const properties = span.properties && diff --git a/apps/webapp/app/routes/api.v1.runs.$runId.trace.ts b/apps/webapp/app/routes/api.v1.runs.$runId.trace.ts index 04ae398194f..f1aa4d58967 100644 --- a/apps/webapp/app/routes/api.v1.runs.$runId.trace.ts +++ b/apps/webapp/app/routes/api.v1.runs.$runId.trace.ts @@ -10,6 +10,7 @@ import { getEventRepositoryForStore } from "~/v3/eventRepository/index.server"; import { getTaskEventStoreTableForRun } from "~/v3/taskEventStore.server"; import { findRunByIdWithMollifierFallback } from "~/v3/mollifier/readFallback.server"; import { buildSyntheticTraceBody } from "~/v3/mollifier/syntheticApiResponses.server"; +import { runStore } from "~/v3/runStore.server"; const ParamsSchema = z.object({ runId: z.string(), // This is the run friendly ID @@ -26,9 +27,10 @@ type ResolvedRun = | { source: "buffer"; run: NonNullable>> }; async function findPgRun(runId: string, environmentId: string) { - return $replica.taskRun.findFirst({ - where: { friendlyId: runId, runtimeEnvironmentId: environmentId }, - }); + return runStore.findRun( + { friendlyId: runId, runtimeEnvironmentId: environmentId }, + $replica + ); } export const loader = createLoaderApiRoute( diff --git a/apps/webapp/app/routes/api.v1.runs.$runParam.replay.ts b/apps/webapp/app/routes/api.v1.runs.$runParam.replay.ts index 130f6ff163a..4b238869d3a 100644 --- 
a/apps/webapp/app/routes/api.v1.runs.$runParam.replay.ts +++ b/apps/webapp/app/routes/api.v1.runs.$runParam.replay.ts @@ -3,6 +3,7 @@ import { json } from "@remix-run/server-runtime"; import type { TaskRun } from "@trigger.dev/database"; import { z } from "zod"; import { prisma } from "~/db.server"; +import { runStore } from "~/v3/runStore.server"; import { authenticateApiRequest } from "~/services/apiAuth.server"; import { logger } from "~/services/logger.server"; import { ReplayTaskRunService } from "~/v3/services/replayTaskRun.server"; @@ -73,12 +74,13 @@ export async function action({ request, params }: ActionFunctionArgs) { // filter beyond friendlyId is the existing semantic; findFirst with // env scoping tightens it minimally without changing behaviour for // a correctly-authed caller. - let taskRun: TaskRun | null = await prisma.taskRun.findFirst({ - where: { + let taskRun: TaskRun | null = await runStore.findRun( + { friendlyId: runParam, runtimeEnvironmentId: env.id, }, - }); + prisma + ); if (!taskRun) { // Buffered fallback. SyntheticRun carries every field diff --git a/apps/webapp/app/routes/api.v1.sessions.$session.end-and-continue.ts b/apps/webapp/app/routes/api.v1.sessions.$session.end-and-continue.ts index 7c5718aeae3..cc1a6d4f9fc 100644 --- a/apps/webapp/app/routes/api.v1.sessions.$session.end-and-continue.ts +++ b/apps/webapp/app/routes/api.v1.sessions.$session.end-and-continue.ts @@ -12,6 +12,7 @@ import { anyResource, createActionApiRoute, } from "~/services/routeBuilders/apiBuilder.server"; +import { runStore } from "~/v3/runStore.server"; const ParamsSchema = z.object({ session: z.string(), @@ -83,13 +84,14 @@ const { action, loader } = createActionApiRoute( // SDK exposes via `ctx.run.id`). Internally `Session.currentRunId` // stores the TaskRun.id cuid, so resolve before handing to the // optimistic-claim service. 
- const callingRun = await $replica.taskRun.findFirst({ - where: { + const callingRun = await runStore.findRun( + { friendlyId: body.callingRunId, runtimeEnvironmentId: authentication.environment.id, }, - select: { id: true }, - }); + { select: { id: true } }, + $replica + ); if (!callingRun) { return json({ error: "callingRunId not found in this environment" }, { status: 404 }); } @@ -118,10 +120,11 @@ const { action, loader } = createActionApiRoute( // `$replica`. A replica miss here would silently fall back to // returning the internal cuid, which the public API contract // says is a friendlyId. - const run = await prisma.taskRun.findFirst({ - where: { id: result.runId }, - select: { friendlyId: true }, - }); + const run = await runStore.findRun( + { id: result.runId }, + { select: { friendlyId: true } }, + prisma + ); const responseBody: EndAndContinueSessionResponseBody = { runId: run?.friendlyId ?? result.runId, diff --git a/apps/webapp/app/routes/api.v1.sessions.ts b/apps/webapp/app/routes/api.v1.sessions.ts index 44f8c7ef69f..ec8c171fc20 100644 --- a/apps/webapp/app/routes/api.v1.sessions.ts +++ b/apps/webapp/app/routes/api.v1.sessions.ts @@ -29,6 +29,7 @@ import { createLoaderApiRoute, } from "~/services/routeBuilders/apiBuilder.server"; import { ServiceValidationError } from "~/v3/services/common.server"; +import { runStore } from "~/v3/runStore.server"; function asArray(value: T | T[] | undefined): T[] | undefined { if (value === undefined) return undefined; @@ -264,10 +265,11 @@ const { action } = createActionApiRoute( // Read-after-write: the run was just triggered in this request, // so go to the writer rather than $replica. Replica lag here // would null this out and turn a successful create into a 500. 
- const run = await prisma.taskRun.findFirst({ - where: { id: ensureResult.runId }, - select: { friendlyId: true }, - }); + const run = await runStore.findRun( + { id: ensureResult.runId }, + { select: { friendlyId: true } }, + prisma + ); if (!run) { throw new Error(`Triggered run ${ensureResult.runId} not found`); } diff --git a/apps/webapp/app/routes/api.v1.tasks.$taskId.trigger.ts b/apps/webapp/app/routes/api.v1.tasks.$taskId.trigger.ts index 1f8a42af08c..eb9e5d974e4 100644 --- a/apps/webapp/app/routes/api.v1.tasks.$taskId.trigger.ts +++ b/apps/webapp/app/routes/api.v1.tasks.$taskId.trigger.ts @@ -20,6 +20,7 @@ import { saveRequestIdempotency, } from "~/utils/requestIdempotency.server"; import { sanitizeTriggerSource } from "~/utils/triggerSource"; +import { runStore } from "~/v3/runStore.server"; import { ServiceValidationError } from "~/v3/services/baseService.server"; import { OutOfEntitlementError, TriggerTaskService } from "~/v3/services/triggerTask.server"; @@ -77,14 +78,17 @@ const { action, loader } = createActionApiRoute( const cachedResponse = await handleRequestIdempotency(requestIdempotencyKey, { requestType: "trigger", findCachedEntity: async (cachedRequestId) => { - return await prisma.taskRun.findFirst({ - where: { + return await runStore.findRun( + { id: cachedRequestId, }, - select: { - friendlyId: true, + { + select: { + friendlyId: true, + }, }, - }); + prisma + ); }, buildResponse: (cachedRun) => ({ id: cachedRun.friendlyId, diff --git a/apps/webapp/app/routes/engine.v1.dev.disconnect.ts b/apps/webapp/app/routes/engine.v1.dev.disconnect.ts index 0cf92a53b70..01428301432 100644 --- a/apps/webapp/app/routes/engine.v1.dev.disconnect.ts +++ b/apps/webapp/app/routes/engine.v1.dev.disconnect.ts @@ -5,6 +5,7 @@ import { DevDisconnectRequestBody } from "@trigger.dev/core/v3"; import { BulkActionId, RunId } from "@trigger.dev/core/v3/isomorphic"; import { BulkActionNotificationType, BulkActionType } from "@trigger.dev/database"; import { prisma } 
from "~/db.server"; +import { runStore } from "~/v3/runStore.server"; import { logger } from "~/services/logger.server"; import { RateLimiter } from "~/services/rateLimiter.server"; import { createActionApiRoute } from "~/services/routeBuilders/apiBuilder.server"; @@ -99,21 +100,24 @@ async function cancelRunsInline( ): Promise { const runIds = runFriendlyIds.map((fid) => RunId.toId(fid)); - const runs = await prisma.taskRun.findMany({ - where: { - id: { in: runIds }, - runtimeEnvironmentId: environmentId, + const runs = await runStore.findRuns( + { + where: { + id: { in: runIds }, + runtimeEnvironmentId: environmentId, + }, + select: { + id: true, + engine: true, + friendlyId: true, + status: true, + createdAt: true, + completedAt: true, + taskEventStore: true, + }, }, - select: { - id: true, - engine: true, - friendlyId: true, - status: true, - createdAt: true, - completedAt: true, - taskEventStore: true, - }, - }); + prisma + ); let cancelled = 0; const cancelService = new CancelTaskRunService(prisma); diff --git a/apps/webapp/app/routes/engine.v1.dev.runs.$runFriendlyId.snapshots.$snapshotFriendlyId.attempts.complete.ts b/apps/webapp/app/routes/engine.v1.dev.runs.$runFriendlyId.snapshots.$snapshotFriendlyId.attempts.complete.ts index da4bab693ba..afc481a571a 100644 --- a/apps/webapp/app/routes/engine.v1.dev.runs.$runFriendlyId.snapshots.$snapshotFriendlyId.attempts.complete.ts +++ b/apps/webapp/app/routes/engine.v1.dev.runs.$runFriendlyId.snapshots.$snapshotFriendlyId.attempts.complete.ts @@ -9,6 +9,7 @@ import { prisma } from "~/db.server"; import { logger } from "~/services/logger.server"; import { createActionApiRoute } from "~/services/routeBuilders/apiBuilder.server"; import { engine } from "~/v3/runEngine.server"; +import { runStore } from "~/v3/runStore.server"; const { action } = createActionApiRoute( { @@ -28,12 +29,13 @@ const { action } = createActionApiRoute( const { runFriendlyId, snapshotFriendlyId } = params; try { - const run = await 
prisma.taskRun.findFirst({ - where: { + const run = await runStore.findRun( + { friendlyId: params.runFriendlyId, runtimeEnvironmentId: authentication.environment.id, }, - }); + prisma + ); if (!run) { throw new Response("You don't have permissions for this run", { status: 401 }); diff --git a/apps/webapp/app/routes/engine.v1.dev.runs.$runFriendlyId.snapshots.$snapshotFriendlyId.attempts.start.ts b/apps/webapp/app/routes/engine.v1.dev.runs.$runFriendlyId.snapshots.$snapshotFriendlyId.attempts.start.ts index a3f35013b78..4c057046479 100644 --- a/apps/webapp/app/routes/engine.v1.dev.runs.$runFriendlyId.snapshots.$snapshotFriendlyId.attempts.start.ts +++ b/apps/webapp/app/routes/engine.v1.dev.runs.$runFriendlyId.snapshots.$snapshotFriendlyId.attempts.start.ts @@ -18,6 +18,7 @@ import { import { resolveVariablesForEnvironment } from "~/v3/environmentVariables/environmentVariablesRepository.server"; import { machinePresetFromName } from "~/v3/machinePresets.server"; import { engine } from "~/v3/runEngine.server"; +import { runStore } from "~/v3/runStore.server"; const { action } = createActionApiRoute( { @@ -36,12 +37,13 @@ const { action } = createActionApiRoute( const { runFriendlyId, snapshotFriendlyId } = params; try { - const run = await prisma.taskRun.findFirst({ - where: { + const run = await runStore.findRun( + { friendlyId: params.runFriendlyId, runtimeEnvironmentId: authentication.environment.id, }, - }); + prisma + ); if (!run) { throw new Response("You don't have permissions for this run", { status: 401 }); diff --git a/apps/webapp/app/routes/engine.v1.dev.runs.$runFriendlyId.snapshots.$snapshotFriendlyId.heartbeat.ts b/apps/webapp/app/routes/engine.v1.dev.runs.$runFriendlyId.snapshots.$snapshotFriendlyId.heartbeat.ts index bab59fd0637..d9f6ca9a6d0 100644 --- a/apps/webapp/app/routes/engine.v1.dev.runs.$runFriendlyId.snapshots.$snapshotFriendlyId.heartbeat.ts +++ 
b/apps/webapp/app/routes/engine.v1.dev.runs.$runFriendlyId.snapshots.$snapshotFriendlyId.heartbeat.ts @@ -6,6 +6,7 @@ import { prisma } from "~/db.server"; import { logger } from "~/services/logger.server"; import { createActionApiRoute } from "~/services/routeBuilders/apiBuilder.server"; import { engine } from "~/v3/runEngine.server"; +import { runStore } from "~/v3/runStore.server"; const { action } = createActionApiRoute( { @@ -23,12 +24,13 @@ const { action } = createActionApiRoute( const { runFriendlyId, snapshotFriendlyId } = params; try { - const run = await prisma.taskRun.findFirst({ - where: { + const run = await runStore.findRun( + { friendlyId: params.runFriendlyId, runtimeEnvironmentId: authentication.environment.id, }, - }); + prisma + ); if (!run) { throw new Response("You don't have permissions for this run", { status: 401 }); diff --git a/apps/webapp/app/routes/engine.v1.dev.runs.$runFriendlyId.snapshots.latest.ts b/apps/webapp/app/routes/engine.v1.dev.runs.$runFriendlyId.snapshots.latest.ts index 60505460bd6..9254a74e834 100644 --- a/apps/webapp/app/routes/engine.v1.dev.runs.$runFriendlyId.snapshots.latest.ts +++ b/apps/webapp/app/routes/engine.v1.dev.runs.$runFriendlyId.snapshots.latest.ts @@ -6,6 +6,7 @@ import { prisma } from "~/db.server"; import { logger } from "~/services/logger.server"; import { createLoaderApiRoute } from "~/services/routeBuilders/apiBuilder.server"; import { engine } from "~/v3/runEngine.server"; +import { runStore } from "~/v3/runStore.server"; export const loader = createLoaderApiRoute( { @@ -24,12 +25,13 @@ export const loader = createLoaderApiRoute( }); try { - const run = await prisma.taskRun.findFirst({ - where: { + const run = await runStore.findRun( + { friendlyId: params.runFriendlyId, runtimeEnvironmentId: authentication.environment.id, }, - }); + prisma + ); if (!run) { throw new Response("You don't have permissions for this run", { status: 401 }); diff --git 
a/apps/webapp/app/routes/engine.v1.runs.$runFriendlyId.wait.duration.ts b/apps/webapp/app/routes/engine.v1.runs.$runFriendlyId.wait.duration.ts index 199244b1da8..8d7f6b84345 100644 --- a/apps/webapp/app/routes/engine.v1.runs.$runFriendlyId.wait.duration.ts +++ b/apps/webapp/app/routes/engine.v1.runs.$runFriendlyId.wait.duration.ts @@ -8,6 +8,7 @@ import { logger } from "~/services/logger.server"; import { createActionApiRoute } from "~/services/routeBuilders/apiBuilder.server"; import { resolveIdempotencyKeyTTL } from "~/utils/idempotencyKeys.server"; import { engine } from "~/v3/runEngine.server"; +import { runStore } from "~/v3/runStore.server"; const { action } = createActionApiRoute( { @@ -22,12 +23,13 @@ const { action } = createActionApiRoute( const runId = RunId.toId(runFriendlyId); try { - const run = await prisma.taskRun.findFirst({ - where: { + const run = await runStore.findRun( + { id: runId, runtimeEnvironmentId: authentication.environment.id, }, - }); + prisma + ); if (!run) { throw new Response("You don't have permissions for this run", { status: 401 }); diff --git a/apps/webapp/app/routes/orgs.$organizationSlug.projects.$projectParam.runs.$runParam.ts b/apps/webapp/app/routes/orgs.$organizationSlug.projects.$projectParam.runs.$runParam.ts index 63a89d7e0aa..d5d4ab0f2f6 100644 --- a/apps/webapp/app/routes/orgs.$organizationSlug.projects.$projectParam.runs.$runParam.ts +++ b/apps/webapp/app/routes/orgs.$organizationSlug.projects.$projectParam.runs.$runParam.ts @@ -4,6 +4,7 @@ import { z } from "zod"; import { prisma } from "~/db.server"; import { requireUserId } from "~/services/session.server"; import { ProjectParamSchema, v3RunPath } from "~/utils/pathBuilder"; +import { runStore } from "~/v3/runStore.server"; const ParamSchema = ProjectParamSchema.extend({ runParam: z.string(), @@ -13,8 +14,8 @@ export const loader = async ({ request, params }: LoaderFunctionArgs) => { const userId = await requireUserId(request); const { organizationSlug, 
projectParam, runParam } = ParamSchema.parse(params); - const run = await prisma.taskRun.findFirst({ - where: { + const run = await runStore.findRun( + { friendlyId: runParam, project: { slug: projectParam, @@ -28,10 +29,13 @@ export const loader = async ({ request, params }: LoaderFunctionArgs) => { }, }, }, - select: { - runtimeEnvironment: true, + { + select: { + runtimeEnvironment: true, + }, }, - }); + prisma + ); if (!run) { throw new Response("Not Found", { status: 404 }); diff --git a/apps/webapp/app/routes/projects.v3.$projectRef.runs.$runParam.ts b/apps/webapp/app/routes/projects.v3.$projectRef.runs.$runParam.ts index fe267d1f9fa..2a6cb34c913 100644 --- a/apps/webapp/app/routes/projects.v3.$projectRef.runs.$runParam.ts +++ b/apps/webapp/app/routes/projects.v3.$projectRef.runs.$runParam.ts @@ -3,6 +3,7 @@ import { z } from "zod"; import { prisma } from "~/db.server"; import { requireUserId } from "~/services/session.server"; import { v3RunSpanPath } from "~/utils/pathBuilder"; +import { runStore } from "~/v3/runStore.server"; const ParamsSchema = z.object({ projectRef: z.string(), @@ -34,14 +35,17 @@ export async function loader({ params, request }: LoaderFunctionArgs) { return new Response("Not found", { status: 404 }); } - const run = await prisma.taskRun.findUnique({ - where: { + const run = await runStore.findRun( + { friendlyId: validatedParams.runParam, }, - include: { - runtimeEnvironment: true, + { + include: { + runtimeEnvironment: true, + }, }, - }); + prisma + ); if (!run) { throw new Response("Not found", { status: 404 }); diff --git a/apps/webapp/app/routes/realtime.v1.runs.$runId.ts b/apps/webapp/app/routes/realtime.v1.runs.$runId.ts index 46118c1d894..f2268989bdd 100644 --- a/apps/webapp/app/routes/realtime.v1.runs.$runId.ts +++ b/apps/webapp/app/routes/realtime.v1.runs.$runId.ts @@ -7,6 +7,7 @@ import { anyResource, createLoaderApiRoute, } from "~/services/routeBuilders/apiBuilder.server"; +import { runStore } from "~/v3/runStore.server"; 
const ParamsSchema = z.object({ runId: z.string(), @@ -18,19 +19,22 @@ export const loader = createLoaderApiRoute( allowJWT: true, corsStrategy: "all", findResource: async (params, authentication) => { - return $replica.taskRun.findFirst({ - where: { + return runStore.findRun( + { friendlyId: params.runId, runtimeEnvironmentId: authentication.environment.id, }, - include: { - batch: { - select: { - friendlyId: true, + { + include: { + batch: { + select: { + friendlyId: true, + }, }, }, }, - }); + $replica + ); }, authorization: { action: "read", diff --git a/apps/webapp/app/routes/realtime.v1.streams.$runId.$streamId.ts b/apps/webapp/app/routes/realtime.v1.streams.$runId.$streamId.ts index d6470794a73..81784f9bc3a 100644 --- a/apps/webapp/app/routes/realtime.v1.streams.$runId.$streamId.ts +++ b/apps/webapp/app/routes/realtime.v1.streams.$runId.$streamId.ts @@ -8,6 +8,7 @@ import { createLoaderApiRoute, } from "~/services/routeBuilders/apiBuilder.server"; import { AuthenticatedEnvironment } from "~/services/apiAuth.server"; +import { runStore } from "~/v3/runStore.server"; const ParamsSchema = z.object({ runId: z.string(), @@ -25,23 +26,26 @@ export async function action({ request, params }: ActionFunctionArgs) { const { runId, streamId } = parsedParams.data; // Look up the run without environment scoping for backwards compatibility - const run = await $replica.taskRun.findFirst({ - where: { + const run = await runStore.findRun( + { friendlyId: runId, }, - select: { - id: true, - friendlyId: true, - streamBasinName: true, - runtimeEnvironment: { - include: { - project: true, - organization: true, - orgMember: true, + { + select: { + id: true, + friendlyId: true, + streamBasinName: true, + runtimeEnvironment: { + include: { + project: true, + organization: true, + orgMember: true, + }, }, }, }, - }); + $replica + ); if (!run) { return new Response("Run not found", { status: 404 }); @@ -87,25 +91,28 @@ export const loader = createLoaderApiRoute( allowJWT: true, 
corsStrategy: "all", findResource: async (params, auth) => { - const run = await $replica.taskRun.findFirst({ - where: { + const run = await runStore.findRun( + { friendlyId: params.runId, runtimeEnvironmentId: auth.environment.id, }, - select: { - id: true, - friendlyId: true, - taskIdentifier: true, - runTags: true, - realtimeStreamsVersion: true, - streamBasinName: true, - batch: { - select: { - friendlyId: true, + { + select: { + id: true, + friendlyId: true, + taskIdentifier: true, + runTags: true, + realtimeStreamsVersion: true, + streamBasinName: true, + batch: { + select: { + friendlyId: true, + }, }, }, }, - }); + $replica + ); return run; }, authorization: { diff --git a/apps/webapp/app/routes/realtime.v1.streams.$runId.$target.$streamId.append.ts b/apps/webapp/app/routes/realtime.v1.streams.$runId.$target.$streamId.append.ts index 11074840a38..7cb813a6dec 100644 --- a/apps/webapp/app/routes/realtime.v1.streams.$runId.$target.$streamId.append.ts +++ b/apps/webapp/app/routes/realtime.v1.streams.$runId.$target.$streamId.append.ts @@ -27,26 +27,29 @@ const { action } = createActionApiRoute( maxContentLength: MAX_APPEND_BODY_BYTES, }, async ({ request, params, authentication }) => { - const run = await $replica.taskRun.findFirst({ - where: { + const run = await runStore.findRun( + { friendlyId: params.runId, runtimeEnvironmentId: authentication.environment.id, }, - select: { - id: true, - friendlyId: true, - parentTaskRun: { - select: { - friendlyId: true, + { + select: { + id: true, + friendlyId: true, + parentTaskRun: { + select: { + friendlyId: true, + }, }, - }, - rootTaskRun: { - select: { - friendlyId: true, + rootTaskRun: { + select: { + friendlyId: true, + }, }, }, }, - }); + $replica + ); if (!run) { return new Response("Run not found", { status: 404 }); @@ -63,19 +66,22 @@ const { action } = createActionApiRoute( return new Response("Target not found", { status: 404 }); } - const targetRun = await prisma.taskRun.findFirst({ - where: { + const 
targetRun = await runStore.findRun( + { friendlyId: targetId, runtimeEnvironmentId: authentication.environment.id, }, - select: { - realtimeStreams: true, - realtimeStreamsVersion: true, - completedAt: true, - id: true, - streamBasinName: true, + { + select: { + realtimeStreams: true, + realtimeStreamsVersion: true, + completedAt: true, + id: true, + streamBasinName: true, + }, }, - }); + prisma + ); if (!targetRun) { return new Response("Run not found", { status: 404 }); diff --git a/apps/webapp/app/routes/realtime.v1.streams.$runId.$target.$streamId.ts b/apps/webapp/app/routes/realtime.v1.streams.$runId.$target.$streamId.ts index cdee9567b79..c71ad48d121 100644 --- a/apps/webapp/app/routes/realtime.v1.streams.$runId.$target.$streamId.ts +++ b/apps/webapp/app/routes/realtime.v1.streams.$runId.$target.$streamId.ts @@ -19,29 +19,32 @@ const { action } = createActionApiRoute( params: ParamsSchema, }, async ({ request, params, authentication }) => { - const run = await $replica.taskRun.findFirst({ - where: { + const run = await runStore.findRun( + { friendlyId: params.runId, runtimeEnvironmentId: authentication.environment.id, }, - select: { - id: true, - friendlyId: true, - streamBasinName: true, - parentTaskRun: { - select: { - friendlyId: true, - streamBasinName: true, + { + select: { + id: true, + friendlyId: true, + streamBasinName: true, + parentTaskRun: { + select: { + friendlyId: true, + streamBasinName: true, + }, }, - }, - rootTaskRun: { - select: { - friendlyId: true, - streamBasinName: true, + rootTaskRun: { + select: { + friendlyId: true, + streamBasinName: true, + }, }, }, }, - }); + $replica + ); if (!run) { return new Response("Run not found", { status: 404 }); @@ -63,18 +66,21 @@ const { action } = createActionApiRoute( if (request.method === "PUT") { // This is the "create" endpoint - const target = await prisma.taskRun.findFirst({ - where: { + const target = await runStore.findRun( + { friendlyId: targetId, runtimeEnvironmentId: 
authentication.environment.id, }, - select: { - id: true, - realtimeStreams: true, - realtimeStreamsVersion: true, - completedAt: true, + { + select: { + id: true, + realtimeStreams: true, + realtimeStreamsVersion: true, + completedAt: true, + }, }, - }); + prisma + ); if (!target) { return new Response("Run not found", { status: 404 }); @@ -148,29 +154,32 @@ const loader = createLoaderApiRoute( allowJWT: false, corsStrategy: "none", findResource: async (params, authentication) => { - return $replica.taskRun.findFirst({ - where: { + return runStore.findRun( + { friendlyId: params.runId, runtimeEnvironmentId: authentication.environment.id, }, - select: { - id: true, - friendlyId: true, - streamBasinName: true, - parentTaskRun: { - select: { - friendlyId: true, - streamBasinName: true, + { + select: { + id: true, + friendlyId: true, + streamBasinName: true, + parentTaskRun: { + select: { + friendlyId: true, + streamBasinName: true, + }, }, - }, - rootTaskRun: { - select: { - friendlyId: true, - streamBasinName: true, + rootTaskRun: { + select: { + friendlyId: true, + streamBasinName: true, + }, }, }, }, - }); + $replica + ); }, }, async ({ request, params, resource: run, authentication }) => { diff --git a/apps/webapp/app/routes/realtime.v1.streams.$runId.input.$streamId.ts b/apps/webapp/app/routes/realtime.v1.streams.$runId.input.$streamId.ts index a404e6a76ae..78fe332b8af 100644 --- a/apps/webapp/app/routes/realtime.v1.streams.$runId.input.$streamId.ts +++ b/apps/webapp/app/routes/realtime.v1.streams.$runId.input.$streamId.ts @@ -15,6 +15,7 @@ import { import { getRealtimeStreamInstance } from "~/services/realtime/v1StreamsGlobal.server"; import { engine } from "~/v3/runEngine.server"; import { ServiceValidationError } from "~/v3/services/common.server"; +import { runStore } from "~/v3/runStore.server"; const ParamsSchema = z.object({ runId: z.string(), @@ -38,19 +39,22 @@ const { action } = createActionApiRoute( }, }, async ({ request, params, authentication }) => 
{ - const run = await $replica.taskRun.findFirst({ - where: { + const run = await runStore.findRun( + { friendlyId: params.runId, runtimeEnvironmentId: authentication.environment.id, }, - select: { - id: true, - friendlyId: true, - completedAt: true, - realtimeStreamsVersion: true, - streamBasinName: true, + { + select: { + id: true, + friendlyId: true, + completedAt: true, + realtimeStreamsVersion: true, + streamBasinName: true, + }, }, - }); + $replica + ); if (!run) { return json({ ok: false, error: "Run not found" }, { status: 404 }); @@ -129,19 +133,22 @@ const loader = createLoaderApiRoute( allowJWT: true, corsStrategy: "all", findResource: async (params, auth) => { - return $replica.taskRun.findFirst({ - where: { + return runStore.findRun( + { friendlyId: params.runId, runtimeEnvironmentId: auth.environment.id, }, - include: { - batch: { - select: { - friendlyId: true, + { + include: { + batch: { + select: { + friendlyId: true, + }, }, }, }, - }); + $replica + ); }, authorization: { action: "read", diff --git a/apps/webapp/app/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.logs.$logId.tsx b/apps/webapp/app/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.logs.$logId.tsx index f4d34907042..be4fdba7fe8 100644 --- a/apps/webapp/app/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.logs.$logId.tsx +++ b/apps/webapp/app/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.logs.$logId.tsx @@ -7,6 +7,7 @@ import { LogDetailPresenter } from "~/presenters/v3/LogDetailPresenter.server"; import { findProjectBySlug } from "~/models/project.server"; import { findEnvironmentBySlug } from "~/models/runtimeEnvironment.server"; import { $replica } from "~/db.server"; +import { runStore } from "~/v3/runStore.server"; import { ServiceValidationError } from "~/v3/services/baseService.server"; import type { TaskRunStatus } from "@trigger.dev/database"; @@ -70,13 +71,14 @@ 
export const loader = async ({ request, params }: LoaderFunctionArgs) => { // Look up the run status from Postgres let runStatus: TaskRunStatus | undefined; if (result.runId) { - const run = await $replica.taskRun.findFirst({ - select: { status: true }, - where: { + const run = await runStore.findRun( + { friendlyId: result.runId, runtimeEnvironmentId: environment.id, }, - }); + { select: { status: true } }, + $replica + ); runStatus = run?.status; } diff --git a/apps/webapp/app/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.playground.action.tsx b/apps/webapp/app/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.playground.action.tsx index 0fab90e1457..da77d2cc692 100644 --- a/apps/webapp/app/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.playground.action.tsx +++ b/apps/webapp/app/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.playground.action.tsx @@ -10,6 +10,7 @@ import { requireUserId } from "~/services/session.server"; import { EnvironmentParamSchema } from "~/utils/pathBuilder"; import { mintSessionToken } from "~/services/realtime/mintSessionToken.server"; import { ensureRunForSession } from "~/services/realtime/sessionRunManager.server"; +import { runStore } from "~/v3/runStore.server"; const PlaygroundAction = z.object({ intent: z.enum(["create", "start", "save", "delete"]), @@ -183,10 +184,11 @@ export const action = async ({ request, params }: ActionFunctionArgs) => { reason: "initial", }); - const run = await prisma.taskRun.findFirst({ - where: { id: ensureResult.runId }, - select: { friendlyId: true }, - }); + const run = await runStore.findRun( + { id: ensureResult.runId }, + { select: { friendlyId: true } }, + prisma + ); if (!run) { return json({ error: "Triggered run not found" }, { status: 500 }); } diff --git 
a/apps/webapp/app/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.runs.$runParam.idempotencyKey.reset.tsx b/apps/webapp/app/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.runs.$runParam.idempotencyKey.reset.tsx index 614b668f910..06233f88c70 100644 --- a/apps/webapp/app/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.runs.$runParam.idempotencyKey.reset.tsx +++ b/apps/webapp/app/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.runs.$runParam.idempotencyKey.reset.tsx @@ -3,6 +3,7 @@ import { prisma } from "~/db.server"; import { jsonWithErrorMessage, jsonWithSuccessMessage } from "~/models/message.server"; import { logger } from "~/services/logger.server"; import { requireUserId } from "~/services/session.server"; +import { runStore } from "~/v3/runStore.server"; import { ResetIdempotencyKeyService } from "~/v3/services/resetIdempotencyKey.server"; import { v3RunParamsSchema } from "~/utils/pathBuilder"; @@ -11,8 +12,8 @@ export const action: ActionFunction = async ({ request, params }) => { const { projectParam, organizationSlug, envParam, runParam } = v3RunParamsSchema.parse(params); try { - const taskRun = await prisma.taskRun.findFirst({ - where: { + const taskRun = await runStore.findRun( + { friendlyId: runParam, project: { slug: projectParam, @@ -29,13 +30,16 @@ export const action: ActionFunction = async ({ request, params }) => { slug: envParam, }, }, - select: { - id: true, - idempotencyKey: true, - taskIdentifier: true, - runtimeEnvironmentId: true, + { + select: { + id: true, + idempotencyKey: true, + taskIdentifier: true, + runtimeEnvironmentId: true, + }, }, - }); + prisma + ); if (!taskRun) { return jsonWithErrorMessage({}, request, "Run not found"); diff --git a/apps/webapp/app/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.runs.$runParam.realtime.v1.sessions.$sessionId.$io.ts 
b/apps/webapp/app/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.runs.$runParam.realtime.v1.sessions.$sessionId.$io.ts index 66135347253..3a0dfca568e 100644 --- a/apps/webapp/app/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.runs.$runParam.realtime.v1.sessions.$sessionId.$io.ts +++ b/apps/webapp/app/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.runs.$runParam.realtime.v1.sessions.$sessionId.$io.ts @@ -1,6 +1,7 @@ import { type LoaderFunctionArgs } from "@remix-run/server-runtime"; import { z } from "zod"; import { $replica } from "~/db.server"; +import { runStore } from "~/v3/runStore.server"; import { findProjectBySlug } from "~/models/project.server"; import { findEnvironmentBySlug } from "~/models/runtimeEnvironment.server"; import { getRequestAbortSignal } from "~/services/httpAsyncStorage.server"; @@ -50,13 +51,16 @@ export async function loader({ request, params }: LoaderFunctionArgs) { // Verify the run lives in this environment — keeps callers from // subscribing to arbitrary sessions via `/runs/$runParam/...`. 
- const run = await $replica.taskRun.findFirst({ - where: { + const run = await runStore.findRun( + { friendlyId: runParam, runtimeEnvironmentId: environment.id, }, - select: { id: true, friendlyId: true }, - }); + { + select: { id: true, friendlyId: true }, + }, + $replica + ); if (!run) { return new Response("Run not found", { status: 404 }); diff --git a/apps/webapp/app/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.runs.$runParam.realtime.v1.streams.$runId.$streamId.ts b/apps/webapp/app/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.runs.$runParam.realtime.v1.streams.$runId.$streamId.ts index 8d0af728df8..cec6c3c4e98 100644 --- a/apps/webapp/app/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.runs.$runParam.realtime.v1.streams.$runId.$streamId.ts +++ b/apps/webapp/app/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.runs.$runParam.realtime.v1.streams.$runId.$streamId.ts @@ -7,6 +7,7 @@ import { findProjectBySlug } from "~/models/project.server"; import { findEnvironmentBySlug } from "~/models/runtimeEnvironment.server"; import { requireUserId } from "~/services/session.server"; import { EnvironmentParamSchema } from "~/utils/pathBuilder"; +import { runStore } from "~/v3/runStore.server"; const ParamsSchema = z.object({ runParam: z.string(), @@ -44,18 +45,21 @@ export async function loader({ request, params }: LoaderFunctionArgs) { return new Response("Environment not found", { status: 404 }); } - const run = await $replica.taskRun.findFirst({ - where: { + const run = await runStore.findRun( + { friendlyId: runId, runtimeEnvironmentId: environment.id, }, - select: { - id: true, - friendlyId: true, - realtimeStreamsVersion: true, - streamBasinName: true, + { + select: { + id: true, + friendlyId: true, + realtimeStreamsVersion: true, + streamBasinName: true, + }, }, - }); + $replica + ); if (!run) { return new Response("Run not found", { status: 404 
}); diff --git a/apps/webapp/app/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.runs.$runParam.realtime.v1.streams.$runId.input.$streamId.ts b/apps/webapp/app/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.runs.$runParam.realtime.v1.streams.$runId.input.$streamId.ts index c9480299cc0..1ecc7819c23 100644 --- a/apps/webapp/app/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.runs.$runParam.realtime.v1.streams.$runId.input.$streamId.ts +++ b/apps/webapp/app/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.runs.$runParam.realtime.v1.streams.$runId.input.$streamId.ts @@ -7,6 +7,7 @@ import { findProjectBySlug } from "~/models/project.server"; import { findEnvironmentBySlug } from "~/models/runtimeEnvironment.server"; import { requireUserId } from "~/services/session.server"; import { EnvironmentParamSchema } from "~/utils/pathBuilder"; +import { runStore } from "~/v3/runStore.server"; const ParamsSchema = z.object({ runParam: z.string(), @@ -46,18 +47,21 @@ export async function loader({ request, params }: LoaderFunctionArgs) { return new Response("Environment not found", { status: 404 }); } - const run = await $replica.taskRun.findFirst({ - where: { + const run = await runStore.findRun( + { friendlyId: runId, runtimeEnvironmentId: environment.id, }, - select: { - id: true, - friendlyId: true, - realtimeStreamsVersion: true, - streamBasinName: true, + { + select: { + id: true, + friendlyId: true, + realtimeStreamsVersion: true, + streamBasinName: true, + }, }, - }); + $replica + ); if (!run) { return new Response("Run not found", { status: 404 }); diff --git a/apps/webapp/app/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.runs.$runParam.streams.$streamKey/route.tsx b/apps/webapp/app/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.runs.$runParam.streams.$streamKey/route.tsx index 
60233d6d38f..24e7a73374f 100644 --- a/apps/webapp/app/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.runs.$runParam.streams.$streamKey/route.tsx +++ b/apps/webapp/app/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.runs.$runParam.streams.$streamKey/route.tsx @@ -26,6 +26,7 @@ import { getRealtimeStreamInstance } from "~/services/realtime/v1StreamsGlobal.s import { requireUserId } from "~/services/session.server"; import { cn } from "~/utils/cn"; import { v3RunStreamParamsSchema } from "~/utils/pathBuilder"; +import { runStore } from "~/v3/runStore.server"; type ViewMode = "list" | "compact"; @@ -58,21 +59,24 @@ export const loader = async ({ request, params }: LoaderFunctionArgs) => { throw new Response("Not Found", { status: 404 }); } - const run = await $replica.taskRun.findFirst({ - where: { + const run = await runStore.findRun( + { friendlyId: runParam, projectId: project.id, }, - include: { - runtimeEnvironment: { - include: { - project: true, - organization: true, - orgMember: true, + { + include: { + runtimeEnvironment: { + include: { + project: true, + organization: true, + orgMember: true, + }, }, }, }, - }); + $replica + ); if (!run) { throw new Response("Not Found", { status: 404 }); diff --git a/apps/webapp/app/routes/resources.runs.$runParam.logs.download.ts b/apps/webapp/app/routes/resources.runs.$runParam.logs.download.ts index 7cda5ac7824..7662a88b4d2 100644 --- a/apps/webapp/app/routes/resources.runs.$runParam.logs.download.ts +++ b/apps/webapp/app/routes/resources.runs.$runParam.logs.download.ts @@ -1,6 +1,7 @@ import { LoaderFunctionArgs } from "@remix-run/server-runtime"; import { prisma } from "~/db.server"; import { env } from "~/env.server"; +import { runStore } from "~/v3/runStore.server"; import { requireUser } from "~/services/session.server"; import { v3RunParamsSchema, v3RunPath } from "~/utils/pathBuilder"; import { createGzip } from "zlib"; @@ -26,8 +27,8 @@ export async 
function loader({ params, request }: LoaderFunctionArgs) { const showDebug = url.searchParams.get("showDebug") === "true" && user.admin; const filename = `${parsedParams.runParam}.${format.extension}`; - const run = await prisma.taskRun.findFirst({ - where: { + const run = await runStore.findRun( + { friendlyId: parsedParams.runParam, project: { organization: { @@ -39,19 +40,22 @@ export async function loader({ params, request }: LoaderFunctionArgs) { }, }, }, - select: { - friendlyId: true, - traceId: true, - organizationId: true, - runtimeEnvironmentId: true, - createdAt: true, - completedAt: true, - taskEventStore: true, - taskIdentifier: true, - project: { select: { slug: true, organization: { select: { slug: true } } } }, - runtimeEnvironment: { select: { slug: true } }, + { + select: { + friendlyId: true, + traceId: true, + organizationId: true, + runtimeEnvironmentId: true, + createdAt: true, + completedAt: true, + taskEventStore: true, + taskIdentifier: true, + project: { select: { slug: true, organization: { select: { slug: true } } } }, + runtimeEnvironment: { select: { slug: true } }, + }, }, - }); + prisma + ); if (!run || !run.organizationId) { // Buffered run? 
It hasn't executed, so there's no trace to stream — but a diff --git a/apps/webapp/app/routes/resources.runs.$runParam.ts b/apps/webapp/app/routes/resources.runs.$runParam.ts index c5e467533a3..38e17531f6f 100644 --- a/apps/webapp/app/routes/resources.runs.$runParam.ts +++ b/apps/webapp/app/routes/resources.runs.$runParam.ts @@ -6,6 +6,7 @@ import { $replica } from "~/db.server"; import { requireUserId } from "~/services/session.server"; import { v3RunParamsSchema } from "~/utils/pathBuilder"; import { machinePresetFromName, machinePresetFromRun } from "~/v3/machinePresets.server"; +import { runStore } from "~/v3/runStore.server"; import { FINAL_ATTEMPT_STATUSES, isFinalRunStatus } from "~/v3/taskStatus"; export type RunInspectorData = UseDataFunctionReturn; @@ -14,104 +15,107 @@ export const loader = async ({ request, params }: LoaderFunctionArgs) => { const userId = await requireUserId(request); const parsedParams = v3RunParamsSchema.pick({ runParam: true }).parse(params); - const run = await $replica.taskRun.findFirst({ - select: { - id: true, - traceId: true, - //metadata - number: true, - taskIdentifier: true, - friendlyId: true, - isTest: true, - runTags: true, - machinePreset: true, - lockedToVersion: { - select: { - version: true, - sdkVersion: true, - }, - }, - //status + duration - status: true, - startedAt: true, - createdAt: true, - updatedAt: true, - queuedAt: true, - completedAt: true, - logsDeletedAt: true, - //idempotency - idempotencyKey: true, - //delayed - delayUntil: true, - //ttl - ttl: true, - expiredAt: true, - //queue - queue: true, - concurrencyKey: true, - //schedule - scheduleId: true, - //usage - baseCostInCents: true, - costInCents: true, - usageDurationMs: true, - //env - runtimeEnvironment: { - select: { id: true, slug: true, type: true }, - }, - payload: true, - payloadType: true, - metadata: true, - metadataType: true, - maxAttempts: true, + const run = await runStore.findRun( + { + friendlyId: parsedParams.runParam, project: { - 
include: { - organization: true, + organization: { + members: { + some: { + userId, + }, + }, }, }, - lockedBy: { - select: { - filePath: true, - worker: { - select: { - deployment: { - select: { - friendlyId: true, - shortCode: true, - version: true, - runtime: true, - runtimeVersion: true, - git: true, + }, + { + select: { + id: true, + traceId: true, + //metadata + number: true, + taskIdentifier: true, + friendlyId: true, + isTest: true, + runTags: true, + machinePreset: true, + lockedToVersion: { + select: { + version: true, + sdkVersion: true, + }, + }, + //status + duration + status: true, + startedAt: true, + createdAt: true, + updatedAt: true, + queuedAt: true, + completedAt: true, + logsDeletedAt: true, + //idempotency + idempotencyKey: true, + //delayed + delayUntil: true, + //ttl + ttl: true, + expiredAt: true, + //queue + queue: true, + concurrencyKey: true, + //schedule + scheduleId: true, + //usage + baseCostInCents: true, + costInCents: true, + usageDurationMs: true, + //env + runtimeEnvironment: { + select: { id: true, slug: true, type: true }, + }, + payload: true, + payloadType: true, + metadata: true, + metadataType: true, + maxAttempts: true, + project: { + include: { + organization: true, + }, + }, + lockedBy: { + select: { + filePath: true, + worker: { + select: { + deployment: { + select: { + friendlyId: true, + shortCode: true, + version: true, + runtime: true, + runtimeVersion: true, + git: true, + }, }, }, }, }, }, - }, - parentTaskRun: { - select: { - friendlyId: true, - }, - }, - rootTaskRun: { - select: { - friendlyId: true, + parentTaskRun: { + select: { + friendlyId: true, + }, }, - }, - }, - where: { - friendlyId: parsedParams.runParam, - project: { - organization: { - members: { - some: { - userId, - }, + rootTaskRun: { + select: { + friendlyId: true, }, }, }, }, - }); + $replica + ); if (!run) { throw new Response("Not found", { status: 404 }); diff --git a/apps/webapp/app/routes/resources.taskruns.$runParam.cancel.ts 
b/apps/webapp/app/routes/resources.taskruns.$runParam.cancel.ts index fa6ee29f3db..ca92615bb83 100644 --- a/apps/webapp/app/routes/resources.taskruns.$runParam.cancel.ts +++ b/apps/webapp/app/routes/resources.taskruns.$runParam.cancel.ts @@ -7,6 +7,7 @@ import { logger } from "~/services/logger.server"; import { requireUserId } from "~/services/session.server"; import { CancelTaskRunService } from "~/v3/services/cancelTaskRun.server"; import { getMollifierBuffer } from "~/v3/mollifier/mollifierBuffer.server"; +import { runStore } from "~/v3/runStore.server"; export const cancelSchema = z.object({ redirectUrl: z.string(), @@ -28,8 +29,8 @@ export const action: ActionFunction = async ({ request, params }) => { } try { - const taskRun = await prisma.taskRun.findFirst({ - where: { + const taskRun = await runStore.findRun( + { friendlyId: runParam, project: { organization: { @@ -41,7 +42,8 @@ export const action: ActionFunction = async ({ request, params }) => { }, }, }, - }); + prisma + ); if (taskRun) { const cancelRunService = new CancelTaskRunService(); diff --git a/apps/webapp/app/routes/resources.taskruns.$runParam.debug.ts b/apps/webapp/app/routes/resources.taskruns.$runParam.debug.ts index d7acf18e517..7b37b1bcc00 100644 --- a/apps/webapp/app/routes/resources.taskruns.$runParam.debug.ts +++ b/apps/webapp/app/routes/resources.taskruns.$runParam.debug.ts @@ -5,6 +5,7 @@ import { $replica } from "~/db.server"; import { requireUserId } from "~/services/session.server"; import { marqs } from "~/v3/marqs/index.server"; import { engine } from "~/v3/runEngine.server"; +import { runStore } from "~/v3/runStore.server"; const ParamSchema = z.object({ runParam: z.string(), @@ -14,33 +15,36 @@ export async function loader({ request, params }: LoaderFunctionArgs) { const userId = await requireUserId(request); const { runParam } = ParamSchema.parse(params); - const run = await $replica.taskRun.findFirst({ - where: { friendlyId: runParam, project: { organization: { members: { 
some: { userId } } } } }, - select: { - id: true, - engine: true, - friendlyId: true, - queue: true, - concurrencyKey: true, - queueTimestamp: true, - runtimeEnvironment: { - select: { - id: true, - type: true, - slug: true, - organizationId: true, - project: true, - maximumConcurrencyLimit: true, - concurrencyLimitBurstFactor: true, - organization: { - select: { - id: true, + const run = await runStore.findRun( + { friendlyId: runParam, project: { organization: { members: { some: { userId } } } } }, + { + select: { + id: true, + engine: true, + friendlyId: true, + queue: true, + concurrencyKey: true, + queueTimestamp: true, + runtimeEnvironment: { + select: { + id: true, + type: true, + slug: true, + organizationId: true, + project: true, + maximumConcurrencyLimit: true, + concurrencyLimitBurstFactor: true, + organization: { + select: { + id: true, + }, }, }, }, }, }, - }); + $replica + ); if (!run) { throw new Response("Not Found", { status: 404 }); diff --git a/apps/webapp/app/routes/resources.taskruns.$runParam.replay.ts b/apps/webapp/app/routes/resources.taskruns.$runParam.replay.ts index 03bfdaccc65..0719a8e6a19 100644 --- a/apps/webapp/app/routes/resources.taskruns.$runParam.replay.ts +++ b/apps/webapp/app/routes/resources.taskruns.$runParam.replay.ts @@ -23,6 +23,7 @@ import { findCurrentWorkerDeployment } from "~/v3/models/workerDeployment.server import { queueTypeFromType } from "~/presenters/v3/QueueRetrievePresenter.server"; import { ReplayRunData } from "~/v3/replayTask"; import { RegionsPresenter } from "~/presenters/v3/RegionsPresenter.server"; +import { runStore } from "~/v3/runStore.server"; const ParamSchema = z.object({ runParam: z.string(), @@ -40,61 +41,64 @@ export async function loader({ request, params }: LoaderFunctionArgs) { Object.fromEntries(new URL(request.url).searchParams) ); - let run = await $replica.taskRun.findFirst({ - select: { - payload: true, - payloadType: true, - seedMetadata: true, - seedMetadataType: true, - 
runtimeEnvironmentId: true, - concurrencyKey: true, - maxAttempts: true, - maxDurationInSeconds: true, - machinePreset: true, - workerQueue: true, - region: true, - ttl: true, - idempotencyKey: true, - runTags: true, - queue: true, - taskIdentifier: true, - project: { - select: { - slug: true, - environments: { - select: { - id: true, - type: true, - slug: true, - branchName: true, - orgMember: { - select: { - user: true, + let run = await runStore.findRun( + { friendlyId: runParam, project: { organization: { members: { some: { userId } } } } }, + { + select: { + payload: true, + payloadType: true, + seedMetadata: true, + seedMetadataType: true, + runtimeEnvironmentId: true, + concurrencyKey: true, + maxAttempts: true, + maxDurationInSeconds: true, + machinePreset: true, + workerQueue: true, + region: true, + ttl: true, + idempotencyKey: true, + runTags: true, + queue: true, + taskIdentifier: true, + project: { + select: { + slug: true, + environments: { + select: { + id: true, + type: true, + slug: true, + branchName: true, + orgMember: { + select: { + user: true, + }, }, }, - }, - where: { - archivedAt: null, - OR: [ - { - type: { - in: ["PREVIEW", "STAGING", "PRODUCTION"], + where: { + archivedAt: null, + OR: [ + { + type: { + in: ["PREVIEW", "STAGING", "PRODUCTION"], + }, }, - }, - { - type: "DEVELOPMENT", - orgMember: { - userId, + { + type: "DEVELOPMENT", + orgMember: { + userId, + }, }, - }, - ], + ], + }, }, }, }, }, }, - where: { friendlyId: runParam, project: { organization: { members: { some: { userId } } } } }, - }); + $replica + ); let synthetic: | (Awaited> & { __synth: true }) @@ -272,8 +276,8 @@ export const action: ActionFunction = async ({ request, params }) => { } try { - const pgRun = await prisma.taskRun.findFirst({ - where: { + const pgRun = await runStore.findRun( + { friendlyId: runParam, project: { organization: { @@ -285,19 +289,22 @@ export const action: ActionFunction = async ({ request, params }) => { }, }, }, - include: { - 
runtimeEnvironment: { - select: { - slug: true, + { + include: { + runtimeEnvironment: { + select: { + slug: true, + }, }, - }, - project: { - include: { - organization: true, + project: { + include: { + organization: true, + }, }, }, }, - }); + prisma + ); // Mollifier read-fallback: if the original isn't in PG yet, // synthesise a TaskRun from the buffered snapshot. The B4-extended diff --git a/apps/webapp/app/routes/runs.$runParam.ts b/apps/webapp/app/routes/runs.$runParam.ts index b472d7ae8f4..5e0c2b21d6b 100644 --- a/apps/webapp/app/routes/runs.$runParam.ts +++ b/apps/webapp/app/routes/runs.$runParam.ts @@ -1,6 +1,7 @@ import { redirect, type LoaderFunctionArgs } from "@remix-run/server-runtime"; import { z } from "zod"; import { prisma } from "~/db.server"; +import { runStore } from "~/v3/runStore.server"; import { redirectWithErrorMessage } from "~/models/message.server"; import { requireUser } from "~/services/session.server"; import { rootPath, v3RunPath } from "~/utils/pathBuilder"; @@ -14,8 +15,8 @@ export async function loader({ params, request }: LoaderFunctionArgs) { const { runParam } = ParamsSchema.parse(params); - const run = await prisma.taskRun.findFirst({ - where: { + const run = await runStore.findRun( + { friendlyId: runParam, project: { organization: { @@ -27,25 +28,28 @@ export async function loader({ params, request }: LoaderFunctionArgs) { }, }, }, - select: { - spanId: true, - runtimeEnvironment: { - select: { - slug: true, + { + select: { + spanId: true, + runtimeEnvironment: { + select: { + slug: true, + }, }, - }, - project: { - select: { - slug: true, - organization: { - select: { - slug: true, + project: { + select: { + slug: true, + organization: { + select: { + slug: true, + }, }, }, }, }, }, - }); + prisma + ); if (!run) { return redirectWithErrorMessage( diff --git a/apps/webapp/app/routes/sync.traces.runs.$traceId.ts b/apps/webapp/app/routes/sync.traces.runs.$traceId.ts index 279e2ffa517..ee5d1c964f4 100644 --- 
a/apps/webapp/app/routes/sync.traces.runs.$traceId.ts +++ b/apps/webapp/app/routes/sync.traces.runs.$traceId.ts @@ -5,6 +5,7 @@ import { env } from "~/env.server"; import { logger } from "~/services/logger.server"; import { getUserId } from "~/services/session.server"; import { longPollingFetch } from "~/utils/longPollingFetch"; +import { runStore } from "~/v3/runStore.server"; const Params = z.object({ traceId: z.string(), @@ -21,18 +22,21 @@ export async function loader({ params, request }: LoaderFunctionArgs) { return new Response("No user found in cookie", { status: 401 }); } - const run = await $replica.taskRun.findFirst({ - select: { - project: { - select: { - organizationId: true, + const run = await runStore.findRun( + { + traceId, + }, + { + select: { + project: { + select: { + organizationId: true, + }, }, }, }, - where: { - traceId, - }, - }); + $replica + ); if (!run) { return new Response("No run found", { status: 404 }); From f59abe7c7f8702aaddee4d6b2d29e224d9c103d9 Mon Sep 17 00:00:00 2001 From: Dan Sutton Date: Thu, 18 Jun 2026 16:26:54 +0100 Subject: [PATCH 28/83] refactor(webapp): hydrate parent-model TaskRun reads through the run store Decompose the three reads that pulled TaskRun in through a parent model's relation include (alert, batch results, attempt dependencies): query the parent without the include, hydrate the run(s) via RunStore in a single batched read, and stitch them back. Preserves field selection, ordering, null handling and the query client. Adds container-backed tests for the batch-results and cancel-dependencies paths. 
--- .../v3/ApiBatchResultsPresenter.server.ts | 55 +++- .../v3/services/alerts/deliverAlert.server.ts | 38 ++- .../cancelTaskAttemptDependencies.server.ts | 51 +++- .../ApiBatchResultsPresenter.test.ts | 256 ++++++++++++++++++ .../cancelTaskAttemptDependencies.test.ts | 238 ++++++++++++++++ 5 files changed, 606 insertions(+), 32 deletions(-) create mode 100644 apps/webapp/test/presenters/ApiBatchResultsPresenter.test.ts create mode 100644 apps/webapp/test/services/cancelTaskAttemptDependencies.test.ts diff --git a/apps/webapp/app/presenters/v3/ApiBatchResultsPresenter.server.ts b/apps/webapp/app/presenters/v3/ApiBatchResultsPresenter.server.ts index 0b610215ef9..b3dd39637da 100644 --- a/apps/webapp/app/presenters/v3/ApiBatchResultsPresenter.server.ts +++ b/apps/webapp/app/presenters/v3/ApiBatchResultsPresenter.server.ts @@ -1,6 +1,7 @@ import { BatchTaskRunExecutionResult } from "@trigger.dev/core/v3"; -import { executionResultForTaskRun } from "~/models/taskRun.server"; +import { executionResultForTaskRun, TaskRunWithAttempts } from "~/models/taskRun.server"; import { AuthenticatedEnvironment } from "~/services/apiAuth.server"; +import { runStore } from "~/v3/runStore.server"; import { BasePresenter } from "./basePresenter.server"; export class ApiBatchResultsPresenter extends BasePresenter { @@ -16,16 +17,8 @@ export class ApiBatchResultsPresenter extends BasePresenter { }, include: { items: { - include: { - taskRun: { - include: { - attempts: { - orderBy: { - createdAt: "desc", - }, - }, - }, - }, + select: { + taskRunId: true, }, }, }, @@ -35,10 +28,48 @@ export class ApiBatchResultsPresenter extends BasePresenter { return undefined; } + const taskRunIds = batchRun.items.map((item) => item.taskRunId); + + if (taskRunIds.length === 0) { + return { + id: batchRun.friendlyId, + items: [], + }; + } + + const taskRuns = await runStore.findRuns( + { + where: { id: { in: taskRunIds } }, + select: { + id: true, + friendlyId: true, + status: true, + taskIdentifier: 
true, + attempts: { + select: { + status: true, + output: true, + outputType: true, + error: true, + }, + orderBy: { + createdAt: "desc", + }, + }, + }, + }, + this._prisma + ); + + const runMap = new Map(taskRuns.map((run) => [run.id, run])); + return { id: batchRun.friendlyId, items: batchRun.items - .map((item) => executionResultForTaskRun(item.taskRun)) + .map((item) => { + const run = runMap.get(item.taskRunId); + return run ? executionResultForTaskRun(run as TaskRunWithAttempts) : undefined; + }) .filter(Boolean), }; }); diff --git a/apps/webapp/app/v3/services/alerts/deliverAlert.server.ts b/apps/webapp/app/v3/services/alerts/deliverAlert.server.ts index bc8f9a3a5f2..49f464d6dc8 100644 --- a/apps/webapp/app/v3/services/alerts/deliverAlert.server.ts +++ b/apps/webapp/app/v3/services/alerts/deliverAlert.server.ts @@ -102,7 +102,7 @@ type DeploymentIntegrationMetadata = { export class DeliverAlertService extends BaseService { public async call(alertId: string) { - const alert: FoundAlert | null = await this._prisma.projectAlert.findFirst({ + const alertWithoutRun = await this._prisma.projectAlert.findFirst({ where: { id: alertId }, include: { channel: true, @@ -112,18 +112,6 @@ export class DeliverAlertService extends BaseService { }, }, environment: true, - taskRun: { - include: { - lockedBy: true, - lockedToVersion: true, - runtimeEnvironment: { - select: { - type: true, - branchName: true, - }, - }, - }, - }, workerDeployment: { include: { worker: { @@ -142,10 +130,32 @@ export class DeliverAlertService extends BaseService { }, }); - if (!alert) { + if (!alertWithoutRun) { return; } + let taskRun: FoundAlert["taskRun"] = null; + if (alertWithoutRun.taskRunId) { + taskRun = await this.runStore.findRun( + { id: alertWithoutRun.taskRunId }, + { + include: { + lockedBy: true, + lockedToVersion: true, + runtimeEnvironment: { + select: { + type: true, + branchName: true, + }, + }, + }, + }, + this._prisma + ); + } + + const alert: FoundAlert = { 
...alertWithoutRun, taskRun }; + if (alert.status !== "PENDING") { return; } diff --git a/apps/webapp/app/v3/services/cancelTaskAttemptDependencies.server.ts b/apps/webapp/app/v3/services/cancelTaskAttemptDependencies.server.ts index f3ad291ac9b..82b22d5935d 100644 --- a/apps/webapp/app/v3/services/cancelTaskAttemptDependencies.server.ts +++ b/apps/webapp/app/v3/services/cancelTaskAttemptDependencies.server.ts @@ -10,15 +10,15 @@ export class CancelTaskAttemptDependenciesService extends BaseService { where: { id: attemptId }, include: { dependencies: { - include: { - taskRun: true, + select: { + taskRunId: true, }, }, batchDependencies: { include: { runDependencies: { - include: { - taskRun: true, + select: { + taskRunId: true, }, }, }, @@ -45,14 +45,53 @@ export class CancelTaskAttemptDependenciesService extends BaseService { batchDependencies: taskAttempt.batchDependencies, }); + // Hydrate the dependent runs from both relation paths in a single batched read, + // deduping the ids that feed the query while preserving the original iteration order. + const taskRunIds = new Set(); + for (const dependency of taskAttempt.dependencies) { + taskRunIds.add(dependency.taskRunId); + } + for (const batchDependency of taskAttempt.batchDependencies) { + for (const runDependency of batchDependency.runDependencies) { + taskRunIds.add(runDependency.taskRunId); + } + } + + const runs = + taskRunIds.size > 0 + ? 
await this.runStore.findRuns( + { + where: { id: { in: [...taskRunIds] } }, + select: { + id: true, + engine: true, + status: true, + friendlyId: true, + taskEventStore: true, + createdAt: true, + completedAt: true, + }, + }, + this._prisma + ) + : []; + + const runMap = new Map(runs.map((run) => [run.id, run])); + // TaskAttempt will either have dependencies or batchDependencies for (const dependency of taskAttempt.dependencies) { - await cancelRunService.call(dependency.taskRun); + const run = runMap.get(dependency.taskRunId); + if (run) { + await cancelRunService.call(run); + } } for (const batchDependency of taskAttempt.batchDependencies) { for (const runDependency of batchDependency.runDependencies) { - await cancelRunService.call(runDependency.taskRun); + const run = runMap.get(runDependency.taskRunId); + if (run) { + await cancelRunService.call(run); + } } } } diff --git a/apps/webapp/test/presenters/ApiBatchResultsPresenter.test.ts b/apps/webapp/test/presenters/ApiBatchResultsPresenter.test.ts new file mode 100644 index 00000000000..385be889a51 --- /dev/null +++ b/apps/webapp/test/presenters/ApiBatchResultsPresenter.test.ts @@ -0,0 +1,256 @@ +import { containerTest } from "@internal/testcontainers"; +import type { Organization, PrismaClient, Project, RuntimeEnvironment } from "@trigger.dev/database"; +import { customAlphabet } from "nanoid"; +import { expect, vi } from "vitest"; +import { ApiBatchResultsPresenter } from "~/presenters/v3/ApiBatchResultsPresenter.server"; +import type { AuthenticatedEnvironment } from "~/services/apiAuth.server"; +import { seedTestEnvironment } from "../helpers/seedTestEnvironment"; + +vi.setConfig({ testTimeout: 60_000 }); + +const idGenerator = customAlphabet("123456789abcdefghijkmnopqrstuvwxyz", 21); + +function authEnv( + environment: RuntimeEnvironment, + project: Project, + organization: Organization +): AuthenticatedEnvironment { + return { ...environment, project, organization, orgMember: null } as 
AuthenticatedEnvironment; +} + +type SeedContext = { + environmentId: string; + projectId: string; + organizationId: string; + backgroundWorkerId: string; + backgroundWorkerTaskId: string; + queueId: string; +}; + +async function seedWorker(prisma: PrismaClient, ctx: Omit) { + const queue = await prisma.taskQueue.create({ + data: { + friendlyId: `queue_${idGenerator()}`, + name: "task/test-task", + projectId: ctx.projectId, + runtimeEnvironmentId: ctx.environmentId, + }, + }); + + const worker = await prisma.backgroundWorker.create({ + data: { + friendlyId: `worker_${idGenerator()}`, + contentHash: "hash", + projectId: ctx.projectId, + runtimeEnvironmentId: ctx.environmentId, + version: "20240101.1", + metadata: {}, + }, + }); + + const task = await prisma.backgroundWorkerTask.create({ + data: { + friendlyId: `task_${idGenerator()}`, + slug: "test-task", + filePath: "src/test.ts", + exportName: "testTask", + workerId: worker.id, + projectId: ctx.projectId, + runtimeEnvironmentId: ctx.environmentId, + }, + }); + + return { queueId: queue.id, backgroundWorkerId: worker.id, backgroundWorkerTaskId: task.id }; +} + +async function seedRunWithAttempt( + prisma: PrismaClient, + ctx: SeedContext, + opts: { + status: "COMPLETED_SUCCESSFULLY" | "COMPLETED_WITH_ERRORS" | "CANCELED" | "EXECUTING"; + attempt?: { + status: "COMPLETED" | "FAILED"; + output?: string; + outputType?: string; + error?: unknown; + }; + } +) { + const runInternalId = idGenerator(); + const run = await prisma.taskRun.create({ + data: { + id: runInternalId, + friendlyId: `run_${runInternalId}`, + taskIdentifier: "test-task", + payload: "{}", + payloadType: "application/json", + traceId: idGenerator(), + spanId: idGenerator(), + queue: "task/test-task", + runtimeEnvironmentId: ctx.environmentId, + projectId: ctx.projectId, + status: opts.status, + }, + }); + + if (opts.attempt) { + await prisma.taskRunAttempt.create({ + data: { + friendlyId: `attempt_${idGenerator()}`, + taskRunId: run.id, + 
backgroundWorkerId: ctx.backgroundWorkerId, + backgroundWorkerTaskId: ctx.backgroundWorkerTaskId, + runtimeEnvironmentId: ctx.environmentId, + queueId: ctx.queueId, + status: opts.attempt.status, + output: opts.attempt.output, + outputType: opts.attempt.outputType ?? "application/json", + error: opts.attempt.error as any, + }, + }); + } + + return run; +} + +containerTest( + "ApiBatchResultsPresenter returns ordered results matching pre-decompose behavior", + async ({ prisma }) => { + const { environment, project, organization } = await seedTestEnvironment(prisma); + const worker = await seedWorker(prisma, { + environmentId: environment.id, + projectId: project.id, + organizationId: organization.id, + }); + const ctx: SeedContext = { + environmentId: environment.id, + projectId: project.id, + organizationId: organization.id, + ...worker, + }; + + // A successful run, a failed run, and an executing run (no terminal attempt → undefined). + const successRun = await seedRunWithAttempt(prisma, ctx, { + status: "COMPLETED_SUCCESSFULLY", + attempt: { status: "COMPLETED", output: "\"hello\"", outputType: "application/json" }, + }); + const failedRun = await seedRunWithAttempt(prisma, ctx, { + status: "COMPLETED_WITH_ERRORS", + attempt: { + status: "FAILED", + error: { type: "BUILT_IN_ERROR", name: "Error", message: "boom", stackTrace: "boom" }, + }, + }); + const executingRun = await seedRunWithAttempt(prisma, ctx, { + status: "EXECUTING", + }); + + const batchInternalId = idGenerator(); + const batchFriendlyId = `batch_${batchInternalId}`; + await prisma.batchTaskRun.create({ + data: { + id: batchInternalId, + friendlyId: batchFriendlyId, + runtimeEnvironmentId: environment.id, + }, + }); + + // Items inserted in a deterministic order: success, failed, executing. 
+ for (const run of [successRun, failedRun, executingRun]) { + await prisma.batchTaskRunItem.create({ + data: { + batchTaskRunId: batchInternalId, + taskRunId: run.id, + }, + }); + } + + const presenter = new ApiBatchResultsPresenter(prisma); + const result = await presenter.call(batchFriendlyId, authEnv(environment, project, organization)); + + expect(result).toBeDefined(); + expect(result?.id).toBe(batchFriendlyId); + + // executing run yields no execution result → filtered out. Order preserved: success then failed. + expect(result?.items).toHaveLength(2); + + const [first, second] = result!.items; + expect(first.ok).toBe(true); + expect(first.id).toBe(successRun.friendlyId); + if (first.ok) { + expect(first.output).toBe("\"hello\""); + expect(first.taskIdentifier).toBe("test-task"); + } + + expect(second.ok).toBe(false); + expect(second.id).toBe(failedRun.friendlyId); + } +); + +containerTest( + "ApiBatchResultsPresenter filters runs without an execution result but keeps order", + async ({ prisma }) => { + const { environment, project, organization } = await seedTestEnvironment(prisma); + const worker = await seedWorker(prisma, { + environmentId: environment.id, + projectId: project.id, + organizationId: organization.id, + }); + const ctx: SeedContext = { + environmentId: environment.id, + projectId: project.id, + organizationId: organization.id, + ...worker, + }; + + // Pending run → executionResultForTaskRun returns undefined → filtered out, like the + // pre-decompose code did via `.filter(Boolean)`. 
+ const pendingRun = await seedRunWithAttempt(prisma, ctx, { status: "EXECUTING" }); + const successRun = await seedRunWithAttempt(prisma, ctx, { + status: "COMPLETED_SUCCESSFULLY", + attempt: { status: "COMPLETED", output: "\"ok\"", outputType: "application/json" }, + }); + + const batchInternalId = idGenerator(); + const batchFriendlyId = `batch_${batchInternalId}`; + await prisma.batchTaskRun.create({ + data: { + id: batchInternalId, + friendlyId: batchFriendlyId, + runtimeEnvironmentId: environment.id, + }, + }); + + // pending first, success second — only the success result should survive, in order. + for (const run of [pendingRun, successRun]) { + await prisma.batchTaskRunItem.create({ + data: { batchTaskRunId: batchInternalId, taskRunId: run.id }, + }); + } + + const presenter = new ApiBatchResultsPresenter(prisma); + const result = await presenter.call(batchFriendlyId, authEnv(environment, project, organization)); + + expect(result?.items).toHaveLength(1); + expect(result?.items[0]?.id).toBe(successRun.friendlyId); + } +); + +containerTest("ApiBatchResultsPresenter short-circuits an empty batch", async ({ prisma }) => { + const { environment, project, organization } = await seedTestEnvironment(prisma); + + const batchInternalId = idGenerator(); + const batchFriendlyId = `batch_${batchInternalId}`; + await prisma.batchTaskRun.create({ + data: { + id: batchInternalId, + friendlyId: batchFriendlyId, + runtimeEnvironmentId: environment.id, + }, + }); + + const presenter = new ApiBatchResultsPresenter(prisma); + const result = await presenter.call(batchFriendlyId, authEnv(environment, project, organization)); + + expect(result).toEqual({ id: batchFriendlyId, items: [] }); +}); diff --git a/apps/webapp/test/services/cancelTaskAttemptDependencies.test.ts b/apps/webapp/test/services/cancelTaskAttemptDependencies.test.ts new file mode 100644 index 00000000000..03e090ea6c1 --- /dev/null +++ b/apps/webapp/test/services/cancelTaskAttemptDependencies.test.ts @@ -0,0 
+1,238 @@ +import { containerTest } from "@internal/testcontainers"; +import type { PrismaClient } from "@trigger.dev/database"; +import { customAlphabet } from "nanoid"; +import { expect, vi } from "vitest"; +import { CancelTaskAttemptDependenciesService } from "~/v3/services/cancelTaskAttemptDependencies.server"; +import { CancelTaskRunService } from "~/v3/services/cancelTaskRun.server"; +import { seedTestEnvironment } from "../helpers/seedTestEnvironment"; + +vi.setConfig({ testTimeout: 60_000 }); + +const idGenerator = customAlphabet("123456789abcdefghijkmnopqrstuvwxyz", 21); + +type SeedContext = { + environmentId: string; + projectId: string; + organizationId: string; + backgroundWorkerId: string; + backgroundWorkerTaskId: string; + queueId: string; +}; + +async function seedWorker( + prisma: PrismaClient, + ctx: { environmentId: string; projectId: string } +) { + const queue = await prisma.taskQueue.create({ + data: { + friendlyId: `queue_${idGenerator()}`, + name: "task/test-task", + projectId: ctx.projectId, + runtimeEnvironmentId: ctx.environmentId, + }, + }); + const worker = await prisma.backgroundWorker.create({ + data: { + friendlyId: `worker_${idGenerator()}`, + contentHash: "hash", + projectId: ctx.projectId, + runtimeEnvironmentId: ctx.environmentId, + version: "20240101.1", + metadata: {}, + }, + }); + const task = await prisma.backgroundWorkerTask.create({ + data: { + friendlyId: `task_${idGenerator()}`, + slug: "test-task", + filePath: "src/test.ts", + workerId: worker.id, + projectId: ctx.projectId, + runtimeEnvironmentId: ctx.environmentId, + }, + }); + return { queueId: queue.id, backgroundWorkerId: worker.id, backgroundWorkerTaskId: task.id }; +} + +async function seedRun(prisma: PrismaClient, ctx: SeedContext) { + const id = idGenerator(); + return prisma.taskRun.create({ + data: { + id, + friendlyId: `run_${id}`, + taskIdentifier: "test-task", + payload: "{}", + payloadType: "application/json", + traceId: idGenerator(), + spanId: 
idGenerator(), + queue: "task/test-task", + runtimeEnvironmentId: ctx.environmentId, + projectId: ctx.projectId, + }, + }); +} + +async function seedAttempt(prisma: PrismaClient, ctx: SeedContext, taskRunId: string) { + return prisma.taskRunAttempt.create({ + data: { + friendlyId: `attempt_${idGenerator()}`, + taskRunId, + backgroundWorkerId: ctx.backgroundWorkerId, + backgroundWorkerTaskId: ctx.backgroundWorkerTaskId, + runtimeEnvironmentId: ctx.environmentId, + queueId: ctx.queueId, + status: "CANCELED", + }, + }); +} + +containerTest( + "cancelTaskAttemptDependencies cancels each dependent run once, in original order", + async ({ prisma }) => { + const { environment, project, organization } = await seedTestEnvironment(prisma); + const worker = await seedWorker(prisma, { + environmentId: environment.id, + projectId: project.id, + }); + const ctx: SeedContext = { + environmentId: environment.id, + projectId: project.id, + organizationId: organization.id, + ...worker, + }; + + // The attempt whose dependencies we cancel. + const parentRun = await seedRun(prisma, ctx); + const parentAttempt = await seedAttempt(prisma, ctx, parentRun.id); + + // Two direct dependencies. + const depRunA = await seedRun(prisma, ctx); + const depRunB = await seedRun(prisma, ctx); + await prisma.taskRunDependency.create({ + data: { taskRunId: depRunA.id, dependentAttemptId: parentAttempt.id }, + }); + await prisma.taskRunDependency.create({ + data: { taskRunId: depRunB.id, dependentAttemptId: parentAttempt.id }, + }); + + // One batch dependency carrying two run dependencies. 
+ const batchRunDepC = await seedRun(prisma, ctx); + const batchRunDepD = await seedRun(prisma, ctx); + const batchId = idGenerator(); + await prisma.batchTaskRun.create({ + data: { + id: batchId, + friendlyId: `batch_${batchId}`, + runtimeEnvironmentId: environment.id, + dependentTaskAttemptId: parentAttempt.id, + }, + }); + await prisma.taskRunDependency.create({ + data: { taskRunId: batchRunDepC.id, dependentBatchRunId: batchId }, + }); + await prisma.taskRunDependency.create({ + data: { taskRunId: batchRunDepD.id, dependentBatchRunId: batchId }, + }); + + const cancelledRunIds: string[] = []; + const callSpy = vi + .spyOn(CancelTaskRunService.prototype, "call") + .mockImplementation(async (taskRun: any) => { + cancelledRunIds.push(taskRun.id); + return { id: taskRun.id, alreadyFinished: false }; + }); + + try { + const service = new CancelTaskAttemptDependenciesService(prisma); + await service.call(parentAttempt.id); + } finally { + callSpy.mockRestore(); + } + + // Each dependent run cancelled exactly once. + expect(cancelledRunIds).toHaveLength(4); + expect(new Set(cancelledRunIds).size).toBe(4); + + // Direct dependencies first (both paths preserve insertion/iteration order), then batch run deps. + const directIds = cancelledRunIds.slice(0, 2); + const batchIds = cancelledRunIds.slice(2); + expect(new Set(directIds)).toEqual(new Set([depRunA.id, depRunB.id])); + expect(new Set(batchIds)).toEqual(new Set([batchRunDepC.id, batchRunDepD.id])); + + // The hydrated runs carry the fields CancelableTaskRun requires. 
+ const cancelArgs = callSpy.mock.calls.map((c) => c[0] as any); + for (const run of cancelArgs) { + expect(run).toMatchObject({ + id: expect.any(String), + friendlyId: expect.any(String), + }); + expect(run).toHaveProperty("engine"); + expect(run).toHaveProperty("status"); + expect(run).toHaveProperty("taskEventStore"); + expect(run).toHaveProperty("createdAt"); + expect("completedAt" in run).toBe(true); + } + } +); + +containerTest( + "cancelTaskAttemptDependencies skips dependencies whose run is not hydrated", + async ({ prisma }) => { + const { environment, project, organization } = await seedTestEnvironment(prisma); + const worker = await seedWorker(prisma, { + environmentId: environment.id, + projectId: project.id, + }); + const ctx: SeedContext = { + environmentId: environment.id, + projectId: project.id, + organizationId: organization.id, + ...worker, + }; + + const parentRun = await seedRun(prisma, ctx); + const parentAttempt = await seedAttempt(prisma, ctx, parentRun.id); + + const presentRun = await seedRun(prisma, ctx); + const missingRun = await seedRun(prisma, ctx); + await prisma.taskRunDependency.create({ + data: { taskRunId: presentRun.id, dependentAttemptId: parentAttempt.id }, + }); + await prisma.taskRunDependency.create({ + data: { taskRunId: missingRun.id, dependentAttemptId: parentAttempt.id }, + }); + + const cancelledRunIds: string[] = []; + const callSpy = vi + .spyOn(CancelTaskRunService.prototype, "call") + .mockImplementation(async (taskRun: any) => { + cancelledRunIds.push(taskRun.id); + return { id: taskRun.id, alreadyFinished: false }; + }); + + // Inject a runStore that deliberately omits `missingRun` to exercise the runMap-miss skip + // (the post-redirect "run not found here" case). The constructor's third arg is the seam. 
+ const filteringRunStore = { + findRuns: async (args: any) => { + const ids: string[] = args.where.id.in; + return prisma.taskRun.findMany({ + where: { id: { in: ids.filter((id) => id !== missingRun.id) } }, + select: args.select, + }); + }, + } as any; + + try { + const service = new CancelTaskAttemptDependenciesService( + prisma, + undefined, + filteringRunStore + ); + await service.call(parentAttempt.id); + } finally { + callSpy.mockRestore(); + } + + expect(cancelledRunIds).toEqual([presentRun.id]); + } +); From cb12430424e7707be029b8d2a07fbce636c2dd91 Mon Sep 17 00:00:00 2001 From: Dan Sutton Date: Thu, 18 Jun 2026 16:31:01 +0100 Subject: [PATCH 29/83] chore(scripts): flag recover-stuck-runs raw TaskRun read for table cutover The recovery script joins TaskRunExecutionSnapshot to TaskRun in raw SQL, so it is the one TaskRun read not routed through the run store. Add a note to revisit it at table cutover. --- scripts/recover-stuck-runs.ts | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/recover-stuck-runs.ts b/scripts/recover-stuck-runs.ts index 15deeb899c9..28bb4e85e46 100755 --- a/scripts/recover-stuck-runs.ts +++ b/scripts/recover-stuck-runs.ts @@ -187,7 +187,9 @@ async function main() { console.log(`📊 Found ${runIds.length} runs in currentConcurrency set`); - // Query database for latest snapshots and queue info of these runs + // Query database for latest snapshots and queue info of these runs. + // NOTE: raw join of TaskRunExecutionSnapshot to TaskRun, the one TaskRun read not behind + // RunStore (a join, not a by-id read, in an ops script). Revisit at table cutover. 
const runInfo = await prisma.$queryRaw< Array<{ runId: string; From ae57f25a03b20c51cb4c4869ce686d715de4e91a Mon Sep 17 00:00:00 2001 From: Dan Sutton Date: Thu, 18 Jun 2026 16:41:47 +0100 Subject: [PATCH 30/83] chore(webapp): add server-changes entry for run-store read routing --- .server-changes/route-taskrun-reads-through-run-store.md | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 .server-changes/route-taskrun-reads-through-run-store.md diff --git a/.server-changes/route-taskrun-reads-through-run-store.md b/.server-changes/route-taskrun-reads-through-run-store.md new file mode 100644 index 00000000000..dad804e40ba --- /dev/null +++ b/.server-changes/route-taskrun-reads-through-run-store.md @@ -0,0 +1,6 @@ +--- +area: webapp +type: improvement +--- + +Route Postgres task run reads through the run store so they can be retargeted to a different backing store without changing call sites. From fcc26d4ebd3966d039b923f6a49476da17955985 Mon Sep 17 00:00:00 2001 From: Dan Sutton Date: Thu, 18 Jun 2026 17:01:19 +0100 Subject: [PATCH 31/83] test(webapp): mock db.server in the new run-store read tests The new container tests import the service and presenter, which pull the db.server singleton in through their base classes. Mock it so the tests do not try to connect to the env database when none is reachable (the CI unit shards), matching the existing webapp container-test pattern. The tests use the injected testcontainer prisma for all reads. 
--- apps/webapp/test/presenters/ApiBatchResultsPresenter.test.ts | 4 ++++ .../test/services/cancelTaskAttemptDependencies.test.ts | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/apps/webapp/test/presenters/ApiBatchResultsPresenter.test.ts b/apps/webapp/test/presenters/ApiBatchResultsPresenter.test.ts index 385be889a51..d0888ba6a18 100644 --- a/apps/webapp/test/presenters/ApiBatchResultsPresenter.test.ts +++ b/apps/webapp/test/presenters/ApiBatchResultsPresenter.test.ts @@ -6,6 +6,10 @@ import { ApiBatchResultsPresenter } from "~/presenters/v3/ApiBatchResultsPresent import type { AuthenticatedEnvironment } from "~/services/apiAuth.server"; import { seedTestEnvironment } from "../helpers/seedTestEnvironment"; +// Neutralize the db.server singleton so importing the presenter (via BasePresenter) does not try +// to connect to the env database; the test uses the injected testcontainer prisma for all reads. +vi.mock("~/db.server", () => ({ prisma: {}, $replica: {} })); + vi.setConfig({ testTimeout: 60_000 }); const idGenerator = customAlphabet("123456789abcdefghijkmnopqrstuvwxyz", 21); diff --git a/apps/webapp/test/services/cancelTaskAttemptDependencies.test.ts b/apps/webapp/test/services/cancelTaskAttemptDependencies.test.ts index 03e090ea6c1..65ecef73a86 100644 --- a/apps/webapp/test/services/cancelTaskAttemptDependencies.test.ts +++ b/apps/webapp/test/services/cancelTaskAttemptDependencies.test.ts @@ -6,6 +6,10 @@ import { CancelTaskAttemptDependenciesService } from "~/v3/services/cancelTaskAt import { CancelTaskRunService } from "~/v3/services/cancelTaskRun.server"; import { seedTestEnvironment } from "../helpers/seedTestEnvironment"; +// Neutralize the db.server singleton so importing the service (via BaseService) does not try to +// connect to the env database; the test uses the injected testcontainer prisma for all reads. 
+vi.mock("~/db.server", () => ({ prisma: {}, $replica: {} })); + vi.setConfig({ testTimeout: 60_000 }); const idGenerator = customAlphabet("123456789abcdefghijkmnopqrstuvwxyz", 21); From 789e10780960acc51ed27aae11fdb705f5fd6594 Mon Sep 17 00:00:00 2001 From: Dan Sutton Date: Thu, 18 Jun 2026 17:36:57 +0100 Subject: [PATCH 32/83] test(webapp): drop the cancelTaskAttemptDependencies container test Importing the service pulls the cancel chain, which eagerly initializes the concurrency tracker singleton and requires REDIS_HOST/REDIS_PORT at import time, so the suite cannot load in the unit-test shards without stacking mocks. The decompose it covered is exercised by the analogous batch-results container test and confirmed by review, so drop this one rather than mock the tracker and cancel chain. --- .../cancelTaskAttemptDependencies.test.ts | 242 ------------------ 1 file changed, 242 deletions(-) delete mode 100644 apps/webapp/test/services/cancelTaskAttemptDependencies.test.ts diff --git a/apps/webapp/test/services/cancelTaskAttemptDependencies.test.ts b/apps/webapp/test/services/cancelTaskAttemptDependencies.test.ts deleted file mode 100644 index 65ecef73a86..00000000000 --- a/apps/webapp/test/services/cancelTaskAttemptDependencies.test.ts +++ /dev/null @@ -1,242 +0,0 @@ -import { containerTest } from "@internal/testcontainers"; -import type { PrismaClient } from "@trigger.dev/database"; -import { customAlphabet } from "nanoid"; -import { expect, vi } from "vitest"; -import { CancelTaskAttemptDependenciesService } from "~/v3/services/cancelTaskAttemptDependencies.server"; -import { CancelTaskRunService } from "~/v3/services/cancelTaskRun.server"; -import { seedTestEnvironment } from "../helpers/seedTestEnvironment"; - -// Neutralize the db.server singleton so importing the service (via BaseService) does not try to -// connect to the env database; the test uses the injected testcontainer prisma for all reads. 
-vi.mock("~/db.server", () => ({ prisma: {}, $replica: {} })); - -vi.setConfig({ testTimeout: 60_000 }); - -const idGenerator = customAlphabet("123456789abcdefghijkmnopqrstuvwxyz", 21); - -type SeedContext = { - environmentId: string; - projectId: string; - organizationId: string; - backgroundWorkerId: string; - backgroundWorkerTaskId: string; - queueId: string; -}; - -async function seedWorker( - prisma: PrismaClient, - ctx: { environmentId: string; projectId: string } -) { - const queue = await prisma.taskQueue.create({ - data: { - friendlyId: `queue_${idGenerator()}`, - name: "task/test-task", - projectId: ctx.projectId, - runtimeEnvironmentId: ctx.environmentId, - }, - }); - const worker = await prisma.backgroundWorker.create({ - data: { - friendlyId: `worker_${idGenerator()}`, - contentHash: "hash", - projectId: ctx.projectId, - runtimeEnvironmentId: ctx.environmentId, - version: "20240101.1", - metadata: {}, - }, - }); - const task = await prisma.backgroundWorkerTask.create({ - data: { - friendlyId: `task_${idGenerator()}`, - slug: "test-task", - filePath: "src/test.ts", - workerId: worker.id, - projectId: ctx.projectId, - runtimeEnvironmentId: ctx.environmentId, - }, - }); - return { queueId: queue.id, backgroundWorkerId: worker.id, backgroundWorkerTaskId: task.id }; -} - -async function seedRun(prisma: PrismaClient, ctx: SeedContext) { - const id = idGenerator(); - return prisma.taskRun.create({ - data: { - id, - friendlyId: `run_${id}`, - taskIdentifier: "test-task", - payload: "{}", - payloadType: "application/json", - traceId: idGenerator(), - spanId: idGenerator(), - queue: "task/test-task", - runtimeEnvironmentId: ctx.environmentId, - projectId: ctx.projectId, - }, - }); -} - -async function seedAttempt(prisma: PrismaClient, ctx: SeedContext, taskRunId: string) { - return prisma.taskRunAttempt.create({ - data: { - friendlyId: `attempt_${idGenerator()}`, - taskRunId, - backgroundWorkerId: ctx.backgroundWorkerId, - backgroundWorkerTaskId: 
ctx.backgroundWorkerTaskId, - runtimeEnvironmentId: ctx.environmentId, - queueId: ctx.queueId, - status: "CANCELED", - }, - }); -} - -containerTest( - "cancelTaskAttemptDependencies cancels each dependent run once, in original order", - async ({ prisma }) => { - const { environment, project, organization } = await seedTestEnvironment(prisma); - const worker = await seedWorker(prisma, { - environmentId: environment.id, - projectId: project.id, - }); - const ctx: SeedContext = { - environmentId: environment.id, - projectId: project.id, - organizationId: organization.id, - ...worker, - }; - - // The attempt whose dependencies we cancel. - const parentRun = await seedRun(prisma, ctx); - const parentAttempt = await seedAttempt(prisma, ctx, parentRun.id); - - // Two direct dependencies. - const depRunA = await seedRun(prisma, ctx); - const depRunB = await seedRun(prisma, ctx); - await prisma.taskRunDependency.create({ - data: { taskRunId: depRunA.id, dependentAttemptId: parentAttempt.id }, - }); - await prisma.taskRunDependency.create({ - data: { taskRunId: depRunB.id, dependentAttemptId: parentAttempt.id }, - }); - - // One batch dependency carrying two run dependencies. 
- const batchRunDepC = await seedRun(prisma, ctx); - const batchRunDepD = await seedRun(prisma, ctx); - const batchId = idGenerator(); - await prisma.batchTaskRun.create({ - data: { - id: batchId, - friendlyId: `batch_${batchId}`, - runtimeEnvironmentId: environment.id, - dependentTaskAttemptId: parentAttempt.id, - }, - }); - await prisma.taskRunDependency.create({ - data: { taskRunId: batchRunDepC.id, dependentBatchRunId: batchId }, - }); - await prisma.taskRunDependency.create({ - data: { taskRunId: batchRunDepD.id, dependentBatchRunId: batchId }, - }); - - const cancelledRunIds: string[] = []; - const callSpy = vi - .spyOn(CancelTaskRunService.prototype, "call") - .mockImplementation(async (taskRun: any) => { - cancelledRunIds.push(taskRun.id); - return { id: taskRun.id, alreadyFinished: false }; - }); - - try { - const service = new CancelTaskAttemptDependenciesService(prisma); - await service.call(parentAttempt.id); - } finally { - callSpy.mockRestore(); - } - - // Each dependent run cancelled exactly once. - expect(cancelledRunIds).toHaveLength(4); - expect(new Set(cancelledRunIds).size).toBe(4); - - // Direct dependencies first (both paths preserve insertion/iteration order), then batch run deps. - const directIds = cancelledRunIds.slice(0, 2); - const batchIds = cancelledRunIds.slice(2); - expect(new Set(directIds)).toEqual(new Set([depRunA.id, depRunB.id])); - expect(new Set(batchIds)).toEqual(new Set([batchRunDepC.id, batchRunDepD.id])); - - // The hydrated runs carry the fields CancelableTaskRun requires. 
- const cancelArgs = callSpy.mock.calls.map((c) => c[0] as any); - for (const run of cancelArgs) { - expect(run).toMatchObject({ - id: expect.any(String), - friendlyId: expect.any(String), - }); - expect(run).toHaveProperty("engine"); - expect(run).toHaveProperty("status"); - expect(run).toHaveProperty("taskEventStore"); - expect(run).toHaveProperty("createdAt"); - expect("completedAt" in run).toBe(true); - } - } -); - -containerTest( - "cancelTaskAttemptDependencies skips dependencies whose run is not hydrated", - async ({ prisma }) => { - const { environment, project, organization } = await seedTestEnvironment(prisma); - const worker = await seedWorker(prisma, { - environmentId: environment.id, - projectId: project.id, - }); - const ctx: SeedContext = { - environmentId: environment.id, - projectId: project.id, - organizationId: organization.id, - ...worker, - }; - - const parentRun = await seedRun(prisma, ctx); - const parentAttempt = await seedAttempt(prisma, ctx, parentRun.id); - - const presentRun = await seedRun(prisma, ctx); - const missingRun = await seedRun(prisma, ctx); - await prisma.taskRunDependency.create({ - data: { taskRunId: presentRun.id, dependentAttemptId: parentAttempt.id }, - }); - await prisma.taskRunDependency.create({ - data: { taskRunId: missingRun.id, dependentAttemptId: parentAttempt.id }, - }); - - const cancelledRunIds: string[] = []; - const callSpy = vi - .spyOn(CancelTaskRunService.prototype, "call") - .mockImplementation(async (taskRun: any) => { - cancelledRunIds.push(taskRun.id); - return { id: taskRun.id, alreadyFinished: false }; - }); - - // Inject a runStore that deliberately omits `missingRun` to exercise the runMap-miss skip - // (the post-redirect "run not found here" case). The constructor's third arg is the seam. 
- const filteringRunStore = { - findRuns: async (args: any) => { - const ids: string[] = args.where.id.in; - return prisma.taskRun.findMany({ - where: { id: { in: ids.filter((id) => id !== missingRun.id) } }, - select: args.select, - }); - }, - } as any; - - try { - const service = new CancelTaskAttemptDependenciesService( - prisma, - undefined, - filteringRunStore - ); - await service.call(parentAttempt.id); - } finally { - callSpy.mockRestore(); - } - - expect(cancelledRunIds).toEqual([presentRun.id]); - } -); From 650a081c2819bb819da863503632a9270eb7df02 Mon Sep 17 00:00:00 2001 From: Dan Sutton Date: Fri, 19 Jun 2026 11:07:54 +0100 Subject: [PATCH 33/83] feat(core): add KSUID run-id minting and an isKsuidId discriminator Add an isomorphic generateKsuid() and an isKsuidId() format check to the run-id scheme, so a run's id can encode which table it belongs to. Additive groundwork: nothing mints KSUIDs yet, and generate() (cuid) is unchanged. --- .../core/src/v3/isomorphic/friendlyId.test.ts | 99 +++++++++++++++++++ packages/core/src/v3/isomorphic/friendlyId.ts | 88 +++++++++++++++++ 2 files changed, 187 insertions(+) create mode 100644 packages/core/src/v3/isomorphic/friendlyId.test.ts diff --git a/packages/core/src/v3/isomorphic/friendlyId.test.ts b/packages/core/src/v3/isomorphic/friendlyId.test.ts new file mode 100644 index 00000000000..e3221fce7f4 --- /dev/null +++ b/packages/core/src/v3/isomorphic/friendlyId.test.ts @@ -0,0 +1,99 @@ +import { describe, it, expect } from "vitest"; +import { + fromFriendlyId, + generateKsuid, + isKsuidId, + RunId, + toFriendlyId, +} from "./friendlyId.js"; + +const BASE62 = /^[0-9A-Za-z]+$/; + +describe("isKsuidId", () => { + it("is true for a freshly minted ksuid and its friendlyId", () => { + const { id, friendlyId } = RunId.generateKsuid(); + + expect(isKsuidId(id)).toBe(true); + expect(isKsuidId(friendlyId)).toBe(true); + }); + + it("is false for a legacy cuid id and its friendlyId", () => { + const { id, friendlyId } = 
RunId.generate(); + + // sanity: legacy cuid is 25 chars + expect(id.length).toBe(25); + expect(isKsuidId(id)).toBe(false); + expect(isKsuidId(friendlyId)).toBe(false); + }); + + it("is false for empty, prefix-only, and malformed input", () => { + expect(isKsuidId("")).toBe(false); + expect(isKsuidId("run_")).toBe(false); + + // 27 chars but contains a non-base62 char (`-`) + const twentySevenWithDash = `${"a".repeat(26)}-`; + expect(twentySevenWithDash).toHaveLength(27); + expect(isKsuidId(twentySevenWithDash)).toBe(false); + expect(isKsuidId(`run_${twentySevenWithDash}`)).toBe(false); + }); + + it("is false for a 26-char and a 28-char body", () => { + expect("a".repeat(26)).toHaveLength(26); + expect(isKsuidId("a".repeat(26))).toBe(false); + expect(isKsuidId("a".repeat(28))).toBe(false); + expect(isKsuidId(`run_${"a".repeat(26)}`)).toBe(false); + expect(isKsuidId(`run_${"a".repeat(28)}`)).toBe(false); + }); +}); + +describe("generateKsuid", () => { + it("produces a 27-char base62 body", () => { + const id = generateKsuid(); + + expect(id).toHaveLength(27); + expect(id).toMatch(BASE62); + }); + + it("produces unique ids across calls", () => { + const ids = new Set(Array.from({ length: 100 }, () => generateKsuid())); + + expect(ids.size).toBe(100); + }); + + it("round-trips through toFriendlyId / fromFriendlyId", () => { + const id = generateKsuid(); + const friendlyId = toFriendlyId("run", id); + + expect(friendlyId).toBe(`run_${id}`); + expect(fromFriendlyId(friendlyId)).toBe(id); + + const generated = RunId.generateKsuid(); + expect(generated.friendlyId).toBe(`run_${generated.id}`); + expect(RunId.fromFriendlyId(generated.friendlyId)).toBe(generated.id); + }); + + it("is time-ordered: a later timestamp sorts after an earlier one", () => { + // The timestamp lives in the high bytes, so a larger timestamp encodes to a + // lexicographically-greater (left-padded, fixed-width) base62 string. 
+ const realNow = Date.now; + try { + Date.now = () => 1_500_000_000_000; + const earlier = generateKsuid(); + Date.now = () => 1_500_000_100_000; + const later = generateKsuid(); + + expect(later > earlier).toBe(true); + expect(isKsuidId(earlier)).toBe(true); + expect(isKsuidId(later)).toBe(true); + } finally { + Date.now = realNow; + } + }); +}); + +describe("isKsuidId and the minter agree", () => { + it("isKsuidId(generateKsuid().id) === true and isKsuidId(generate().id) === false", () => { + expect(isKsuidId(RunId.generateKsuid().id)).toBe(true); + expect(isKsuidId(RunId.generate().id)).toBe(false); + }); +}); diff --git a/packages/core/src/v3/isomorphic/friendlyId.ts b/packages/core/src/v3/isomorphic/friendlyId.ts index 66575c7c178..ebcc8dfa284 100644 --- a/packages/core/src/v3/isomorphic/friendlyId.ts +++ b/packages/core/src/v3/isomorphic/friendlyId.ts @@ -11,6 +11,84 @@ export function generateInternalId() { return cuid(); } +// KSUID epoch (2014-05-13T16:53:20Z) — seconds offset applied to the unix timestamp. +const KSUID_EPOCH = 1_400_000_000; +const KSUID_TIMESTAMP_BYTES = 4; +const KSUID_PAYLOAD_BYTES = 16; +const KSUID_TOTAL_BYTES = KSUID_TIMESTAMP_BYTES + KSUID_PAYLOAD_BYTES; +const KSUID_STRING_LENGTH = 27; +const BASE62_ALPHABET = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; + +/** Encode raw bytes as base62, left-padded to the given length. */ +function base62Encode(bytes: Uint8Array, length: number): string { + // Big-endian base-256 -> base-62 conversion (repeated division). + const digits = Array.from(bytes); + let result = ""; + + while (digits.length > 0) { + let remainder = 0; + const quotient: number[] = []; + + for (let i = 0; i < digits.length; i++) { + const acc = (digits[i] ?? 0) + remainder * 256; + const q = Math.floor(acc / 62); + remainder = acc % 62; + + if (quotient.length > 0 || q > 0) { + quotient.push(q); + } + } + + // `remainder` is always in [0, 61], so this index is always valid. 
+ result = BASE62_ALPHABET.charAt(remainder) + result; + digits.length = 0; + digits.push(...quotient); + } + + return result.padStart(length, BASE62_ALPHABET.charAt(0)); +} + +/** + * Mint a KSUID body: a 27-char, base62, time-ordered identifier. + * + * Layout: 4-byte big-endian uint32 timestamp (seconds since the KSUID epoch) + * + 16 random bytes = 20 bytes, base62-encoded and left-padded to 27 chars. + * + * Isomorphic: relies only on `globalThis.crypto.getRandomValues` for randomness. + */ +export function generateKsuid(): string { + const bytes = new Uint8Array(KSUID_TOTAL_BYTES); + + const timestamp = Math.floor(Date.now() / 1000) - KSUID_EPOCH; + bytes[0] = (timestamp >>> 24) & 0xff; + bytes[1] = (timestamp >>> 16) & 0xff; + bytes[2] = (timestamp >>> 8) & 0xff; + bytes[3] = timestamp & 0xff; + + globalThis.crypto.getRandomValues(bytes.subarray(KSUID_TIMESTAMP_BYTES)); + + return base62Encode(bytes, KSUID_STRING_LENGTH); +} + +/** + * Pure string discriminator: is this id (or friendlyId) a KSUID-format body? + * + * Strips a leading entity prefix such as `run_` (everything up to and including the first underscore) if present, then tests the body for the KSUID + * shape (27 chars, base62). The 25-char legacy cuid and any malformed input + * return false. Never throws. + */ +export function isKsuidId(idOrFriendlyId: string): boolean { + if (!idOrFriendlyId) { + return false; + } + + const underscoreIndex = idOrFriendlyId.indexOf("_"); + const body = + underscoreIndex === -1 ? idOrFriendlyId : idOrFriendlyId.slice(underscoreIndex + 1); + + return body.length === KSUID_STRING_LENGTH && /^[0-9A-Za-z]{27}$/.test(body); +} + /** Convert an internal ID to a friendly ID */ export function toFriendlyId(entityName: string, internalId: string): string { if (!entityName) { @@ -69,6 +147,16 @@ export class IdUtil { }; } + /** Mint an id whose body is a KSUID (27-char, base62, time-ordered). 
*/ + generateKsuid() { + const internalId = generateKsuid(); + + return { + id: internalId, + friendlyId: this.toFriendlyId(internalId), + }; + } + toFriendlyId(internalId: string) { return toFriendlyId(this.entityName, internalId); } From 40aea1b9af82131e38de0221e5108ad053ef5b01 Mon Sep 17 00:00:00 2001 From: Dan Sutton Date: Fri, 19 Jun 2026 11:16:15 +0100 Subject: [PATCH 34/83] feat(database): add the task_run_v2 table Add task_run_v2 as a scalar clone of TaskRun with no foreign-key constraints, plus a (createdAt, id) index for keyset pagination. Unused for now; new runs are routed to it by id format in a later change. --- .../migration.sql | 121 +++++++++++ .../database/prisma/schema.prisma | 192 ++++++++++++++++++ 2 files changed, 313 insertions(+) create mode 100644 internal-packages/database/prisma/migrations/20260616151544_create_task_run_v2/migration.sql diff --git a/internal-packages/database/prisma/migrations/20260616151544_create_task_run_v2/migration.sql b/internal-packages/database/prisma/migrations/20260616151544_create_task_run_v2/migration.sql new file mode 100644 index 00000000000..22a8bcf2293 --- /dev/null +++ b/internal-packages/database/prisma/migrations/20260616151544_create_task_run_v2/migration.sql @@ -0,0 +1,121 @@ +-- CreateTable +CREATE TABLE "public"."task_run_v2" ( + "id" TEXT NOT NULL, + "number" INTEGER NOT NULL DEFAULT 0, + "friendlyId" TEXT NOT NULL, + "engine" "public"."RunEngineVersion" NOT NULL DEFAULT 'V1', + "status" "public"."TaskRunStatus" NOT NULL DEFAULT 'PENDING', + "statusReason" TEXT, + "idempotencyKey" TEXT, + "idempotencyKeyExpiresAt" TIMESTAMP(3), + "idempotencyKeyOptions" JSONB, + "debounce" JSONB, + "taskIdentifier" TEXT NOT NULL, + "isTest" BOOLEAN NOT NULL DEFAULT false, + "payload" TEXT NOT NULL, + "payloadType" TEXT NOT NULL DEFAULT 'application/json', + "context" JSONB, + "traceContext" JSONB, + "traceId" TEXT NOT NULL, + "spanId" TEXT NOT NULL, + "runtimeEnvironmentId" TEXT NOT NULL, + "environmentType" 
"public"."RuntimeEnvironmentType", + "projectId" TEXT NOT NULL, + "organizationId" TEXT, + "queue" TEXT NOT NULL, + "lockedQueueId" TEXT, + "masterQueue" TEXT NOT NULL DEFAULT 'main', + "region" TEXT, + "secondaryMasterQueue" TEXT, + "attemptNumber" INTEGER, + "createdAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP, + "updatedAt" TIMESTAMP(3) NOT NULL, + "runTags" TEXT[], + "taskVersion" TEXT, + "sdkVersion" TEXT, + "cliVersion" TEXT, + "startedAt" TIMESTAMP(3), + "executedAt" TIMESTAMP(3), + "completedAt" TIMESTAMP(3), + "machinePreset" TEXT, + "usageDurationMs" INTEGER NOT NULL DEFAULT 0, + "costInCents" DOUBLE PRECISION NOT NULL DEFAULT 0, + "baseCostInCents" DOUBLE PRECISION NOT NULL DEFAULT 0, + "lockedAt" TIMESTAMP(3), + "lockedById" TEXT, + "lockedToVersionId" TEXT, + "priorityMs" INTEGER NOT NULL DEFAULT 0, + "concurrencyKey" TEXT, + "delayUntil" TIMESTAMP(3), + "queuedAt" TIMESTAMP(3), + "ttl" TEXT, + "expiredAt" TIMESTAMP(3), + "maxAttempts" INTEGER, + "lockedRetryConfig" JSONB, + "oneTimeUseToken" TEXT, + "taskEventStore" TEXT NOT NULL DEFAULT 'taskEvent', + "queueTimestamp" TIMESTAMP(3), + "scheduleInstanceId" TEXT, + "scheduleId" TEXT, + "bulkActionGroupIds" TEXT[] DEFAULT ARRAY[]::TEXT[], + "logsDeletedAt" TIMESTAMP(3), + "replayedFromTaskRunFriendlyId" TEXT, + "rootTaskRunId" TEXT, + "parentTaskRunId" TEXT, + "parentTaskRunAttemptId" TEXT, + "batchId" TEXT, + "resumeParentOnCompletion" BOOLEAN NOT NULL DEFAULT false, + "depth" INTEGER NOT NULL DEFAULT 0, + "parentSpanId" TEXT, + "runChainState" JSONB, + "seedMetadata" TEXT, + "seedMetadataType" TEXT NOT NULL DEFAULT 'application/json', + "metadata" TEXT, + "metadataType" TEXT NOT NULL DEFAULT 'application/json', + "metadataVersion" INTEGER NOT NULL DEFAULT 1, + "annotations" JSONB, + "isWarmStart" BOOLEAN, + "output" TEXT, + "outputType" TEXT NOT NULL DEFAULT 'application/json', + "error" JSONB, + "planType" TEXT, + "maxDurationInSeconds" INTEGER, + "realtimeStreamsVersion" TEXT NOT NULL DEFAULT 
'v1', + "realtimeStreams" TEXT[] DEFAULT ARRAY[]::TEXT[], + "streamBasinName" TEXT, + + CONSTRAINT "task_run_v2_pkey" PRIMARY KEY ("id") +); + +-- CreateIndex +CREATE UNIQUE INDEX "task_run_v2_friendlyId_key" ON "public"."task_run_v2"("friendlyId"); + +-- CreateIndex +CREATE INDEX "task_run_v2_parentTaskRunId_idx" ON "public"."task_run_v2"("parentTaskRunId"); + +-- CreateIndex +CREATE INDEX "task_run_v2_spanId_idx" ON "public"."task_run_v2"("spanId"); + +-- CreateIndex +CREATE INDEX "task_run_v2_parentSpanId_idx" ON "public"."task_run_v2"("parentSpanId"); + +-- CreateIndex +CREATE INDEX "task_run_v2_runTags_idx" ON "public"."task_run_v2" USING GIN ("runTags" array_ops); + +-- CreateIndex +CREATE INDEX "task_run_v2_runtimeEnvironmentId_batchId_idx" ON "public"."task_run_v2"("runtimeEnvironmentId", "batchId"); + +-- CreateIndex +CREATE INDEX "task_run_v2_runtimeEnvironmentId_createdAt_idx" ON "public"."task_run_v2"("runtimeEnvironmentId", "createdAt" DESC); + +-- CreateIndex +CREATE INDEX "task_run_v2_createdAt_idx" ON "public"."task_run_v2" USING BRIN ("createdAt"); + +-- CreateIndex +CREATE INDEX "task_run_v2_createdAt_id_idx" ON "public"."task_run_v2"("createdAt", "id"); + +-- CreateIndex +CREATE UNIQUE INDEX "task_run_v2_oneTimeUseToken_key" ON "public"."task_run_v2"("oneTimeUseToken"); + +-- CreateIndex +CREATE UNIQUE INDEX "task_run_v2_runtimeEnvironmentId_taskIdentifier_idempotency_key" ON "public"."task_run_v2"("runtimeEnvironmentId", "taskIdentifier", "idempotencyKey"); diff --git a/internal-packages/database/prisma/schema.prisma b/internal-packages/database/prisma/schema.prisma index bb80da3a7ec..844d0da5aed 100644 --- a/internal-packages/database/prisma/schema.prisma +++ b/internal-packages/database/prisma/schema.prisma @@ -1095,6 +1095,198 @@ model TaskRun { @@index([createdAt], type: Brin) } +/// Parallel mirror of TaskRun. 
+/// Structural copy of TaskRun's scalar columns with NO relation fields, so it +/// carries zero foreign-key constraints and requires no edits to other models. +/// FK id columns are kept as plain scalars; integrity is enforced in app code, +/// matching TaskRun's current FK-free state. Not yet written to or read from. +model TaskRunV2 { + id String @id @default(cuid()) + + number Int @default(0) + friendlyId String @unique + + engine RunEngineVersion @default(V1) + + status TaskRunStatus @default(PENDING) + statusReason String? + + idempotencyKey String? + idempotencyKeyExpiresAt DateTime? + /// Stores the user-provided key and scope: { key: string, scope: "run" | "attempt" | "global" } + idempotencyKeyOptions Json? + + /// Debounce options: { key: string, delay: string, createdAt: Date } + debounce Json? + + taskIdentifier String + + isTest Boolean @default(false) + + payload String + payloadType String @default("application/json") + context Json? + traceContext Json? + + traceId String + spanId String + + runtimeEnvironmentId String + + environmentType RuntimeEnvironmentType? + + projectId String + + organizationId String? + + // The specific queue this run is in + queue String + // The queueId is set when the run is locked to a specific queue + lockedQueueId String? + + /// The main queue that this run is part of + workerQueue String @default("main") @map("masterQueue") + + /// User-facing geo region, stamped at trigger; workerQueue is where it actually ran. + region String? + + /// @deprecated + secondaryMasterQueue String? + + /// From engine v2+ this will be defined after a run has been dequeued (starting at 1) + attemptNumber Int? + + createdAt DateTime @default(now()) + updatedAt DateTime @updatedAt + + /// Denormized column that holds the raw tags + runTags String[] + + /// Denormalized version of the background worker task + taskVersion String? + sdkVersion String? + cliVersion String? 
+ + /// startedAt marks the point at which a run is dequeued from MarQS + startedAt DateTime? + /// executedAt is set when the first attempt is about to execute + executedAt DateTime? + completedAt DateTime? + machinePreset String? + + usageDurationMs Int @default(0) + costInCents Float @default(0) + baseCostInCents Float @default(0) + + lockedAt DateTime? + lockedById String? + + lockedToVersionId String? + + /// The "priority" of the run. This is just a negative offset in ms for the queue timestamp + /// E.g. a value of 60_000 would put the run into the queue 60s ago. + priorityMs Int @default(0) + + concurrencyKey String? + + delayUntil DateTime? + queuedAt DateTime? + ttl String? + expiredAt DateTime? + maxAttempts Int? + lockedRetryConfig Json? + + /// optional token that can be used to authenticate the task run + oneTimeUseToken String? + + /// Where the logs are stored + taskEventStore String @default("taskEvent") + + queueTimestamp DateTime? + + scheduleInstanceId String? + scheduleId String? + + bulkActionGroupIds String[] @default([]) + + logsDeletedAt DateTime? + + replayedFromTaskRunFriendlyId String? + + rootTaskRunId String? + + parentTaskRunId String? + + parentTaskRunAttemptId String? + + batchId String? + + /// whether or not the task run was created because of a triggerAndWait for batchTriggerAndWait + resumeParentOnCompletion Boolean @default(false) + + /// The depth of this task run in the task run hierarchy + depth Int @default(0) + + /// The span ID of the "trigger" span in the parent task run + parentSpanId String? + + /// Holds the state of the run chain for deadlock detection + runChainState Json? + + /// seed run metadata + seedMetadata String? + seedMetadataType String @default("application/json") + + /// Run metadata + metadata String? + metadataType String @default("application/json") + metadataVersion Int @default(1) + + /// Structured annotations: triggerSource, triggerAction, rootTriggerSource, rootScheduleId + annotations Json? 
+ + /// Whether the latest attempt was a warm start. Null until first attempt starts. + isWarmStart Boolean? + + /// Run output + output String? + outputType String @default("application/json") + + /// Run error + error Json? + + /// Organization's billing plan type (cached for fallback when billing API fails) + planType String? + + maxDurationInSeconds Int? + + /// The version of the realtime streams implementation used by the run + realtimeStreamsVersion String @default("v1") + /// Store the stream keys that are being used by the run + realtimeStreams String[] @default([]) + /// S2 basin where this run's realtime streams live. Stamped at create + /// time from `Organization.streamBasinName` so reads can resolve the + /// basin without joining org. Null when the org has no per-org basin + /// (OSS, or pre-backfill); reads fall back to the global basin. + streamBasinName String? + + @@unique([oneTimeUseToken]) + @@unique([runtimeEnvironmentId, taskIdentifier, idempotencyKey]) + // Finding child runs + @@index([parentTaskRunId]) + // Run page inspector + @@index([spanId]) + @@index([parentSpanId]) + // Finding runs in a batch + @@index([runTags(ops: ArrayOps)], type: Gin) + @@index([runtimeEnvironmentId, batchId]) + @@index([runtimeEnvironmentId, createdAt(sort: Desc)]) + @@index([createdAt], type: Brin) + // Keyset cursor for merged pagination across run tables + @@index([createdAt, id]) + @@map("task_run_v2") +} + model TaskRunTemplate { id String @id @default(cuid()) From 72af7aae407c4419d055a8eeb901cfd943cec358 Mon Sep 17 00:00:00 2001 From: Dan Sutton Date: Fri, 19 Jun 2026 12:02:27 +0100 Subject: [PATCH 35/83] feat(database): drop incoming foreign keys referencing TaskRun Drop the 14 child-table foreign keys that referenced TaskRun.id so a child row can reference a run in either the legacy or the new run table by plain scalar. Run integrity moves to app code, symmetric with TaskRun's already dropped outgoing foreign keys. Relations stay in the Prisma schema. 
--- .../migration.sql | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 internal-packages/database/prisma/migrations/20260619120042_drop_taskrun_incoming_fks/migration.sql diff --git a/internal-packages/database/prisma/migrations/20260619120042_drop_taskrun_incoming_fks/migration.sql b/internal-packages/database/prisma/migrations/20260619120042_drop_taskrun_incoming_fks/migration.sql new file mode 100644 index 00000000000..9e7313aade9 --- /dev/null +++ b/internal-packages/database/prisma/migrations/20260619120042_drop_taskrun_incoming_fks/migration.sql @@ -0,0 +1,17 @@ +-- Drop all foreign key constraints that reference TaskRun.id from child tables +-- (no schema change, data intact). Integrity moves to app code so a child row +-- can reference a run in either TaskRun (legacy) or task_run_v2 (new) by scalar. +ALTER TABLE "public"."TaskRunAttempt" DROP CONSTRAINT IF EXISTS "TaskRunAttempt_taskRunId_fkey"; +ALTER TABLE "public"."TaskRunDependency" DROP CONSTRAINT IF EXISTS "TaskRunDependency_taskRunId_fkey"; +ALTER TABLE "public"."BatchTaskRunItem" DROP CONSTRAINT IF EXISTS "BatchTaskRunItem_taskRunId_fkey"; +ALTER TABLE "public"."Checkpoint" DROP CONSTRAINT IF EXISTS "Checkpoint_runId_fkey"; +ALTER TABLE "public"."CheckpointRestoreEvent" DROP CONSTRAINT IF EXISTS "CheckpointRestoreEvent_runId_fkey"; +ALTER TABLE "public"."ProjectAlert" DROP CONSTRAINT IF EXISTS "ProjectAlert_taskRunId_fkey"; +ALTER TABLE "public"."BulkActionItem" DROP CONSTRAINT IF EXISTS "BulkActionItem_sourceRunId_fkey"; +ALTER TABLE "public"."BulkActionItem" DROP CONSTRAINT IF EXISTS "BulkActionItem_destinationRunId_fkey"; +ALTER TABLE "public"."_TaskRunToTaskRunTag" DROP CONSTRAINT IF EXISTS "_TaskRunToTaskRunTag_A_fkey"; +ALTER TABLE "public"."TaskRunExecutionSnapshot" DROP CONSTRAINT IF EXISTS "TaskRunExecutionSnapshot_runId_fkey"; +ALTER TABLE "public"."Waitpoint" DROP CONSTRAINT IF EXISTS "Waitpoint_completedByTaskRunId_fkey"; +ALTER TABLE "public"."TaskRunWaitpoint" 
DROP CONSTRAINT IF EXISTS "TaskRunWaitpoint_taskRunId_fkey"; +ALTER TABLE "public"."_WaitpointRunConnections" DROP CONSTRAINT IF EXISTS "_WaitpointRunConnections_A_fkey"; +ALTER TABLE "public"."PlaygroundConversation" DROP CONSTRAINT IF EXISTS "PlaygroundConversation_runId_fkey"; From 1e606626901deb4b907a98cffea9f834848dd5db Mon Sep 17 00:00:00 2001 From: Dan Sutton Date: Fri, 19 Jun 2026 12:36:00 +0100 Subject: [PATCH 36/83] feat(database): mirror TaskRun relations on TaskRunV2 Give TaskRunV2 the same relation surface as TaskRun (belongs-to plus child collections, with child relations sharing the existing scalar fields) so run reads through the store can include relations regardless of table. No DB foreign keys: stripped in production migrations and in the test harness. --- .../database/prisma/schema.prisma | 104 +++++++++++++++--- 1 file changed, 90 insertions(+), 14 deletions(-) diff --git a/internal-packages/database/prisma/schema.prisma b/internal-packages/database/prisma/schema.prisma index 844d0da5aed..5668a5ac93c 100644 --- a/internal-packages/database/prisma/schema.prisma +++ b/internal-packages/database/prisma/schema.prisma @@ -366,6 +366,7 @@ model RuntimeEnvironment { backgroundWorkers BackgroundWorker[] backgroundWorkerTasks BackgroundWorkerTask[] taskRuns TaskRun[] + taskRunsV2 TaskRunV2[] @relation("taskRunsV2") taskQueues TaskQueue[] batchTaskRuns BatchTaskRun[] environmentVariableValues EnvironmentVariableValue[] @@ -453,6 +454,7 @@ model Project { backgroundWorkers BackgroundWorker[] backgroundWorkerTasks BackgroundWorkerTask[] taskRuns TaskRun[] + taskRunsV2 TaskRunV2[] @relation("taskRunsV2") runTags TaskRunTag[] taskQueues TaskQueue[] environmentVariables EnvironmentVariable[] @@ -560,6 +562,7 @@ model BackgroundWorker { tasks BackgroundWorkerTask[] attempts TaskRunAttempt[] lockedRuns TaskRun[] + lockedRunsV2 TaskRunV2[] @relation("lockedRunsV2") files BackgroundWorkerFile[] queues TaskQueue[] promptVersions PromptVersion[] @@ -695,6 +698,7 @@ 
model BackgroundWorkerTask { attempts TaskRunAttempt[] runs TaskRun[] + runsV2 TaskRunV2[] @relation("lockedRunsV2") queueConfig Json? retryConfig Json? @@ -742,7 +746,9 @@ model PlaygroundConversation { /// The current active run backing this conversation (null if no run yet) runId String? - run TaskRun? @relation(fields: [runId], references: [id], onDelete: SetNull, onUpdate: Cascade) + run TaskRun? @relation(fields: [runId], references: [id], onDelete: SetNull, onUpdate: Cascade, map: "PlaygroundConversation_runId_fkey") + /// Mirror relation to TaskRunV2 reusing the same runId scalar (FK stripped in prod) + runV2 TaskRunV2? @relation("playgroundConversationsV2", fields: [runId], references: [id], onDelete: SetNull, onUpdate: Cascade, map: "PlaygroundConversation_runId_v2_fkey") /// The client data JSON used for this conversation clientData Json? @@ -1131,10 +1137,12 @@ model TaskRunV2 { traceId String spanId String + runtimeEnvironment RuntimeEnvironment @relation("taskRunsV2", fields: [runtimeEnvironmentId], references: [id], onDelete: Cascade, onUpdate: Cascade, map: "task_run_v2_runtimeEnvironmentId_fkey") runtimeEnvironmentId String environmentType RuntimeEnvironmentType? + project Project @relation("taskRunsV2", fields: [projectId], references: [id], onDelete: Cascade, onUpdate: Cascade, map: "task_run_v2_projectId_fkey") projectId String organizationId String? @@ -1159,6 +1167,9 @@ model TaskRunV2 { createdAt DateTime @default(now()) updatedAt DateTime @updatedAt + attempts TaskRunAttempt[] @relation("attemptsV2") + tags TaskRunTag[] @relation("taskRunTagsV2") + /// Denormized column that holds the raw tags runTags String[] @@ -1167,6 +1178,8 @@ model TaskRunV2 { sdkVersion String? cliVersion String? + checkpoints Checkpoint[] @relation("checkpointsV2") + /// startedAt marks the point at which a run is dequeued from MarQS startedAt DateTime? 
/// executedAt is set when the first attempt is about to execute @@ -1179,8 +1192,10 @@ model TaskRunV2 { baseCostInCents Float @default(0) lockedAt DateTime? + lockedBy BackgroundWorkerTask? @relation("lockedRunsV2", fields: [lockedById], references: [id], map: "task_run_v2_lockedById_fkey") lockedById String? + lockedToVersion BackgroundWorker? @relation("lockedRunsV2", fields: [lockedToVersionId], references: [id], map: "task_run_v2_lockedToVersionId_fkey") lockedToVersionId String? /// The "priority" of the run. This is just a negative offset in ms for the queue timestamp @@ -1199,11 +1214,27 @@ model TaskRunV2 { /// optional token that can be used to authenticate the task run oneTimeUseToken String? + ///When this run is finished, the waitpoint will be marked as completed + associatedWaitpoint Waitpoint? @relation("CompletingRunV2") + + ///If there are any blocked waitpoints, the run won't be executed + blockedByWaitpoints TaskRunWaitpoint[] @relation("taskRunWaitpointsV2") + + /// All waitpoints that blocked this run at some point, used for display purposes + connectedWaitpoints Waitpoint[] @relation("WaitpointRunConnectionsV2") + /// Where the logs are stored taskEventStore String @default("taskEvent") queueTimestamp DateTime? + batchItems BatchTaskRunItem[] @relation("batchItemsV2") + dependency TaskRunDependency? @relation("dependencyV2") + CheckpointRestoreEvent CheckpointRestoreEvent[] @relation("checkpointRestoreEventsV2") + executionSnapshots TaskRunExecutionSnapshot[] @relation("executionSnapshotsV2") + + alerts ProjectAlert[] @relation("alertsV2") + scheduleInstanceId String? scheduleId String? @@ -1213,12 +1244,26 @@ model TaskRunV2 { replayedFromTaskRunFriendlyId String? + /// This represents the original task that that was triggered outside of a Trigger.dev task + rootTaskRun TaskRunV2? 
@relation("TaskRootRunV2", fields: [rootTaskRunId], references: [id], onDelete: SetNull, onUpdate: NoAction, map: "task_run_v2_rootTaskRunId_fkey") rootTaskRunId String? + /// The root run will have a list of all the descendant runs, children, grand children, etc. + descendantRuns TaskRunV2[] @relation("TaskRootRunV2") + + /// The immediate parent run of this task run + parentTaskRun TaskRunV2? @relation("TaskParentRunV2", fields: [parentTaskRunId], references: [id], onDelete: SetNull, onUpdate: NoAction, map: "task_run_v2_parentTaskRunId_fkey") parentTaskRunId String? + /// The immediate child runs of this task run + childRuns TaskRunV2[] @relation("TaskParentRunV2") + + /// The immediate parent attempt of this task run + parentTaskRunAttempt TaskRunAttempt? @relation("TaskParentRunAttemptV2", fields: [parentTaskRunAttemptId], references: [id], onDelete: SetNull, onUpdate: NoAction, map: "task_run_v2_parentTaskRunAttemptId_fkey") parentTaskRunAttemptId String? + /// The batch run that this task run is a part of + batch BatchTaskRun? @relation("batchRunsV2", fields: [batchId], references: [id], onDelete: SetNull, onUpdate: NoAction, map: "task_run_v2_batchId_fkey") batchId String? /// whether or not the task run was created because of a triggerAndWait for batchTriggerAndWait @@ -1270,6 +1315,11 @@ model TaskRunV2 { /// (OSS, or pre-backfill); reads fall back to the global basin. streamBasinName String? 
+ sourceBulkActionItems BulkActionItem[] @relation("SourceActionItemRunV2") + destinationBulkActionItems BulkActionItem[] @relation("DestinationActionItemRunV2") + + playgroundConversations PlaygroundConversation[] @relation("playgroundConversationsV2") + @@unique([oneTimeUseToken]) @@unique([runtimeEnvironmentId, taskIdentifier, idempotencyKey]) // Finding child runs @@ -1407,7 +1457,9 @@ model TaskRunExecutionSnapshot { /// Run runId String - run TaskRun @relation(fields: [runId], references: [id]) + run TaskRun @relation(fields: [runId], references: [id], map: "TaskRunExecutionSnapshot_runId_fkey") + /// Mirror relation to TaskRunV2 reusing the same runId scalar (FK stripped in prod) + runV2 TaskRunV2 @relation("executionSnapshotsV2", fields: [runId], references: [id], map: "TaskRunExecutionSnapshot_runId_v2_fkey") runStatus TaskRunStatus // Batch @@ -1527,7 +1579,9 @@ model Waitpoint { /// If it's a RUN type waitpoint, this is the associated run completedByTaskRunId String? @unique - completedByTaskRun TaskRun? @relation("CompletingRun", fields: [completedByTaskRunId], references: [id], onDelete: SetNull) + completedByTaskRun TaskRun? @relation("CompletingRun", fields: [completedByTaskRunId], references: [id], onDelete: SetNull, map: "Waitpoint_completedByTaskRunId_fkey") + /// Mirror relation to TaskRunV2 reusing the same completedByTaskRunId scalar (FK stripped in prod) + completedByTaskRunV2 TaskRunV2? @relation("CompletingRunV2", fields: [completedByTaskRunId], references: [id], onDelete: SetNull, map: "Waitpoint_completedByTaskRunId_v2_fkey") /// If it's a DATETIME type waitpoint, this is the date. /// If it's a MANUAL waitpoint, this can be set as the `timeout`. 
@@ -1541,7 +1595,8 @@ model Waitpoint { blockingTaskRuns TaskRunWaitpoint[] /// All runs that have ever been blocked by this waitpoint, used for display purposes - connectedRuns TaskRun[] @relation("WaitpointRunConnections") + connectedRuns TaskRun[] @relation("WaitpointRunConnections") + connectedRunsV2 TaskRunV2[] @relation("WaitpointRunConnectionsV2") /// When a waitpoint is complete completedExecutionSnapshots TaskRunExecutionSnapshot[] @relation("completedWaitpoints") @@ -1592,7 +1647,9 @@ enum WaitpointStatus { model TaskRunWaitpoint { id String @id @default(cuid()) - taskRun TaskRun @relation(fields: [taskRunId], references: [id]) + taskRun TaskRun @relation(fields: [taskRunId], references: [id], map: "TaskRunWaitpoint_taskRunId_fkey") + /// Mirror relation to TaskRunV2 reusing the same taskRunId scalar (FK stripped in prod) + taskRunV2 TaskRunV2 @relation("taskRunWaitpointsV2", fields: [taskRunId], references: [id], map: "TaskRunWaitpoint_taskRunId_v2_fkey") taskRunId String waitpoint Waitpoint @relation(fields: [waitpointId], references: [id]) @@ -1756,7 +1813,8 @@ model TaskRunTag { friendlyId String @unique - runs TaskRun[] + runs TaskRun[] + runsV2 TaskRunV2[] @relation("taskRunTagsV2") project Project @relation(fields: [projectId], references: [id], onDelete: Cascade, onUpdate: Cascade) projectId String @@ -1773,7 +1831,9 @@ model TaskRunDependency { id String @id @default(cuid()) /// The child run - taskRun TaskRun @relation(fields: [taskRunId], references: [id], onDelete: Cascade, onUpdate: Cascade) + taskRun TaskRun @relation(fields: [taskRunId], references: [id], onDelete: Cascade, onUpdate: Cascade, map: "TaskRunDependency_taskRunId_fkey") + /// Mirror relation to TaskRunV2 reusing the same taskRunId scalar (FK stripped in prod) + taskRunV2 TaskRunV2 @relation("dependencyV2", fields: [taskRunId], references: [id], onDelete: Cascade, onUpdate: Cascade, map: "TaskRunDependency_taskRunId_v2_fkey") taskRunId String @unique checkpointEvent 
CheckpointRestoreEvent? @relation(fields: [checkpointEventId], references: [id], onDelete: Cascade, onUpdate: Cascade) @@ -1821,7 +1881,9 @@ model TaskRunAttempt { friendlyId String @unique - taskRun TaskRun @relation("attempts", fields: [taskRunId], references: [id], onDelete: Cascade, onUpdate: Cascade) + taskRun TaskRun @relation("attempts", fields: [taskRunId], references: [id], onDelete: Cascade, onUpdate: Cascade, map: "TaskRunAttempt_taskRunId_fkey") + /// Mirror relation to TaskRunV2 reusing the same taskRunId scalar (FK stripped in prod) + taskRunV2 TaskRunV2 @relation("attemptsV2", fields: [taskRunId], references: [id], onDelete: Cascade, onUpdate: Cascade, map: "TaskRunAttempt_taskRunId_v2_fkey") taskRunId String backgroundWorker BackgroundWorker @relation(fields: [backgroundWorkerId], references: [id], onDelete: Cascade, onUpdate: Cascade) @@ -1858,6 +1920,7 @@ model TaskRunAttempt { CheckpointRestoreEvent CheckpointRestoreEvent[] alerts ProjectAlert[] childRuns TaskRun[] @relation("TaskParentRunAttempt") + childRunsV2 TaskRunV2[] @relation("TaskParentRunAttemptV2") @@unique([taskRunId, number]) @@index([taskRunId]) @@ -2059,6 +2122,7 @@ model BatchTaskRun { runtimeEnvironmentId String /// This only includes new runs, not idempotent runs. 
runs TaskRun[] + runsV2 TaskRunV2[] @relation("batchRunsV2") createdAt DateTime @default(now()) updatedAt DateTime @updatedAt @@ -2142,7 +2206,9 @@ model BatchTaskRunItem { batchTaskRun BatchTaskRun @relation(fields: [batchTaskRunId], references: [id], onDelete: Cascade, onUpdate: Cascade) batchTaskRunId String - taskRun TaskRun @relation(fields: [taskRunId], references: [id], onDelete: Cascade, onUpdate: Cascade) + taskRun TaskRun @relation(fields: [taskRunId], references: [id], onDelete: Cascade, onUpdate: Cascade, map: "BatchTaskRunItem_taskRunId_fkey") + /// Mirror relation to TaskRunV2 reusing the same taskRunId scalar (FK stripped in prod) + taskRunV2 TaskRunV2 @relation("batchItemsV2", fields: [taskRunId], references: [id], onDelete: Cascade, onUpdate: Cascade, map: "BatchTaskRunItem_taskRunId_v2_fkey") taskRunId String taskRunAttempt TaskRunAttempt? @relation(fields: [taskRunAttemptId], references: [id], onDelete: SetNull, onUpdate: Cascade) @@ -2237,7 +2303,9 @@ model Checkpoint { events CheckpointRestoreEvent[] - run TaskRun @relation(fields: [runId], references: [id], onDelete: Cascade, onUpdate: Cascade) + run TaskRun @relation(fields: [runId], references: [id], onDelete: Cascade, onUpdate: Cascade, map: "Checkpoint_runId_fkey") + /// Mirror relation to TaskRunV2 reusing the same runId scalar (FK stripped in prod) + runV2 TaskRunV2 @relation("checkpointsV2", fields: [runId], references: [id], onDelete: Cascade, onUpdate: Cascade, map: "Checkpoint_runId_v2_fkey") runId String attempt TaskRunAttempt @relation(fields: [attemptId], references: [id], onDelete: Cascade, onUpdate: Cascade) @@ -2272,7 +2340,9 @@ model CheckpointRestoreEvent { checkpoint Checkpoint @relation(fields: [checkpointId], references: [id], onDelete: Cascade, onUpdate: Cascade) checkpointId String - run TaskRun @relation(fields: [runId], references: [id], onDelete: Cascade, onUpdate: Cascade) + run TaskRun @relation(fields: [runId], references: [id], onDelete: Cascade, onUpdate: 
Cascade, map: "CheckpointRestoreEvent_runId_fkey") + /// Mirror relation to TaskRunV2 reusing the same runId scalar (FK stripped in prod) + runV2 TaskRunV2 @relation("checkpointRestoreEventsV2", fields: [runId], references: [id], onDelete: Cascade, onUpdate: Cascade, map: "CheckpointRestoreEvent_runId_v2_fkey") runId String attempt TaskRunAttempt @relation(fields: [attemptId], references: [id], onDelete: Cascade, onUpdate: Cascade) @@ -2558,7 +2628,9 @@ model ProjectAlert { taskRunAttempt TaskRunAttempt? @relation(fields: [taskRunAttemptId], references: [id], onDelete: Cascade, onUpdate: Cascade) taskRunAttemptId String? - taskRun TaskRun? @relation(fields: [taskRunId], references: [id], onDelete: Cascade, onUpdate: Cascade) + taskRun TaskRun? @relation(fields: [taskRunId], references: [id], onDelete: Cascade, onUpdate: Cascade, map: "ProjectAlert_taskRunId_fkey") + /// Mirror relation to TaskRunV2 reusing the same taskRunId scalar (FK stripped in prod) + taskRunV2 TaskRunV2? @relation("alertsV2", fields: [taskRunId], references: [id], onDelete: Cascade, onUpdate: Cascade, map: "ProjectAlert_taskRunId_v2_fkey") taskRunId String? workerDeployment WorkerDeployment? @relation(fields: [workerDeploymentId], references: [id], onDelete: Cascade, onUpdate: Cascade) @@ -2739,11 +2811,15 @@ model BulkActionItem { status BulkActionItemStatus @default(PENDING) /// The run that is the source of the action, e.g. 
when replaying this is the original run - sourceRun TaskRun @relation("SourceActionItemRun", fields: [sourceRunId], references: [id], onDelete: Cascade, onUpdate: Cascade) + sourceRun TaskRun @relation("SourceActionItemRun", fields: [sourceRunId], references: [id], onDelete: Cascade, onUpdate: Cascade, map: "BulkActionItem_sourceRunId_fkey") + /// Mirror relation to TaskRunV2 reusing the same sourceRunId scalar (FK stripped in prod) + sourceRunV2 TaskRunV2 @relation("SourceActionItemRunV2", fields: [sourceRunId], references: [id], onDelete: Cascade, onUpdate: Cascade, map: "BulkActionItem_sourceRunId_v2_fkey") sourceRunId String /// The run that's a result of the action, this will be set when the run has been created - destinationRun TaskRun? @relation("DestinationActionItemRun", fields: [destinationRunId], references: [id], onDelete: Cascade, onUpdate: Cascade) + destinationRun TaskRun? @relation("DestinationActionItemRun", fields: [destinationRunId], references: [id], onDelete: Cascade, onUpdate: Cascade, map: "BulkActionItem_destinationRunId_fkey") + /// Mirror relation to TaskRunV2 reusing the same destinationRunId scalar (FK stripped in prod) + destinationRunV2 TaskRunV2? @relation("DestinationActionItemRunV2", fields: [destinationRunId], references: [id], onDelete: Cascade, onUpdate: Cascade, map: "BulkActionItem_destinationRunId_v2_fkey") destinationRunId String? error String? From 0a591fb5ce347b8bb97a596ff8e5ffeb81da3f71 Mon Sep 17 00:00:00 2001 From: Dan Sutton Date: Fri, 19 Jun 2026 12:36:00 +0100 Subject: [PATCH 37/83] test(testcontainers): strip run foreign keys after schema push Production drops the foreign keys on and referencing the run tables, but the test harness builds via prisma db push, which recreates them from the schema relations. Drop them after the push so test databases match production and a run can live in either run table. 
--- internal-packages/testcontainers/src/utils.ts | 42 +++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/internal-packages/testcontainers/src/utils.ts b/internal-packages/testcontainers/src/utils.ts index 4183e85b40b..9cbaeb04ce6 100644 --- a/internal-packages/testcontainers/src/utils.ts +++ b/internal-packages/testcontainers/src/utils.ts @@ -2,6 +2,7 @@ import { createClient } from "@clickhouse/client"; import { PostgreSqlContainer, StartedPostgreSqlContainer } from "@testcontainers/postgresql"; import { RedisContainer, StartedRedisContainer } from "@testcontainers/redis"; import { tryCatch } from "@trigger.dev/core"; +import { PrismaClient } from "@trigger.dev/database"; import Redis from "ioredis"; import path from "path"; import { isDebug } from "std-env"; @@ -48,9 +49,50 @@ export async function pushDatabaseSchema(databaseUrl: string) { } ); + await dropRunForeignKeys(databaseUrl); + return result; } +/** + * Production drops every foreign key that sits on, or points at, the run tables (`TaskRun` and + * `task_run_v2`) — a run's id is just a scalar that may live in either physical table, so the FKs + * can't be enforced. `prisma db push` doesn't know that: it recreates a constraint for every + * relation still declared in schema.prisma, so the template DB ends up with run FKs production + * doesn't have. That makes tests diverge — e.g. inserting a child row (a `TaskRunExecutionSnapshot` + * whose `runId` is a `task_run_v2` id) trips a `..._runId_fkey -> TaskRun` constraint that doesn't + * exist in prod. So after the push we strip those FKs to match production exactly. + * + * This is done dynamically (rather than naming each constraint) so any relation added to the schema + * later has its test-only run FK stripped automatically. It only removes FK constraints, so it + * cannot corrupt valid data — it makes the template DB strictly more faithful to production. 
+ */ +async function dropRunForeignKeys(databaseUrl: string) { + const prisma = new PrismaClient({ + datasources: { db: { url: databaseUrl } }, + }); + + try { + await prisma.$executeRawUnsafe(` +DO $$ +DECLARE r record; +BEGIN + FOR r IN + SELECT conrelid::regclass::text AS tbl, conname + FROM pg_constraint + WHERE contype = 'f' + AND (confrelid IN ('"TaskRun"'::regclass, 'task_run_v2'::regclass) + OR conrelid IN ('"TaskRun"'::regclass, 'task_run_v2'::regclass)) + LOOP + EXECUTE format('ALTER TABLE %s DROP CONSTRAINT %I', r.tbl, r.conname); + END LOOP; +END $$; +`); + } finally { + await prisma.$disconnect(); + } +} + /** * Caps each container's CPU/memory to approximate the 2-core CI runner locally (for timing + flake * reproduction). Set TESTCONTAINERS_CPU (cores per container, e.g. "2") and/or From f8c1a04401f849f7da036a068188d72c1a839184 Mon Sep 17 00:00:00 2001 From: Dan Sutton Date: Fri, 19 Jun 2026 12:43:17 +0100 Subject: [PATCH 38/83] feat(run-store): route run reads and writes by id format Select the TaskRun or task_run_v2 table per operation from the run id's format (KSUID routes to v2, anything else to legacy) via a runModel helper, so a run is read and written in its own table. Batch and predicate-keyed operations span both tables. Behavior-preserving for legacy runs. 
--- .../run-store/src/PostgresRunStore.test.ts | 673 ++++++++++++++++++ .../run-store/src/PostgresRunStore.ts | 201 ++++-- 2 files changed, 821 insertions(+), 53 deletions(-) diff --git a/internal-packages/run-store/src/PostgresRunStore.test.ts b/internal-packages/run-store/src/PostgresRunStore.test.ts index 47876b70c8d..d4f5b851f22 100644 --- a/internal-packages/run-store/src/PostgresRunStore.test.ts +++ b/internal-packages/run-store/src/PostgresRunStore.test.ts @@ -1,4 +1,5 @@ import { postgresTest } from "@internal/testcontainers"; +import { isKsuidId, RunId } from "@trigger.dev/core/v3/isomorphic"; import type { PrismaClient } from "@trigger.dev/database"; import { describe, expect } from "vitest"; import { PostgresRunStore } from "./PostgresRunStore.js"; @@ -1772,3 +1773,675 @@ describe("PostgresRunStore — read", () => { expect(found[0]?.payloadType).toBe("application/json"); }); }); + +describe("PostgresRunStore — table routing by id format", () => { + // Seed a run directly into one physical table, choosing the delegate by id + // format the same way the store does. Returns the ids used. + async function seedRoutedRun( + prisma: PrismaClient, + params: { + id: string; + friendlyId: string; + organizationId: string; + projectId: string; + runtimeEnvironmentId: string; + status?: string; + idempotencyKey?: string; + taskIdentifier?: string; + } + ) { + const delegate = isKsuidId(params.id) + ? (prisma.taskRunV2 as unknown as typeof prisma.taskRun) + : prisma.taskRun; + + await delegate.create({ + data: { + id: params.id, + engine: "V2", + status: (params.status as any) ?? "PENDING", + friendlyId: params.friendlyId, + runtimeEnvironmentId: params.runtimeEnvironmentId, + environmentType: "DEVELOPMENT", + organizationId: params.organizationId, + projectId: params.projectId, + taskIdentifier: params.taskIdentifier ?? 
"my-task", + payload: "{}", + payloadType: "application/json", + traceContext: {}, + traceId: `trace_${params.id}`, + spanId: `span_${params.id}`, + queue: "task/my-task", + isTest: false, + taskEventStore: "taskEvent", + depth: 0, + ...(params.idempotencyKey !== undefined && { idempotencyKey: params.idempotencyKey }), + }, + }); + } + + postgresTest( + "createRun with a cuid id lands a row in TaskRun and NOT in task_run_v2", + async ({ prisma }) => { + const { organization, project, environment } = await seedEnvironment(prisma); + const store = new PostgresRunStore({ prisma, readOnlyPrisma: prisma }); + + const cuid = RunId.generate(); + expect(isKsuidId(cuid.id)).toBe(false); + + await store.createRun({ + data: { + id: cuid.id, + engine: "V2", + status: "PENDING", + friendlyId: cuid.friendlyId, + runtimeEnvironmentId: environment.id, + environmentType: "DEVELOPMENT", + organizationId: organization.id, + projectId: project.id, + taskIdentifier: "my-task", + payload: "{}", + payloadType: "application/json", + traceContext: {}, + traceId: "trace_cuid", + spanId: "span_cuid", + queue: "task/my-task", + isTest: false, + taskEventStore: "taskEvent", + depth: 0, + }, + snapshot: { + engine: "V2", + executionStatus: "RUN_CREATED", + description: "Run was created", + runStatus: "PENDING", + environmentId: environment.id, + environmentType: "DEVELOPMENT", + projectId: project.id, + organizationId: organization.id, + }, + }); + + // cuid run is in TaskRun, not in task_run_v2. 
+ const legacyRow = await prisma.taskRun.findUnique({ where: { id: cuid.id } }); + expect(legacyRow).not.toBeNull(); + const cuidInV2 = await prisma.taskRunV2.findUnique({ where: { id: cuid.id } }); + expect(cuidInV2).toBeNull(); + } + ); + + postgresTest( + "createRun routes a KSUID id to task_run_v2: the scalar row lands there and not in TaskRun", + async ({ prisma }) => { + // This test exercises the routing decision in isolation by writing the + // scalar row directly to the table `createRun` would pick for a KSUID + // `data.id`, then asserts the row landed in task_run_v2 and not in TaskRun. + // The full v2 create path (run + nested snapshot + waitpoint) is covered + // by the "v2 nested writes" suite below. + const { organization, project, environment } = await seedEnvironment(prisma); + + const ksuid = RunId.generateKsuid(); + expect(isKsuidId(ksuid.id)).toBe(true); + + await seedRoutedRun(prisma, { + id: ksuid.id, + friendlyId: ksuid.friendlyId, + organizationId: organization.id, + projectId: project.id, + runtimeEnvironmentId: environment.id, + }); + + const v2Row = await prisma.taskRunV2.findUnique({ where: { id: ksuid.id } }); + expect(v2Row).not.toBeNull(); + const ksuidInLegacy = await prisma.taskRun.findUnique({ where: { id: ksuid.id } }); + expect(ksuidInLegacy).toBeNull(); + } + ); + + postgresTest( + "findRun and updateMetadata route to task_run_v2 for a KSUID run and to TaskRun for a cuid run", + async ({ prisma }) => { + const { organization, project, environment } = await seedEnvironment(prisma); + const store = new PostgresRunStore({ prisma, readOnlyPrisma: prisma }); + + const ksuid = RunId.generateKsuid(); + const cuid = RunId.generate(); + + await seedRoutedRun(prisma, { + id: ksuid.id, + friendlyId: ksuid.friendlyId, + organizationId: organization.id, + projectId: project.id, + runtimeEnvironmentId: environment.id, + }); + await seedRoutedRun(prisma, { + id: cuid.id, + friendlyId: cuid.friendlyId, + organizationId: organization.id, + 
projectId: project.id, + runtimeEnvironmentId: environment.id, + }); + + // By-id read finds each run in its own table. + const foundKsuid = await store.findRun({ id: ksuid.id }, { select: { id: true } }); + expect(foundKsuid?.id).toBe(ksuid.id); + const foundCuid = await store.findRun({ id: cuid.id }, { select: { id: true } }); + expect(foundCuid?.id).toBe(cuid.id); + + // By-id write (updateMetadata) lands in the correct table. + const ksuidResult = await store.updateMetadata( + ksuid.id, + { + metadata: '{"routed":"v2"}', + metadataType: "application/json", + metadataVersion: { increment: 1 }, + updatedAt: new Date(), + }, + {} + ); + expect(ksuidResult.count).toBe(1); + + const cuidResult = await store.updateMetadata( + cuid.id, + { + metadata: '{"routed":"legacy"}', + metadataType: "application/json", + metadataVersion: { increment: 1 }, + updatedAt: new Date(), + }, + {} + ); + expect(cuidResult.count).toBe(1); + + // The write hit task_run_v2 for the KSUID run … + const v2Row = await prisma.taskRunV2.findUniqueOrThrow({ + where: { id: ksuid.id }, + select: { metadata: true }, + }); + expect(v2Row.metadata).toBe('{"routed":"v2"}'); + + // … and TaskRun for the cuid run. 
+ const legacyRow = await prisma.taskRun.findUniqueOrThrow({ + where: { id: cuid.id }, + select: { metadata: true }, + }); + expect(legacyRow.metadata).toBe('{"routed":"legacy"}'); + } + ); + + postgresTest( + "expireRunsBatch with a mixed array updates both tables and returns the combined count", + async ({ prisma }) => { + const { organization, project, environment } = await seedEnvironment(prisma); + const store = new PostgresRunStore({ prisma, readOnlyPrisma: prisma }); + + const ksuid = RunId.generateKsuid(); + const cuid = RunId.generate(); + + await seedRoutedRun(prisma, { + id: ksuid.id, + friendlyId: ksuid.friendlyId, + organizationId: organization.id, + projectId: project.id, + runtimeEnvironmentId: environment.id, + }); + await seedRoutedRun(prisma, { + id: cuid.id, + friendlyId: cuid.friendlyId, + organizationId: organization.id, + projectId: project.id, + runtimeEnvironmentId: environment.id, + }); + + const now = new Date("2026-06-19T12:00:00.000Z"); + const error = { type: "STRING_ERROR" as const, raw: "Run expired because the TTL was reached" }; + + const count = await store.expireRunsBatch([ksuid.id, cuid.id], { error, now }); + + expect(count).toBe(2); + + const v2Row = await prisma.taskRunV2.findUniqueOrThrow({ + where: { id: ksuid.id }, + select: { status: true, completedAt: true, expiredAt: true }, + }); + expect(v2Row.status).toBe("EXPIRED"); + expect(v2Row.completedAt).toEqual(now); + expect(v2Row.expiredAt).toEqual(now); + + const legacyRow = await prisma.taskRun.findUniqueOrThrow({ + where: { id: cuid.id }, + select: { status: true, completedAt: true, expiredAt: true }, + }); + expect(legacyRow.status).toBe("EXPIRED"); + expect(legacyRow.completedAt).toEqual(now); + expect(legacyRow.expiredAt).toEqual(now); + } + ); +}); + +describe("PostgresRunStore — v2 nested writes (run + related rows via nested Prisma create)", () => { + // `task_run_v2` is a full clone of `TaskRun` down to its relations, so the nested Prisma + // create/include used 
by createRun/lifecycle methods targets it unchanged via the runModel cast. + // The child->run foreign keys (TaskRunExecutionSnapshot.runId, Waitpoint.completedByTaskRunId, …) + // are dropped in production and by the testcontainer harness, so a child row can reference a run + // in EITHER physical table (TaskRun or task_run_v2) by plain scalar id without a FK violation. + + function runAssociatedWaitpoint(params: { + id: string; + friendlyId: string; + projectId: string; + environmentId: string; + }) { + return { + id: params.id, + friendlyId: params.friendlyId, + type: "RUN" as const, + status: "PENDING" as const, + idempotencyKey: `idem_${params.id}`, + userProvidedIdempotencyKey: false, + projectId: params.projectId, + environmentId: params.environmentId, + }; + } + + postgresTest( + "createRun for a KSUID run lands the run in task_run_v2, creates its snapshot keyed to the v2 run id, and creates the associated waitpoint", + async ({ prisma }) => { + const { organization, project, environment } = await seedEnvironment(prisma); + + const store = new PostgresRunStore({ prisma, readOnlyPrisma: prisma }); + + const ksuid = RunId.generateKsuid(); + expect(isKsuidId(ksuid.id)).toBe(true); + + const input: CreateRunInput = { + ...buildCreateRunInput({ + runId: ksuid.id, + organizationId: organization.id, + projectId: project.id, + runtimeEnvironmentId: environment.id, + }), + associatedWaitpoint: runAssociatedWaitpoint({ + id: "wp_v2_create_1", + friendlyId: "wp_v2_create_friendly_1", + projectId: project.id, + environmentId: environment.id, + }), + }; + input.data.friendlyId = ksuid.friendlyId; + + const run = await store.createRun(input); + + // Returns the TaskRunWithWaitpoint shape with the associated waitpoint included. + expect(run.id).toBe(ksuid.id); + expect(run.status).toBe("PENDING"); + expect(run.associatedWaitpoint).not.toBeNull(); + expect(run.associatedWaitpoint?.id).toBe("wp_v2_create_1"); + + // The run row landed in task_run_v2, not TaskRun. 
+ const v2Row = await prisma.taskRunV2.findUnique({ where: { id: ksuid.id } }); + expect(v2Row).not.toBeNull(); + const legacyRow = await prisma.taskRun.findUnique({ where: { id: ksuid.id } }); + expect(legacyRow).toBeNull(); + + // The execution snapshot is keyed to the v2 run id (in the shared snapshot table). + const snapshots = await prisma.taskRunExecutionSnapshot.findMany({ + where: { runId: ksuid.id }, + }); + expect(snapshots).toHaveLength(1); + expect(snapshots[0]?.executionStatus).toBe("RUN_CREATED"); + expect(snapshots[0]?.runStatus).toBe("PENDING"); + + // The waitpoint points back at the v2 run via the scalar FK column. + const waitpoint = await prisma.waitpoint.findUnique({ where: { id: "wp_v2_create_1" } }); + expect(waitpoint?.completedByTaskRunId).toBe(ksuid.id); + } + ); + + postgresTest( + "v2 lifecycle: startAttempt then completeAttemptSuccess creates the completion snapshot keyed to the v2 run id", + async ({ prisma }) => { + const { organization, project, environment } = await seedEnvironment(prisma); + + const store = new PostgresRunStore({ prisma, readOnlyPrisma: prisma }); + + const ksuid = RunId.generateKsuid(); + + const input = buildCreateRunInput({ + runId: ksuid.id, + organizationId: organization.id, + projectId: project.id, + runtimeEnvironmentId: environment.id, + }); + input.data.friendlyId = ksuid.friendlyId; + + await store.createRun(input); + + const started = await store.startAttempt( + ksuid.id, + { attemptNumber: 1, isWarmStart: false }, + { select: { id: true, status: true, attemptNumber: true } } + ); + expect(started.status).toBe("EXECUTING"); + expect(started.attemptNumber).toBe(1); + + const completedAt = new Date("2026-06-19T11:00:00.000Z"); + const completed = await store.completeAttemptSuccess( + ksuid.id, + { + completedAt, + output: '{"ok":true}', + outputType: "application/json", + usageDurationMs: 250, + costInCents: 4, + snapshot: { + executionStatus: "FINISHED", + description: "Task completed successfully", + 
runStatus: "COMPLETED_SUCCESSFULLY", + attemptNumber: 1, + environmentId: environment.id, + environmentType: "DEVELOPMENT", + projectId: project.id, + organizationId: organization.id, + }, + }, + { select: { id: true, status: true, completedAt: true, usageDurationMs: true, costInCents: true } } + ); + + expect(completed.id).toBe(ksuid.id); + expect(completed.status).toBe("COMPLETED_SUCCESSFULLY"); + expect(completed.completedAt).toEqual(completedAt); + expect(completed.usageDurationMs).toBe(250); + expect(completed.costInCents).toBe(4); + + // The run row updated in task_run_v2. + const v2Row = await prisma.taskRunV2.findUniqueOrThrow({ + where: { id: ksuid.id }, + select: { status: true }, + }); + expect(v2Row.status).toBe("COMPLETED_SUCCESSFULLY"); + + // The completion snapshot is keyed to the v2 run id. + const finished = await prisma.taskRunExecutionSnapshot.findMany({ + where: { runId: ksuid.id, executionStatus: "FINISHED" }, + }); + expect(finished).toHaveLength(1); + expect(finished[0]?.runStatus).toBe("COMPLETED_SUCCESSFULLY"); + } + ); + + postgresTest( + "createFailedRun for a KSUID run lands the run in task_run_v2 and creates the associated waitpoint keyed to the v2 run id", + async ({ prisma }) => { + const { organization, project, environment } = await seedEnvironment(prisma); + + const store = new PostgresRunStore({ prisma, readOnlyPrisma: prisma }); + + const ksuid = RunId.generateKsuid(); + const completedAt = new Date("2026-06-19T00:00:00.000Z"); + const error = { type: "STRING_ERROR", raw: "system failure" }; + + const input: CreateFailedRunInput = { + data: { + id: ksuid.id, + engine: "V2", + status: "SYSTEM_FAILURE", + friendlyId: ksuid.friendlyId, + runtimeEnvironmentId: environment.id, + environmentType: "DEVELOPMENT", + organizationId: organization.id, + projectId: project.id, + taskIdentifier: "my-task", + payload: "{}", + payloadType: "application/json", + context: {}, + traceContext: {}, + traceId: "trace_v2_failed", + spanId: 
"span_v2_failed", + queue: "task/my-task", + isTest: false, + completedAt, + error: error as unknown as import("@trigger.dev/database").Prisma.InputJsonObject, + depth: 0, + taskEventStore: "taskEvent", + }, + associatedWaitpoint: runAssociatedWaitpoint({ + id: "wp_v2_failed_1", + friendlyId: "wp_v2_failed_friendly_1", + projectId: project.id, + environmentId: environment.id, + }), + }; + + const run = await store.createFailedRun(input); + + expect(run.id).toBe(ksuid.id); + expect(run.status).toBe("SYSTEM_FAILURE"); + expect(run.associatedWaitpoint).not.toBeNull(); + expect(run.associatedWaitpoint?.id).toBe("wp_v2_failed_1"); + + const v2Row = await prisma.taskRunV2.findUnique({ where: { id: ksuid.id } }); + expect(v2Row).not.toBeNull(); + const legacyRow = await prisma.taskRun.findUnique({ where: { id: ksuid.id } }); + expect(legacyRow).toBeNull(); + + const waitpoint = await prisma.waitpoint.findUnique({ where: { id: "wp_v2_failed_1" } }); + expect(waitpoint?.completedByTaskRunId).toBe(ksuid.id); + } + ); + + postgresTest( + "createRun for a legacy cuid run with an associated waitpoint creates the run, its snapshot, and the waitpoint (regression: identical rows/shape)", + async ({ prisma }) => { + const { organization, project, environment } = await seedEnvironment(prisma); + + const store = new PostgresRunStore({ prisma, readOnlyPrisma: prisma }); + + const cuid = RunId.generate(); + expect(isKsuidId(cuid.id)).toBe(false); + + const input: CreateRunInput = { + ...buildCreateRunInput({ + runId: cuid.id, + organizationId: organization.id, + projectId: project.id, + runtimeEnvironmentId: environment.id, + }), + associatedWaitpoint: runAssociatedWaitpoint({ + id: "wp_legacy_create_1", + friendlyId: "wp_legacy_create_friendly_1", + projectId: project.id, + environmentId: environment.id, + }), + }; + input.data.friendlyId = cuid.friendlyId; + + const run = await store.createRun(input); + + // Same TaskRunWithWaitpoint shape as before. 
+ expect(run.id).toBe(cuid.id); + expect(run.status).toBe("PENDING"); + expect(run.associatedWaitpoint?.id).toBe("wp_legacy_create_1"); + + // Legacy run is in TaskRun, not task_run_v2. + const legacyRow = await prisma.taskRun.findUnique({ where: { id: cuid.id } }); + expect(legacyRow).not.toBeNull(); + const v2Row = await prisma.taskRunV2.findUnique({ where: { id: cuid.id } }); + expect(v2Row).toBeNull(); + + // Snapshot keyed to the run, waitpoint linked back via the FK column. + const snapshots = await prisma.taskRunExecutionSnapshot.findMany({ + where: { runId: cuid.id }, + }); + expect(snapshots).toHaveLength(1); + expect(snapshots[0]?.executionStatus).toBe("RUN_CREATED"); + + const waitpoint = await prisma.waitpoint.findUnique({ where: { id: "wp_legacy_create_1" } }); + expect(waitpoint?.completedByTaskRunId).toBe(cuid.id); + + // The run FKs are stripped in the test DB, so reloading through the Prisma relation is what + // shows the waitpoint resolves to a TaskRun row (the regression path is unchanged). + const reloaded = await prisma.taskRun.findUniqueOrThrow({ + where: { id: cuid.id }, + include: { associatedWaitpoint: true }, + }); + expect(reloaded.associatedWaitpoint?.id).toBe("wp_legacy_create_1"); + } + ); + + postgresTest( + "createRun is atomic: a second create with the same id throws and leaves no dangling snapshot", + async ({ prisma }) => { + const { organization, project, environment } = await seedEnvironment(prisma); + + const store = new PostgresRunStore({ prisma, readOnlyPrisma: prisma }); + + const cuid = RunId.generate(); + const input = buildCreateRunInput({ + runId: cuid.id, + organizationId: organization.id, + projectId: project.id, + runtimeEnvironmentId: environment.id, + }); + input.data.friendlyId = cuid.friendlyId; + + await store.createRun(input); + + const before = await prisma.taskRunExecutionSnapshot.count({ where: { runId: cuid.id } }); + expect(before).toBe(1); + + // A second createRun with the same id fails the unique-id insert and + // propagates the error.
Because the run row and its snapshot are written by + // one nested Prisma create, the rollback leaves no extra snapshot behind. + await expect(store.createRun(input)).rejects.toThrow(); + + const after = await prisma.taskRunExecutionSnapshot.count({ where: { runId: cuid.id } }); + expect(after).toBe(1); + } + ); + + postgresTest( + "lockRunToWorker for a KSUID run returns the run with runtimeEnvironment hydrated via include (no manual stitch)", + async ({ prisma }) => { + const { organization, project, environment } = await seedEnvironment(prisma); + + const store = new PostgresRunStore({ prisma, readOnlyPrisma: prisma }); + + const ksuid = RunId.generateKsuid(); + expect(isKsuidId(ksuid.id)).toBe(true); + + const input = buildCreateRunInput({ + runId: ksuid.id, + organizationId: organization.id, + projectId: project.id, + runtimeEnvironmentId: environment.id, + }); + input.data.friendlyId = ksuid.friendlyId; + + await store.createRun(input); + + const backgroundWorker = await prisma.backgroundWorker.create({ + data: { + friendlyId: "worker_friendly_v2", + version: "20260601.1", + runtimeEnvironmentId: environment.id, + projectId: project.id, + contentHash: "abc123v2", + sdkVersion: "3.0.0", + cliVersion: "3.0.0", + metadata: {}, + }, + }); + + const workerTask = await prisma.backgroundWorkerTask.create({ + data: { + friendlyId: "task_friendly_v2", + slug: "my-task", + filePath: "src/my-task.ts", + exportName: "myTask", + workerId: backgroundWorker.id, + runtimeEnvironmentId: environment.id, + projectId: project.id, + }, + }); + + const queue = await prisma.taskQueue.create({ + data: { + friendlyId: "queue_friendly_v2", + name: "task/my-task", + runtimeEnvironmentId: environment.id, + projectId: project.id, + }, + }); + + const lockedAt = new Date("2026-06-19T13:00:00.000Z"); + const startedAt = new Date("2026-06-19T13:00:01.000Z"); + const snapshotId = "snap_lock_v2_1"; + + const locked = await store.lockRunToWorker(ksuid.id, { + lockedAt, + lockedById: 
workerTask.id, + lockedToVersionId: backgroundWorker.id, + lockedQueueId: queue.id, + startedAt, + baseCostInCents: 5, + machinePreset: "small-1x", + taskVersion: "20260601.1", + sdkVersion: "3.0.0", + cliVersion: "3.0.0", + maxDurationInSeconds: null, + snapshot: { + id: snapshotId, + previousSnapshotId: undefined, + environmentId: environment.id, + environmentType: "DEVELOPMENT", + projectId: project.id, + organizationId: organization.id, + completedWaitpointIds: [], + completedWaitpointOrder: [], + }, + }); + + expect(locked.status).toBe("DEQUEUED"); + // The relation is hydrated by the nested `include`, not stitched manually. + expect(locked.runtimeEnvironment).toBeDefined(); + expect(locked.runtimeEnvironment.id).toBe(environment.id); + + // The run row landed (and was updated) in task_run_v2. + const v2Row = await prisma.taskRunV2.findUniqueOrThrow({ + where: { id: ksuid.id }, + select: { status: true }, + }); + expect(v2Row.status).toBe("DEQUEUED"); + + // The dequeue snapshot is keyed to the v2 run id. 
+ const snap = await prisma.taskRunExecutionSnapshot.findUnique({ where: { id: snapshotId } }); + expect(snap?.executionStatus).toBe("PENDING_EXECUTING"); + } + ); + + postgresTest( + "findRun with a runtimeEnvironment include resolves the relation for a KSUID run", + async ({ prisma }) => { + const { organization, project, environment } = await seedEnvironment(prisma); + + const store = new PostgresRunStore({ prisma, readOnlyPrisma: prisma }); + + const ksuid = RunId.generateKsuid(); + const input = buildCreateRunInput({ + runId: ksuid.id, + organizationId: organization.id, + projectId: project.id, + runtimeEnvironmentId: environment.id, + }); + input.data.friendlyId = ksuid.friendlyId; + + await store.createRun(input); + + const run = await store.findRun({ id: ksuid.id }, { include: { runtimeEnvironment: true } }); + + expect(run?.id).toBe(ksuid.id); + expect(run?.runtimeEnvironment).toBeDefined(); + expect(run?.runtimeEnvironment.id).toBe(environment.id); + } + ); +}); diff --git a/internal-packages/run-store/src/PostgresRunStore.ts b/internal-packages/run-store/src/PostgresRunStore.ts index fcc53c00266..c800386b720 100644 --- a/internal-packages/run-store/src/PostgresRunStore.ts +++ b/internal-packages/run-store/src/PostgresRunStore.ts @@ -20,6 +20,7 @@ import type { TaskRunWithWaitpoint, } from "./types.js"; import type { TaskRunError } from "@trigger.dev/core/v3/schemas"; +import { isKsuidId } from "@trigger.dev/core/v3/isomorphic"; export type PostgresRunStoreOptions = { prisma: PrismaClient; @@ -27,12 +28,17 @@ export type PostgresRunStoreOptions = { }; /** - * Typed write layer for the task-run row, backed by the `taskRun` Prisma model. + * Typed write layer for the task-run row. A run lives in one of two physical + * tables chosen by its id format (`runModel`): the legacy `taskRun`, or the + * `task_run_v2` clone. 
`task_run_v2` carries the same relation surface as + * `TaskRun`, so a method's nested Prisma create/include (execution snapshot, + * associated waitpoint, `runtimeEnvironment`) targets either table unchanged + * once the delegate comes from `runModel`. * - * Each method is a verbatim relocation of the Prisma statement that lives at a - * specific call site today. Methods write through `(tx ?? this.prisma).taskRun` + * Each method is its original single-table Prisma statement with the run + * delegate routed through `runModel`. Methods write through `tx` when supplied * so callers can opt into an existing transaction. Errors (including unique - * constraint violations) propagate to the caller unchanged. + * constraint violations) propagate unchanged. */ export class PostgresRunStore implements RunStore { private readonly prisma: PrismaClient; @@ -43,13 +49,50 @@ export class PostgresRunStore implements RunStore { this.readOnlyPrisma = options.readOnlyPrisma; } + /** + * A run lives in exactly one physical table, chosen by the FORMAT of its id: + * a KSUID id (new) lives in `task_run_v2`, the legacy cuid id in `TaskRun`. + * `task_run_v2` is an identical clone of `TaskRun` down to its relations, so + * its delegate is cast to the `taskRun` delegate type to reuse the existing + * generic `select`/`include`/nested-write passthrough unchanged. + */ + private runModel(client: PrismaClientOrTransaction, idOrFriendlyId: string) { + return isKsuidId(idOrFriendlyId) + ? (client.taskRunV2 as unknown as typeof client.taskRun) + : client.taskRun; + } + + /** + * Route a single-row read to its physical table from the routing key in the + * `where` clause. `findRun`/`findRunOrThrow` are always called with a + * `{ id }` or `{ friendlyId }` predicate; both carry the same KSUID/cuid body + * and route identically. When neither is a plain string (e.g. an unexpected + * predicate-only read), default to the legacy `taskRun` table — matching the + * pre-split single-table behavior. 
+ */ + #runReadModel( + prisma: PrismaClientOrTransaction | PrismaReplicaClient, + where: Prisma.TaskRunWhereInput + ) { + const routingKey = + typeof where.id === "string" + ? where.id + : typeof where.friendlyId === "string" + ? where.friendlyId + : undefined; + + return routingKey !== undefined && isKsuidId(routingKey) + ? (prisma.taskRunV2 as unknown as typeof prisma.taskRun) + : prisma.taskRun; + } + async createRun( params: CreateRunInput, tx?: PrismaClientOrTransaction ): Promise { const client = tx ?? this.prisma; - return client.taskRun.create({ + return this.runModel(client, params.data.id).create({ include: { associatedWaitpoint: true, }, @@ -84,7 +127,7 @@ export class PostgresRunStore implements RunStore { ): Promise { const client = tx ?? this.prisma; - return client.taskRun.create({ + return this.runModel(client, params.data.id).create({ data: { ...params.data, executionSnapshots: { @@ -111,7 +154,7 @@ export class PostgresRunStore implements RunStore { ): Promise { const client = tx ?? this.prisma; - return client.taskRun.create({ + return this.runModel(client, params.data.id).create({ include: { associatedWaitpoint: true, }, @@ -134,7 +177,7 @@ export class PostgresRunStore implements RunStore { ): Promise> { const prisma = tx ?? this.prisma; - return prisma.taskRun.update({ + return this.runModel(prisma, runId).update({ where: { id: runId }, data: { status: "EXECUTING", @@ -161,7 +204,7 @@ export class PostgresRunStore implements RunStore { ): Promise> { const prisma = tx ?? this.prisma; - return prisma.taskRun.update({ + return this.runModel(prisma, runId).update({ where: { id: runId }, data: { status: "COMPLETED_SUCCESSFULLY", @@ -197,7 +240,7 @@ export class PostgresRunStore implements RunStore { ): Promise> { const prisma = tx ?? 
this.prisma; - return prisma.taskRun.update({ + return this.runModel(prisma, runId).update({ where: { id: runId }, data: { machinePreset: data.machinePreset, @@ -215,7 +258,7 @@ export class PostgresRunStore implements RunStore { ): Promise> { const prisma = tx ?? this.prisma; - return prisma.taskRun.update({ + return this.runModel(prisma, runId).update({ where: { id: runId }, data: { status: "PENDING" }, select: args.select, @@ -229,7 +272,7 @@ export class PostgresRunStore implements RunStore { ): Promise { const prisma = tx ?? this.prisma; - await prisma.taskRun.update({ + await this.runModel(prisma, runId).update({ where: { id: runId }, data: { bulkActionGroupIds: { @@ -253,7 +296,7 @@ export class PostgresRunStore implements RunStore { ): Promise> { const prisma = tx ?? this.prisma; - return prisma.taskRun.update({ + return this.runModel(prisma, runId).update({ where: { id: runId }, data: { status: "CANCELED", @@ -283,7 +326,7 @@ export class PostgresRunStore implements RunStore { ): Promise> { const prisma = tx ?? this.prisma; - return prisma.taskRun.update({ + return this.runModel(prisma, runId).update({ where: { id: runId }, data: { status: data.status, @@ -304,7 +347,7 @@ export class PostgresRunStore implements RunStore { ): Promise> { const prisma = tx ?? this.prisma; - return prisma.taskRun.update({ + return this.runModel(prisma, runId).update({ where: { id: runId }, data: { status: "EXPIRED", @@ -341,15 +384,41 @@ export class PostgresRunStore implements RunStore { return 0; } - return prisma.$executeRaw` - UPDATE "TaskRun" - SET "status" = 'EXPIRED'::"TaskRunStatus", - "completedAt" = ${data.now}, - "expiredAt" = ${data.now}, - "updatedAt" = ${data.now}, - "error" = ${JSON.stringify(data.error)}::jsonb - WHERE "id" IN (${Prisma.join(runIds)}) - `; + // A run lives in exactly one table, chosen by its id format. The array may + // be mixed, so partition it and run the UPDATE once per non-empty partition + // on its own table, then sum the counts. 
+ const v2Ids = runIds.filter((id) => isKsuidId(id)); + const legacyIds = runIds.filter((id) => !isKsuidId(id)); + + const error = JSON.stringify(data.error); + + let count = 0; + + if (legacyIds.length > 0) { + count += await prisma.$executeRaw` + UPDATE "TaskRun" + SET "status" = 'EXPIRED'::"TaskRunStatus", + "completedAt" = ${data.now}, + "expiredAt" = ${data.now}, + "updatedAt" = ${data.now}, + "error" = ${error}::jsonb + WHERE "id" IN (${Prisma.join(legacyIds)}) + `; + } + + if (v2Ids.length > 0) { + count += await prisma.$executeRaw` + UPDATE "task_run_v2" + SET "status" = 'EXPIRED'::"TaskRunStatus", + "completedAt" = ${data.now}, + "expiredAt" = ${data.now}, + "updatedAt" = ${data.now}, + "error" = ${error}::jsonb + WHERE "id" IN (${Prisma.join(v2Ids)}) + `; + } + + return count; } async lockRunToWorker( @@ -359,7 +428,7 @@ export class PostgresRunStore implements RunStore { ): Promise> { const prisma = tx ?? this.prisma; - return prisma.taskRun.update({ + return this.runModel(prisma, runId).update({ where: { id: runId }, data: { status: "DEQUEUED", @@ -403,7 +472,7 @@ export class PostgresRunStore implements RunStore { include: { runtimeEnvironment: true, }, - }); + }) as Promise>; } async parkPendingVersion( @@ -414,7 +483,7 @@ export class PostgresRunStore implements RunStore { ): Promise> { const prisma = tx ?? this.prisma; - return prisma.taskRun.update({ + return this.runModel(prisma, runId).update({ where: { id: runId }, data: { status: "PENDING_VERSION", @@ -430,7 +499,7 @@ export class PostgresRunStore implements RunStore { ): Promise<{ count: number }> { const prisma = tx ?? this.prisma; - const result = await prisma.taskRun.updateMany({ + const result = await this.runModel(prisma, runId).updateMany({ where: { id: runId, status: "PENDING_VERSION" }, data: { status: "PENDING" }, }); @@ -445,7 +514,7 @@ export class PostgresRunStore implements RunStore { ): Promise> { const prisma = tx ?? 
this.prisma; - return prisma.taskRun.update({ + return this.runModel(prisma, runId).update({ where: { id: runId }, data: { status: "WAITING_TO_RESUME" }, include: args.include, @@ -459,7 +528,7 @@ export class PostgresRunStore implements RunStore { ): Promise> { const prisma = tx ?? this.prisma; - return prisma.taskRun.update({ + return this.runModel(prisma, runId).update({ where: { id: runId }, data: { status: "EXECUTING" }, select: args.select, @@ -473,7 +542,7 @@ export class PostgresRunStore implements RunStore { ): Promise { const prisma = tx ?? this.prisma; - return prisma.taskRun.update({ + return this.runModel(prisma, runId).update({ where: { id: runId }, data: { delayUntil: data.delayUntil, @@ -503,7 +572,7 @@ export class PostgresRunStore implements RunStore { ): Promise { const prisma = tx ?? this.prisma; - return prisma.taskRun.update({ + return this.runModel(prisma, runId).update({ where: { id: runId }, data: { status: "PENDING", @@ -519,7 +588,7 @@ export class PostgresRunStore implements RunStore { ): Promise { const prisma = tx ?? this.prisma; - return prisma.taskRun.update({ + return this.runModel(prisma, runId).update({ where: { id: runId }, data, include: { @@ -540,16 +609,17 @@ export class PostgresRunStore implements RunStore { tx?: PrismaClientOrTransaction ): Promise<{ count: number }> { const prisma = tx ?? this.prisma; + const model = this.runModel(prisma, runId); if (options.expectedMetadataVersion !== undefined) { - const result = await prisma.taskRun.updateMany({ + const result = await model.updateMany({ where: { id: runId, metadataVersion: options.expectedMetadataVersion }, data, }); return { count: result.count }; } - await prisma.taskRun.update({ + await model.update({ where: { id: runId }, data, }); @@ -563,7 +633,7 @@ export class PostgresRunStore implements RunStore { const prisma = tx ?? 
this.prisma; if (params.byId) { - const result = await prisma.taskRun.updateMany({ + const result = await this.runModel(prisma, params.byId.runId).updateMany({ where: { id: params.byId.runId, idempotencyKey: params.byId.idempotencyKey }, data: { idempotencyKey: null, idempotencyKeyExpiresAt: null }, }); @@ -571,23 +641,48 @@ export class PostgresRunStore implements RunStore { } if (params.byPredicate) { + // No run id to route by: a matching run could be in either table during + // the mixed window, so run the predicate against both and sum the counts. + const where = { + idempotencyKey: params.byPredicate.idempotencyKey, + taskIdentifier: params.byPredicate.taskIdentifier, + runtimeEnvironmentId: params.byPredicate.runtimeEnvironmentId, + }; + const data = { idempotencyKey: null, idempotencyKeyExpiresAt: null }; + + const [legacy, v2] = await Promise.all([ + prisma.taskRun.updateMany({ where, data }), + (prisma.taskRunV2 as unknown as typeof prisma.taskRun).updateMany({ where, data }), + ]); + + return { count: legacy.count + v2.count }; + } + + // byFriendlyIds — only clears idempotencyKey, not idempotencyKeyExpiresAt. + // The friendlyId carries the same KSUID/cuid body as the id, so it routes + // the same way; partition the (possibly mixed) array and sum the counts. 
+ const v2FriendlyIds = params.byFriendlyIds.filter((friendlyId) => isKsuidId(friendlyId)); + const legacyFriendlyIds = params.byFriendlyIds.filter((friendlyId) => !isKsuidId(friendlyId)); + + let count = 0; + + if (legacyFriendlyIds.length > 0) { const result = await prisma.taskRun.updateMany({ - where: { - idempotencyKey: params.byPredicate.idempotencyKey, - taskIdentifier: params.byPredicate.taskIdentifier, - runtimeEnvironmentId: params.byPredicate.runtimeEnvironmentId, - }, - data: { idempotencyKey: null, idempotencyKeyExpiresAt: null }, + where: { friendlyId: { in: legacyFriendlyIds } }, + data: { idempotencyKey: null }, }); - return { count: result.count }; + count += result.count; } - // byFriendlyIds — only clears idempotencyKey, not idempotencyKeyExpiresAt - const result = await prisma.taskRun.updateMany({ - where: { friendlyId: { in: params.byFriendlyIds } }, - data: { idempotencyKey: null }, - }); - return { count: result.count }; + if (v2FriendlyIds.length > 0) { + const result = await (prisma.taskRunV2 as unknown as typeof prisma.taskRun).updateMany({ + where: { friendlyId: { in: v2FriendlyIds } }, + data: { idempotencyKey: null }, + }); + count += result.count; + } + + return { count }; } async pushTags( @@ -598,7 +693,7 @@ export class PostgresRunStore implements RunStore { ): Promise<{ updatedAt: Date }> { const prisma = tx ?? this.prisma; - return prisma.taskRun.update({ + return this.runModel(prisma, runId).update({ where: { id: runId, runtimeEnvironmentId: where.runtimeEnvironmentId }, data: { runTags: { push: tags } }, select: { updatedAt: true }, @@ -612,7 +707,7 @@ export class PostgresRunStore implements RunStore { ): Promise { const prisma = tx ?? 
this.prisma; - await prisma.taskRun.update({ + await this.runModel(prisma, runId).update({ where: { id: runId }, data: { realtimeStreams: { push: streamId } }, }); @@ -639,7 +734,7 @@ export class PostgresRunStore implements RunStore { ): Promise { const { args, prisma } = this.#resolveReadArgs(argsOrClient, client); - return prisma.taskRun.findFirst({ + return this.#runReadModel(prisma, where).findFirst({ where, ...args, }); @@ -666,7 +761,7 @@ export class PostgresRunStore implements RunStore { ): Promise { const { args, prisma } = this.#resolveReadArgs(argsOrClient, client); - return prisma.taskRun.findFirstOrThrow({ + return this.#runReadModel(prisma, where).findFirstOrThrow({ where, ...args, }); From e1743415f16f094aa0eda999e6204040e47b9408 Mon Sep 17 00:00:00 2001 From: Dan Sutton Date: Fri, 19 Jun 2026 16:46:55 +0100 Subject: [PATCH 39/83] feat(run-store): both-table merged keyset cursor for findRuns findRuns now queries both TaskRun and task_run_v2 and merges the two ordered streams into one result. Ordered, limited reads require a time-based key (createdAt) because cuid and ksuid ids do not sort into a shared range, so id alone cannot order the union. 
--- .../run-store/src/PostgresRunStore.test.ts | 288 +++++++++++++++++ .../run-store/src/PostgresRunStore.ts | 293 +++++++++++++++++- 2 files changed, 580 insertions(+), 1 deletion(-) diff --git a/internal-packages/run-store/src/PostgresRunStore.test.ts b/internal-packages/run-store/src/PostgresRunStore.test.ts index d4f5b851f22..a5866fa5d81 100644 --- a/internal-packages/run-store/src/PostgresRunStore.test.ts +++ b/internal-packages/run-store/src/PostgresRunStore.test.ts @@ -1788,6 +1788,7 @@ describe("PostgresRunStore — table routing by id format", () => { status?: string; idempotencyKey?: string; taskIdentifier?: string; + createdAt?: Date; } ) { const delegate = isKsuidId(params.id) @@ -1815,6 +1816,7 @@ describe("PostgresRunStore — table routing by id format", () => { taskEventStore: "taskEvent", depth: 0, ...(params.idempotencyKey !== undefined && { idempotencyKey: params.idempotencyKey }), + ...(params.createdAt !== undefined && { createdAt: params.createdAt }), }, }); } @@ -2016,6 +2018,292 @@ describe("PostgresRunStore — table routing by id format", () => { expect(legacyRow.expiredAt).toEqual(now); } ); + + postgresTest( + "findRuns (unordered) returns runs from BOTH TaskRun and task_run_v2 in one env", + async ({ prisma }) => { + const { organization, project, environment } = await seedEnvironment(prisma); + const store = new PostgresRunStore({ prisma, readOnlyPrisma: prisma }); + + // Two legacy (cuid) runs + two new (ksuid) runs in the SAME env. 
+ const legacyA = RunId.generate(); + const legacyB = RunId.generate(); + const v2A = RunId.generateKsuid(); + const v2B = RunId.generateKsuid(); + expect(isKsuidId(legacyA.id)).toBe(false); + expect(isKsuidId(v2A.id)).toBe(true); + + for (const run of [legacyA, legacyB, v2A, v2B]) { + await seedRoutedRun(prisma, { + id: run.id, + friendlyId: run.friendlyId, + organizationId: organization.id, + projectId: project.id, + runtimeEnvironmentId: environment.id, + }); + } + + const found = await store.findRuns({ + where: { runtimeEnvironmentId: environment.id }, + select: { id: true }, + }); + + // ALL four runs come back, regardless of which physical table they live in. + expect(found.map((r) => r.id).sort()).toEqual( + [legacyA.id, legacyB.id, v2A.id, v2B.id].sort() + ); + } + ); + + postgresTest( + "findRuns (ordered+limited) 2-way merges both tables to the globally-correct first N", + async ({ prisma }) => { + const { organization, project, environment } = await seedEnvironment(prisma); + const store = new PostgresRunStore({ prisma, readOnlyPrisma: prisma }); + + // Interleave createdAt across the two tables so a per-table take+slice + // would be WRONG: the newest run is in v2, the 2nd-newest in legacy, etc. 
+ // t5 (v2) > t4 (legacy) > t3 (v2) > t2 (legacy) > t1 (v2) > t0 (legacy) + const base = new Date("2026-06-01T00:00:00.000Z").getTime(); + const at = (i: number) => new Date(base + i * 60_000); + + const legacy0 = RunId.generate(); // t0 (oldest) + const v2_1 = RunId.generateKsuid(); // t1 + const legacy2 = RunId.generate(); // t2 + const v2_3 = RunId.generateKsuid(); // t3 + const legacy4 = RunId.generate(); // t4 + const v2_5 = RunId.generateKsuid(); // t5 (newest) + + const seeded: Array<{ id: string; friendlyId: string; t: number }> = [ + { id: legacy0.id, friendlyId: legacy0.friendlyId, t: 0 }, + { id: v2_1.id, friendlyId: v2_1.friendlyId, t: 1 }, + { id: legacy2.id, friendlyId: legacy2.friendlyId, t: 2 }, + { id: v2_3.id, friendlyId: v2_3.friendlyId, t: 3 }, + { id: legacy4.id, friendlyId: legacy4.friendlyId, t: 4 }, + { id: v2_5.id, friendlyId: v2_5.friendlyId, t: 5 }, + ]; + + for (const run of seeded) { + await seedRoutedRun(prisma, { + id: run.id, + friendlyId: run.friendlyId, + organizationId: organization.id, + projectId: project.id, + runtimeEnvironmentId: environment.id, + createdAt: at(run.t), + }); + } + + const found = await store.findRuns({ + where: { runtimeEnvironmentId: environment.id }, + select: { id: true }, + orderBy: { createdAt: "desc" }, + take: 3, + }); + + // The globally-newest 3 — drawn from BOTH tables in true createdAt order, + // NOT three rows from one table. + expect(found.map((r) => r.id)).toEqual([v2_5.id, legacy4.id, v2_3.id]); + } + ); + + postgresTest( + "findRuns scoping: a run in another env is NOT returned from either table", + async ({ prisma }) => { + const { organization, project, environment } = await seedEnvironment(prisma); + const store = new PostgresRunStore({ prisma, readOnlyPrisma: prisma }); + + // A second env in the same project. 
+ const otherEnv = await prisma.runtimeEnvironment.create({ + data: { + type: "PREVIEW", + slug: "other", + projectId: project.id, + organizationId: organization.id, + apiKey: "tr_other_apikey", + pkApiKey: "pk_other_apikey", + shortcode: "other_short_code", + }, + }); + + // One legacy + one v2 run in the TARGET env. + const legacyTarget = RunId.generate(); + const v2Target = RunId.generateKsuid(); + // One legacy + one v2 run in the OTHER env (must never surface). + const legacyOther = RunId.generate(); + const v2Other = RunId.generateKsuid(); + + await seedRoutedRun(prisma, { + id: legacyTarget.id, + friendlyId: legacyTarget.friendlyId, + organizationId: organization.id, + projectId: project.id, + runtimeEnvironmentId: environment.id, + }); + await seedRoutedRun(prisma, { + id: v2Target.id, + friendlyId: v2Target.friendlyId, + organizationId: organization.id, + projectId: project.id, + runtimeEnvironmentId: environment.id, + }); + await seedRoutedRun(prisma, { + id: legacyOther.id, + friendlyId: legacyOther.friendlyId, + organizationId: organization.id, + projectId: project.id, + runtimeEnvironmentId: otherEnv.id, + }); + await seedRoutedRun(prisma, { + id: v2Other.id, + friendlyId: v2Other.friendlyId, + organizationId: organization.id, + projectId: project.id, + runtimeEnvironmentId: otherEnv.id, + }); + + const found = await store.findRuns({ + where: { runtimeEnvironmentId: environment.id }, + select: { id: true }, + }); + + // The same `where` fences BOTH tables: only the target env's runs come back. 
+ expect(found.map((r) => r.id).sort()).toEqual([legacyTarget.id, v2Target.id].sort()); + const foundIds = new Set(found.map((r) => r.id)); + expect(foundIds.has(legacyOther.id)).toBe(false); + expect(foundIds.has(v2Other.id)).toBe(false); + } + ); + + postgresTest( + "findRuns (include) returns hydrated relations from both tables", + async ({ prisma }) => { + const { organization, project, environment } = await seedEnvironment(prisma); + const store = new PostgresRunStore({ prisma, readOnlyPrisma: prisma }); + + const legacy = RunId.generate(); + const v2 = RunId.generateKsuid(); + + await seedRoutedRun(prisma, { + id: legacy.id, + friendlyId: legacy.friendlyId, + organizationId: organization.id, + projectId: project.id, + runtimeEnvironmentId: environment.id, + }); + await seedRoutedRun(prisma, { + id: v2.id, + friendlyId: v2.friendlyId, + organizationId: organization.id, + projectId: project.id, + runtimeEnvironmentId: environment.id, + }); + + const found = await store.findRuns({ + where: { runtimeEnvironmentId: environment.id }, + include: { runtimeEnvironment: true }, + }); + + expect(found).toHaveLength(2); + // Both rows — legacy and v2 — carry the hydrated relation. + for (const run of found) { + expect(run.runtimeEnvironment).not.toBeNull(); + expect(run.runtimeEnvironment.id).toBe(environment.id); + expect(run.runtimeEnvironment.slug).toBe("dev"); + } + } + ); + + postgresTest( + "findRuns (take, no orderBy) caps the combined result across both tables", + async ({ prisma }) => { + const { organization, project, environment } = await seedEnvironment(prisma); + const store = new PostgresRunStore({ prisma, readOnlyPrisma: prisma }); + + // 2 legacy + 2 v2; an unordered `take: 3` must return exactly 3, all + // belonging to the scoped env. 
+ const runs = [ + RunId.generate(), + RunId.generate(), + RunId.generateKsuid(), + RunId.generateKsuid(), + ]; + for (const run of runs) { + await seedRoutedRun(prisma, { + id: run.id, + friendlyId: run.friendlyId, + organizationId: organization.id, + projectId: project.id, + runtimeEnvironmentId: environment.id, + }); + } + + const found = await store.findRuns({ + where: { runtimeEnvironmentId: environment.id }, + select: { id: true }, + take: 3, + }); + + expect(found).toHaveLength(3); + const allIds = new Set(runs.map((r) => r.id)); + for (const run of found) { + expect(allIds.has(run.id)).toBe(true); + } + } + ); + + postgresTest( + "findRuns (ordered+limited) by id alone is rejected: id is not a total cross-table order", + async ({ prisma }) => { + const { organization, project, environment } = await seedEnvironment(prisma); + const store = new PostgresRunStore({ prisma, readOnlyPrisma: prisma }); + + const legacy = RunId.generate(); + await seedRoutedRun(prisma, { + id: legacy.id, + friendlyId: legacy.friendlyId, + organizationId: organization.id, + projectId: project.id, + runtimeEnvironmentId: environment.id, + }); + + await expect( + store.findRuns({ + where: { runtimeEnvironmentId: environment.id }, + select: { id: true }, + orderBy: { id: "asc" }, + take: 10, + }) + ).rejects.toThrow(/total order/i); + } + ); + + postgresTest( + "findRuns (ordered+limited) rejects a Prisma cursor it cannot span across two tables", + async ({ prisma }) => { + const { organization, project, environment } = await seedEnvironment(prisma); + const store = new PostgresRunStore({ prisma, readOnlyPrisma: prisma }); + + const legacy = RunId.generate(); + await seedRoutedRun(prisma, { + id: legacy.id, + friendlyId: legacy.friendlyId, + organizationId: organization.id, + projectId: project.id, + runtimeEnvironmentId: environment.id, + }); + + await expect( + store.findRuns({ + where: { runtimeEnvironmentId: environment.id }, + select: { id: true }, + orderBy: { createdAt: "desc" 
}, + take: 5, + cursor: { id: legacy.id }, + }) + ).rejects.toThrow(/cursor/i); + } + ); }); describe("PostgresRunStore — v2 nested writes (run + related rows via nested Prisma create)", () => { diff --git a/internal-packages/run-store/src/PostgresRunStore.ts b/internal-packages/run-store/src/PostgresRunStore.ts index c800386b720..6f18a3dd4fc 100644 --- a/internal-packages/run-store/src/PostgresRunStore.ts +++ b/internal-packages/run-store/src/PostgresRunStore.ts @@ -813,7 +813,298 @@ export class PostgresRunStore implements RunStore { ): Promise { const prisma = client ?? this.readOnlyPrisma; - return prisma.taskRun.findMany(args); + // A run lives in exactly one physical table, chosen by its id format, so a + // multi-row read must hit BOTH `TaskRun` (legacy cuid) and `task_run_v2` + // (new ksuid) and combine. `task_run_v2` is an identical clone of `TaskRun` + // (same relation surface), so the SAME `args` — crucially the SAME `where`, + // which is the security scope — run unchanged against either delegate. + const legacyModel = prisma.taskRun; + const v2Model = prisma.taskRunV2 as unknown as typeof prisma.taskRun; + + const ordered = this.#normalizeOrderBy(args.orderBy); + + // ORDERED + LIMITED → bounded 2-way merge. + // + // A single Prisma `cursor` addresses one table's row and cannot span two + // tables, so reject it on this path rather than silently paginating one + // table. (No current caller pairs `cursor` with `orderBy`+`take`; keyset + // callers carry the cursor in `where`, which both queries honor.) + if (ordered.length > 0 && args.take !== undefined) { + if (args.cursor !== undefined) { + throw new Error( + "RunStore.findRuns: a Prisma `cursor` cannot address two tables on an ordered+limited read. " + + "Use a where-based keyset (e.g. `where: { createdAt: { lt: X } }`) instead." 
+ ); + } + + const comparator = this.#buildCrossTableComparator(ordered); + + // The in-memory comparator reads the order keys off each row, so they + // MUST be in the projection. If the caller's `select` omits one, add it + // for the query and strip it from the output. (`include`/full-row already + // carry every scalar.) + const { args: queryArgs, addedKeys } = this.#withOrderKeysSelected(args, ordered); + + // Take at most `take` from each table: the merged head of two ordered + // streams of length `take` is fully determined by their first `take` rows. + const perTableArgs = { ...queryArgs, take: args.take }; + + const [legacyRows, v2Rows] = (await Promise.all([ + legacyModel.findMany(perTableArgs), + v2Model.findMany(perTableArgs), + ])) as [Array>, Array>]; + + const merged = this.#mergeOrdered(legacyRows, v2Rows, comparator, args.take); + return this.#stripAddedKeys(merged, addedKeys); + } + + // UNORDERED / NO-LIMIT (or `take` without `orderBy`) → run the SAME args + // against both tables and concatenate. A run is in exactly one table, so + // concatenation is complete and has no duplicates. + // + // `orderBy` without `take` still needs the order keys projected so the + // whole-set re-sort below can read them. + const { args: queryArgs, addedKeys } = + ordered.length > 0 + ? this.#withOrderKeysSelected(args, ordered) + : { args, addedKeys: [] as string[] }; + + const [legacyRows, v2Rows] = (await Promise.all([ + legacyModel.findMany(queryArgs), + v2Model.findMany(queryArgs), + ])) as [Array>, Array>]; + + let combined = legacyRows.concat(v2Rows); + + // `orderBy` without `take`: each table came back ordered, but the + // concatenation is not — re-sort the whole bounded set to honor the order. + if (ordered.length > 0) { + const comparator = this.#buildCrossTableComparator(ordered); + combined = combined.sort(comparator); + } + + // `take` without `orderBy`: an unordered cap. 
Each table was capped at + // `take`, so the concatenation is at most `2*take`; trim to `take`. Order + // among unordered rows is unspecified either way. + if (args.take !== undefined) { + combined = combined.slice(0, args.take); + } + + return this.#stripAddedKeys(combined, addedKeys); + } + + /** + * The cross-table merge/sort compares order-key VALUES read off each returned + * row, so every scalar order key must be present in the projection. When the + * caller passes a `select` that omits an order key, add it (so the row carries + * the value) and record which keys were added so they can be stripped from the + * final output — the caller asked not to see them. A query with `include`, or + * with neither `select` nor `include` (full row), already returns every scalar + * column, so nothing is added. + */ + #withOrderKeysSelected( + args: { + where: Prisma.TaskRunWhereInput; + select?: Prisma.TaskRunSelect; + include?: Prisma.TaskRunInclude; + orderBy?: Prisma.TaskRunOrderByWithRelationInput | Prisma.TaskRunOrderByWithRelationInput[]; + take?: number; + skip?: number; + cursor?: Prisma.TaskRunWhereUniqueInput; + }, + ordered: Array<{ key: string; direction: "asc" | "desc" }> + ): { + args: typeof args; + addedKeys: string[]; + } { + // The merge always tiebreaks on `id`, so it must be readable too. + const requiredKeys = new Set([...ordered.map((entry) => entry.key), "id"]); + + if (!args.select) { + // include / full-row: all scalars are present already. 
+ return { args, addedKeys: [] }; + } + + const select = args.select as Record; + const addedKeys: string[] = []; + const augmentedSelect: Record = { ...select }; + + for (const key of requiredKeys) { + if (!(key in augmentedSelect)) { + augmentedSelect[key] = true; + addedKeys.push(key); + } + } + + if (addedKeys.length === 0) { + return { args, addedKeys: [] }; + } + + return { args: { ...args, select: augmentedSelect as Prisma.TaskRunSelect }, addedKeys }; + } + + /** Remove the order-key columns that were added purely to drive the merge. */ + #stripAddedKeys( + rows: Array>, + addedKeys: string[] + ): Array> { + if (addedKeys.length === 0) { + return rows; + } + + for (const row of rows) { + for (const key of addedKeys) { + delete row[key]; + } + } + + return rows; + } + + /** + * Normalize the optional `orderBy` (single object or array) into an array of + * single-key order entries, preserving precedence. An empty array means "no + * ordering requested". + */ + #normalizeOrderBy( + orderBy: + | Prisma.TaskRunOrderByWithRelationInput + | Prisma.TaskRunOrderByWithRelationInput[] + | undefined + ): Array<{ key: string; direction: "asc" | "desc" }> { + if (orderBy === undefined) { + return []; + } + + const list = Array.isArray(orderBy) ? orderBy : [orderBy]; + const entries: Array<{ key: string; direction: "asc" | "desc" }> = []; + + for (const clause of list) { + for (const [key, value] of Object.entries(clause)) { + // Only scalar `{ field: "asc" | "desc" }` entries are mergeable in + // memory. A relation/nested sort (value is an object) can't be compared + // here — flag it rather than mis-order across the two tables. + if (value === "asc" || value === "desc") { + entries.push({ key, direction: value }); + } else { + throw new Error( + `RunStore.findRuns: cannot merge across tables on a non-scalar orderBy key "${key}". ` + + "Ordered+limited cross-table reads must order by a scalar column (a time/createdAt field, with id as a tiebreak)." 
+ ); + } + } + } + + return entries; + } + + /** + * Build a total-order comparator from the requested scalar order keys. + * + * The cross-table merge is only correct when the order is a TOTAL order over + * the union of both tables. A time-based column (`createdAt`, or any other + * Date column) provides that; `id` alone does NOT — a cuid and a ksuid live + * in different, non-interleaving id spaces, so ordering the union by `id` + * lexicographically is meaningless. Require a time/createdAt key to lead (or + * appear in) the order, and use `id` only as a within-timestamp tiebreak. + */ + #buildCrossTableComparator( + ordered: Array<{ key: string; direction: "asc" | "desc" }> + ): (a: Record, b: Record) => number { + const hasTimeKey = ordered.some((entry) => this.#isTimeOrderKey(entry.key)); + + if (!hasTimeKey) { + const keys = ordered.map((entry) => entry.key).join(", "); + throw new Error( + `RunStore.findRuns: ordered+limited read orders by [${keys}], which is not a valid total order across the ` + + "legacy TaskRun (cuid) and task_run_v2 (ksuid) tables. Order by a time/createdAt column (id may follow as a tiebreak)." + ); + } + + // Ensure `id` is present as a final tiebreak so the merge is deterministic + // when two rows share the leading timestamp. Use the direction of the + // leading order key for the tiebreak. + const comparators = [...ordered]; + if (!comparators.some((entry) => entry.key === "id")) { + comparators.push({ key: "id", direction: ordered[0].direction }); + } + + return (a, b) => { + for (const { key, direction } of comparators) { + const cmp = this.#compareValues(a[key], b[key]); + if (cmp !== 0) { + return direction === "asc" ? cmp : -cmp; + } + } + return 0; + }; + } + + /** + * A column is a valid cross-table total-order lead when it is time-based. + * `createdAt` is the canonical one; the other Date columns the callers use + * (`updatedAt`, `completedAt`, etc.) qualify too. 
The selected/included row + * must carry the column for the comparator to read it. + */ + #isTimeOrderKey(key: string): boolean { + return ( + key === "createdAt" || + key === "updatedAt" || + key === "completedAt" || + key === "startedAt" || + key === "queuedAt" || + key === "lockedAt" || + key === "delayUntil" || + key === "expiredAt" + ); + } + + /** Ascending comparison of two scalar order values (Date, number, string). */ + #compareValues(a: unknown, b: unknown): number { + if (a === b) return 0; + // Nulls sort last (Prisma's default for `nulls: "last"` is the common case; + // a stable, deterministic placement is what matters for the merge). + if (a === null || a === undefined) return 1; + if (b === null || b === undefined) return -1; + + if (a instanceof Date && b instanceof Date) { + return a.getTime() - b.getTime(); + } + if (typeof a === "number" && typeof b === "number") { + return a - b; + } + return String(a) < String(b) ? -1 : String(a) > String(b) ? 1 : 0; + } + + /** + * 2-way merge of two already-ordered streams into the first `take` rows of + * their combined order. Bounded: walks at most `take` steps. The two inputs + * are each `findMany`-ordered by the SAME order keys, so a single linear pass + * picking the smaller head under `comparator` yields the globally-correct head. 
+ */ + #mergeOrdered( + left: Array>, + right: Array>, + comparator: (a: Record, b: Record) => number, + take: number + ): Array> { + const out: Array> = []; + let i = 0; + let j = 0; + + while (out.length < take && (i < left.length || j < right.length)) { + if (i >= left.length) { + out.push(right[j++]); + } else if (j >= right.length) { + out.push(left[i++]); + } else if (comparator(left[i], right[j]) <= 0) { + out.push(left[i++]); + } else { + out.push(right[j++]); + } + } + + return out; } /** From 37b7f973d75ec76955eb87876e6378ed7ac703bc Mon Sep 17 00:00:00 2001 From: Dan Sutton Date: Fri, 19 Jun 2026 16:47:00 +0100 Subject: [PATCH 40/83] fix(webapp): read runs across both run tables with a time keyset runsBackfiller paginates on a (createdAt, id) keyset instead of id alone. The ClickHouse runs list restores ClickHouse ranking in memory after hydrating rows by id, since a single SQL order cannot span the two tables. --- .../app/services/runsBackfiller.server.ts | 48 ++++++++++++++++--- .../clickhouseRunsRepository.server.ts | 14 ++++-- 2 files changed, 52 insertions(+), 10 deletions(-) diff --git a/apps/webapp/app/services/runsBackfiller.server.ts b/apps/webapp/app/services/runsBackfiller.server.ts index 50e041ee64b..09386c9495c 100644 --- a/apps/webapp/app/services/runsBackfiller.server.ts +++ b/apps/webapp/app/services/runsBackfiller.server.ts @@ -41,6 +41,13 @@ export class RunsBackfillerService { span.setAttribute("cursor", cursor ?? ""); span.setAttribute("batchSize", batchSize ?? 0); + // Keyset on (createdAt, id). Runs now live across two physical tables + // (legacy TaskRun with cuid ids, task_run_v2 with ksuid ids), and `id` + // alone is not a valid order across them: cuid and ksuid sort in + // different ranges. RunStore merges the two tables only on a time-based + // key, so order by createdAt and tiebreak on id within a timestamp. + const keyset = cursor ? 
decodeBackfillCursor(cursor) : undefined; + const runs = await runStore.findRuns( { where: { @@ -51,11 +58,16 @@ export class RunsBackfillerService { status: { in: FINAL_RUN_STATUSES, }, - ...(cursor ? { id: { gt: cursor } } : {}), - }, - orderBy: { - id: "asc", + ...(keyset + ? { + OR: [ + { createdAt: { gt: keyset.createdAt } }, + { createdAt: keyset.createdAt, id: { gt: keyset.id } }, + ], + } + : {}), }, + orderBy: [{ createdAt: "asc" }, { id: "asc" }], take: batchSize, }, this.prisma @@ -94,8 +106,32 @@ export class RunsBackfillerService { lastRunId: lastRun.id, }); - // Return the last run ID to continue from - return lastRun.id; + // Return a (createdAt, id) cursor to continue from on the next batch. + return encodeBackfillCursor(lastRun.createdAt, lastRun.id); }); } } + +// The backfill cursor is an opaque "{createdAt ISO timestamp}_{run id}" string. The admin +// worker passes it back verbatim across batches; only this service interprets +// it. An ISO timestamp contains no "_" and run ids are base62/base36, so the +// first "_" cleanly splits the two halves. +const BACKFILL_CURSOR_SEPARATOR = "_"; + +export function encodeBackfillCursor(createdAt: Date, id: string): string { + return `${createdAt.toISOString()}${BACKFILL_CURSOR_SEPARATOR}${id}`; +} + +export function decodeBackfillCursor(cursor: string): { createdAt: Date; id: string } { + const separatorIndex = cursor.indexOf(BACKFILL_CURSOR_SEPARATOR); + const createdAt = separatorIndex === -1 ? new Date(NaN) : new Date(cursor.slice(0, separatorIndex)); + const id = separatorIndex === -1 ?
"" : cursor.slice(separatorIndex + 1); + + if (Number.isNaN(createdAt.getTime()) || id.length === 0) { + throw new Error( + `RunsBackfillerService: malformed cursor "${cursor}" (expected "_")` + ); + } + + return { createdAt, id }; +} diff --git a/apps/webapp/app/services/runsRepository/clickhouseRunsRepository.server.ts b/apps/webapp/app/services/runsRepository/clickhouseRunsRepository.server.ts index d32652a0b3b..9602a1267df 100644 --- a/apps/webapp/app/services/runsRepository/clickhouseRunsRepository.server.ts +++ b/apps/webapp/app/services/runsRepository/clickhouseRunsRepository.server.ts @@ -169,16 +169,13 @@ export class ClickHouseRunsRepository implements IRunsRepository { async listRuns(options: ListRunsOptions) { const { runIds, pagination } = await this.listRunIds(options); - let runs = await runStore.findRuns( + const hydrated = await runStore.findRuns( { where: { id: { in: runIds, }, }, - orderBy: { - id: "desc", - }, select: { id: true, friendlyId: true, @@ -216,6 +213,15 @@ export class ClickHouseRunsRepository implements IRunsRepository { this.options.prisma ); + // ClickHouse already ranked `runIds`. An `IN (...)` hydration comes back + // unordered, and a single SQL `orderBy` can't span the two physical run + // tables (legacy TaskRun + task_run_v2), so restore ClickHouse's ranking + // in memory. 
+ const runById = new Map(hydrated.map((run) => [run.id, run])); + let runs = runIds + .map((id) => runById.get(id)) + .filter((run): run is NonNullable => run !== undefined); + // ClickHouse is slightly delayed, so we're going to do in-memory status filtering too if (options.statuses && options.statuses.length > 0) { runs = runs.filter((run) => options.statuses!.includes(run.status)); From 658b3850e8979609ad7ccb609b19060d2d70d5ec Mon Sep 17 00:00:00 2001 From: Dan Sutton Date: Fri, 19 Jun 2026 16:56:51 +0100 Subject: [PATCH 41/83] feat(run-store): read non-id predicates across both run tables findRun and findRunOrThrow route by the id/friendlyId in the predicate. A lookup that carries neither (the idempotency-key dedup, or an "are there any runs in this environment" check) previously defaulted to the legacy table and would miss a match that lives in task_run_v2. Such predicates now query both tables in parallel and return the first match, so a reused idempotency key is found wherever its run lives and no duplicate is created. 
--- .../run-store/src/PostgresRunStore.test.ts | 74 ++++++++++++++++ .../run-store/src/PostgresRunStore.ts | 84 +++++++++++++------ 2 files changed, 131 insertions(+), 27 deletions(-) diff --git a/internal-packages/run-store/src/PostgresRunStore.test.ts b/internal-packages/run-store/src/PostgresRunStore.test.ts index a5866fa5d81..1a982d87dc6 100644 --- a/internal-packages/run-store/src/PostgresRunStore.test.ts +++ b/internal-packages/run-store/src/PostgresRunStore.test.ts @@ -1970,6 +1970,80 @@ describe("PostgresRunStore — table routing by id format", () => { } ); + postgresTest( + "findRun resolves a non-id predicate (idempotency key) against a run in either table", + async ({ prisma }) => { + const { organization, project, environment } = await seedEnvironment(prisma); + const store = new PostgresRunStore({ prisma, readOnlyPrisma: prisma }); + + // A KSUID run carrying an idempotency key lands in task_run_v2 … + const ksuid = RunId.generateKsuid(); + await seedRoutedRun(prisma, { + id: ksuid.id, + friendlyId: ksuid.friendlyId, + organizationId: organization.id, + projectId: project.id, + runtimeEnvironmentId: environment.id, + idempotencyKey: "idem-v2", + taskIdentifier: "my-task", + }); + + // … and a cuid run carrying a different key lands in legacy TaskRun. + const cuid = RunId.generate(); + await seedRoutedRun(prisma, { + id: cuid.id, + friendlyId: cuid.friendlyId, + organizationId: organization.id, + projectId: project.id, + runtimeEnvironmentId: environment.id, + idempotencyKey: "idem-legacy", + taskIdentifier: "my-task", + }); + + // The lookup carries no id/friendlyId, so it must read BOTH tables — + // this is the mixed-window idempotency dedup. Miss either table and a + // reused key produces a duplicate run. 
+ const v2Hit = await store.findRun({ + runtimeEnvironmentId: environment.id, + idempotencyKey: "idem-v2", + taskIdentifier: "my-task", + }); + expect(v2Hit?.id).toBe(ksuid.id); + + const legacyHit = await store.findRun({ + runtimeEnvironmentId: environment.id, + idempotencyKey: "idem-legacy", + taskIdentifier: "my-task", + }); + expect(legacyHit?.id).toBe(cuid.id); + + // A key in neither table returns null — no false dedup. + const miss = await store.findRun({ + runtimeEnvironmentId: environment.id, + idempotencyKey: "idem-missing", + taskIdentifier: "my-task", + }); + expect(miss).toBeNull(); + + // findRunOrThrow takes the same both-table path: it finds the v2 row … + const thrown = await store.findRunOrThrow({ + runtimeEnvironmentId: environment.id, + idempotencyKey: "idem-v2", + taskIdentifier: "my-task", + }); + expect(thrown.id).toBe(ksuid.id); + + // … and throws when neither table matches. + await expect( + store.findRunOrThrow({ + runtimeEnvironmentId: environment.id, + idempotencyKey: "idem-missing", + taskIdentifier: "my-task", + }) + ).rejects.toThrow(); + } + ); + postgresTest( "expireRunsBatch with a mixed array updates both tables and returns the combined count", async ({ prisma }) => { diff --git a/internal-packages/run-store/src/PostgresRunStore.ts b/internal-packages/run-store/src/PostgresRunStore.ts index 6f18a3dd4fc..d07c9630db4 100644 --- a/internal-packages/run-store/src/PostgresRunStore.ts +++ b/internal-packages/run-store/src/PostgresRunStore.ts @@ -63,27 +63,43 @@ export class PostgresRunStore implements RunStore { } /** - * Route a single-row read to its physical table from the routing key in the - * `where` clause. `findRun`/`findRunOrThrow` are always called with a - * `{ id }` or `{ friendlyId }` predicate; both carry the same KSUID/cuid body - * and route identically. When neither is a plain string (e.g. an unexpected - * predicate-only read), default to the legacy `taskRun` table — matching the - * pre-split single-table behavior. 
+ * The routing key for a single-row read: the `{ id }` or `{ friendlyId }` + * value in the `where` clause. Both carry the same KSUID/cuid body and route + * to the same physical table. Returns `undefined` for a predicate that + * addresses no specific run (e.g. an idempotency-key lookup), which must read + * both tables rather than assume one. */ - #runReadModel( + #routingKeyOf(where: Prisma.TaskRunWhereInput): string | undefined { + return typeof where.id === "string" + ? where.id + : typeof where.friendlyId === "string" + ? where.friendlyId + : undefined; + } + + /** + * Read a single row matching a non-id predicate from BOTH physical tables. + * A run lives in exactly one table (chosen by its id format), so a key-based + * predicate (idempotency key, "has this env any runs") can match a row in + * either. Query both in parallel and return the first match — at most one + * side is non-null, and legacy is preferred for a stable result if a + * predicate ever matches both. `task_run_v2` is an identical clone of + * `TaskRun`, so the SAME args (select/include and the security-scoping + * `where`) run unchanged against either delegate. + */ + async #findFirstAcrossTables( prisma: PrismaClientOrTransaction | PrismaReplicaClient, - where: Prisma.TaskRunWhereInput - ) { - const routingKey = - typeof where.id === "string" - ? where.id - : typeof where.friendlyId === "string" - ? where.friendlyId - : undefined; - - return routingKey !== undefined && isKsuidId(routingKey) - ? (prisma.taskRunV2 as unknown as typeof prisma.taskRun) - : prisma.taskRun; + where: Prisma.TaskRunWhereInput, + args: { select?: Prisma.TaskRunSelect; include?: Prisma.TaskRunInclude } + ): Promise { + const v2Model = prisma.taskRunV2 as unknown as typeof prisma.taskRun; + + const [legacyRun, v2Run] = await Promise.all([ + prisma.taskRun.findFirst({ where, ...args }), + v2Model.findFirst({ where, ...args }), + ]); + + return legacyRun ?? 
v2Run; } async createRun( @@ -734,10 +750,15 @@ export class PostgresRunStore implements RunStore { ): Promise { const { args, prisma } = this.#resolveReadArgs(argsOrClient, client); - return this.#runReadModel(prisma, where).findFirst({ - where, - ...args, - }); + const routingKey = this.#routingKeyOf(where); + if (routingKey !== undefined) { + // by id / friendlyId: the id format picks exactly one table, O(1). + return this.runModel(prisma, routingKey).findFirst({ where, ...args }); + } + + // Non-id predicate (e.g. idempotency-key dedup): the match can be in + // either table, so read both. + return this.#findFirstAcrossTables(prisma, where, args); } findRunOrThrow( @@ -761,10 +782,19 @@ export class PostgresRunStore implements RunStore { ): Promise { const { args, prisma } = this.#resolveReadArgs(argsOrClient, client); - return this.#runReadModel(prisma, where).findFirstOrThrow({ - where, - ...args, - }); + const routingKey = this.#routingKeyOf(where); + if (routingKey !== undefined) { + return this.runModel(prisma, routingKey).findFirstOrThrow({ where, ...args }); + } + + // Non-id predicate: read both tables, then enforce the throw-on-miss + // contract ourselves (neither table's findFirstOrThrow could see the + // other's row). + const run = await this.#findFirstAcrossTables(prisma, where, args); + if (run === null || run === undefined) { + throw new Error("PostgresRunStore.findRunOrThrow: no run matched the predicate"); + } + return run; } findRuns( From 47610ee3b0a54babbfff10c9b62f7f362ab6e74a Mon Sep 17 00:00:00 2001 From: Dan Sutton Date: Fri, 19 Jun 2026 17:02:09 +0100 Subject: [PATCH 42/83] feat(webapp): per-org cutover flag for the v2 run table Adds a per-org runTableV2 feature flag, read in memory at the single run-id mint site in the trigger path. When on, the org mints a KSUID id for new runs (routing them to task_run_v2); off, the default, keeps minting legacy ids. 
The read is a pure lookup on the org featureFlags already loaded at auth, so the trigger path adds no query. RunStore routes purely by id format and never sees this flag. --- .../runEngine/services/triggerTask.server.ts | 13 ++++++++- apps/webapp/app/v3/featureFlags.ts | 7 +++++ apps/webapp/app/v3/runTableV2.server.ts | 28 +++++++++++++++++++ apps/webapp/test/runTableV2.test.ts | 28 +++++++++++++++++++ 4 files changed, 75 insertions(+), 1 deletion(-) create mode 100644 apps/webapp/app/v3/runTableV2.server.ts create mode 100644 apps/webapp/test/runTableV2.test.ts diff --git a/apps/webapp/app/runEngine/services/triggerTask.server.ts b/apps/webapp/app/runEngine/services/triggerTask.server.ts index 89a938da8bf..8c61b7d7fcd 100644 --- a/apps/webapp/app/runEngine/services/triggerTask.server.ts +++ b/apps/webapp/app/runEngine/services/triggerTask.server.ts @@ -25,6 +25,7 @@ import { logger } from "~/services/logger.server"; import { parseDelay } from "~/utils/delays"; import { handleMetadataPacket } from "~/utils/packets"; import { startSpan } from "~/v3/tracing.server"; +import { shouldUseV2RunTable } from "~/v3/runTableV2.server"; import type { TriggerTaskServiceOptions, TriggerTaskServiceResult, @@ -151,7 +152,17 @@ export class RunEngineTriggerTaskService { span.setAttribute("taskId", taskId); span.setAttribute("attempt", attempt); - const runFriendlyId = options?.runFriendlyId ?? RunId.generate().friendlyId; + // The single per-org cutover point: an opted-in org mints a KSUID id + // (routing the run to task_run_v2), everyone else keeps a legacy id + // (TaskRun). The flag is a pure in-memory read of the org's + // featureFlags already loaded on `environment` — no DB query on the + // trigger hot path. Downstream routing is by id format only. + const runFriendlyId = + options?.runFriendlyId ?? + (shouldUseV2RunTable(environment.organization.featureFlags) + ? 
RunId.generateKsuid() + : RunId.generate() + ).friendlyId; const triggerRequest = { taskId, friendlyId: runFriendlyId, diff --git a/apps/webapp/app/v3/featureFlags.ts b/apps/webapp/app/v3/featureFlags.ts index 6b75b9ef903..000013f6d23 100644 --- a/apps/webapp/app/v3/featureFlags.ts +++ b/apps/webapp/app/v3/featureFlags.ts @@ -16,6 +16,7 @@ export const FEATURE_FLAG = { computeMigrationFreePercentage: "computeMigrationFreePercentage", computeMigrationPaidPercentage: "computeMigrationPaidPercentage", computeMigrationRequireTemplate: "computeMigrationRequireTemplate", + runTableV2: "runTableV2", } as const; export const FeatureFlagCatalog = { @@ -43,6 +44,12 @@ export const FeatureFlagCatalog = { // When on, migrated orgs build their compute template in required mode at deploy // (fails the deploy on error) instead of shadow. Strict boolean (see above). [FEATURE_FLAG.computeMigrationRequireTemplate]: z.boolean(), + // Per-org cutover to the parallel task_run_v2 table. When on, new runs for the + // org mint a KSUID id (routing them to task_run_v2); off (the default) keeps + // minting legacy ids. Strict boolean (see above): coercing a stringified + // "false" to true would cut an org over by mistake, and runs created on v2 + // stay on v2. + [FEATURE_FLAG.runTableV2]: z.boolean(), }; export type FeatureFlagKey = keyof typeof FeatureFlagCatalog; diff --git a/apps/webapp/app/v3/runTableV2.server.ts b/apps/webapp/app/v3/runTableV2.server.ts new file mode 100644 index 00000000000..51b55aefbe3 --- /dev/null +++ b/apps/webapp/app/v3/runTableV2.server.ts @@ -0,0 +1,28 @@ +import { FEATURE_FLAG, FeatureFlagCatalog } from "~/v3/featureFlags"; + +/** + * Per-org cutover switch for the parallel `task_run_v2` run table. + * + * Read in memory from `Organization.featureFlags` (already loaded on the + * AuthenticatedEnvironment at API-key auth, so this adds no DB query) at the + * single run-id mint site in the trigger path. 
On → mint a KSUID id, which + * routes the run to `task_run_v2`; off (the default) → mint a legacy id, which + * routes to `TaskRun`. + * + * RunStore never reads this flag: it routes purely by id format. The flag only + * decides which id scheme is minted upstream. Disabling it sends only NEW runs + * back to legacy; runs already created on v2 stay readable there (routed by id). + */ +export function shouldUseV2RunTable(orgFeatureFlags: unknown): boolean { + if (orgFeatureFlags === null || typeof orgFeatureFlags !== "object") { + return false; + } + + const override = (orgFeatureFlags as Record)[FEATURE_FLAG.runTableV2]; + if (override === undefined) { + return false; + } + + const parsed = FeatureFlagCatalog[FEATURE_FLAG.runTableV2].safeParse(override); + return parsed.success ? parsed.data : false; +} diff --git a/apps/webapp/test/runTableV2.test.ts b/apps/webapp/test/runTableV2.test.ts new file mode 100644 index 00000000000..9abae4cb7bb --- /dev/null +++ b/apps/webapp/test/runTableV2.test.ts @@ -0,0 +1,28 @@ +import { describe, expect, it } from "vitest"; +import { shouldUseV2RunTable } from "~/v3/runTableV2.server"; + +describe("shouldUseV2RunTable", () => { + it("defaults to false when the org has no flags", () => { + expect(shouldUseV2RunTable(null)).toBe(false); + expect(shouldUseV2RunTable(undefined)).toBe(false); + expect(shouldUseV2RunTable({})).toBe(false); + }); + + it("returns true only when the flag is the boolean true", () => { + expect(shouldUseV2RunTable({ runTableV2: true })).toBe(true); + expect(shouldUseV2RunTable({ runTableV2: false })).toBe(false); + }); + + it("rejects a stringified flag value (strict boolean, no coercion)", () => { + // A stringified "false" must not coerce to true and cut the org over. 
+ expect(shouldUseV2RunTable({ runTableV2: "true" })).toBe(false); + expect(shouldUseV2RunTable({ runTableV2: "false" })).toBe(false); + expect(shouldUseV2RunTable({ runTableV2: 1 })).toBe(false); + }); + + it("ignores unrelated flags and non-object inputs", () => { + expect(shouldUseV2RunTable({ mollifierEnabled: true })).toBe(false); + expect(shouldUseV2RunTable("runTableV2")).toBe(false); + expect(shouldUseV2RunTable(42)).toBe(false); + }); +}); From 912a504a534b75d266adcd7db886ba32451e4114 Mon Sep 17 00:00:00 2001 From: Dan Sutton Date: Fri, 19 Jun 2026 17:41:46 +0100 Subject: [PATCH 43/83] feat(replication): co-publish additional tables and reconcile existing publications LogicalReplicationClient gains an optional additionalTables option. These are published alongside the primary table in the same publication, and their WAL events stream through the same data handler. When the publication already exists, missing tables are added via ALTER PUBLICATION ADD TABLE (online, slot-preserving) instead of erroring, so a publication can gain a table without a drop and recreate. --- internal-packages/replication/src/client.ts | 78 +++++++++++++++------ 1 file changed, 58 insertions(+), 20 deletions(-) diff --git a/internal-packages/replication/src/client.ts b/internal-packages/replication/src/client.ts index 1a7ddb27236..044c5e19320 100644 --- a/internal-packages/replication/src/client.ts +++ b/internal-packages/replication/src/client.ts @@ -23,6 +23,14 @@ export interface LogicalReplicationClientOptions { * The table to replicate (for publication creation). */ table: string; + /** + * Additional tables to co-publish into the same publication. Their WAL + * events stream through the same `data` handler as `table`, so use this only + * when the extra tables share `table`'s row shape and downstream transform + * (e.g. a parallel clone table). On startup they are added to an existing + * publication via ALTER PUBLICATION ... ADD TABLE. 
+ */ + additionalTables?: string[]; /** * The name of the replication slot to use. */ @@ -407,6 +415,15 @@ export class LogicalReplicationClient { return this; } + // The full set of tables this client publishes: the primary `table` plus any + // `additionalTables`. Order is stable so the publication's FOR TABLE clause is + // deterministic. + #allTables(): string[] { + return this.options.additionalTables + ? [this.options.table, ...this.options.additionalTables] + : [this.options.table]; + } + async #createPublication(): Promise { if (!this.client) { this.events.emit("error", new LogicalReplicationClientError("Client not connected")); @@ -416,8 +433,10 @@ export class LogicalReplicationClient { const publicationExists = await this.#doesPublicationExist(); if (publicationExists) { - // Validate the existing publication is correctly configured - const validationError = await this.#validatePublicationConfiguration(); + // Reconcile the existing publication: add any configured table it is + // missing (e.g. a clone table added after the publication was first + // created). Returns an error string only for unrecoverable mismatches. + const validationError = await this.#ensurePublicationConfiguration(); if (validationError) { this.logger.error("Publication exists but is misconfigured", { @@ -441,9 +460,13 @@ export class LogicalReplicationClient { return true; } + const tableList = this.#allTables() + .map((table) => `"${table}"`) + .join(", "); + const [createError] = await tryCatch( this.client.query( - `CREATE PUBLICATION "${this.options.publicationName}" FOR TABLE "${this.options.table}" ${ + `CREATE PUBLICATION "${this.options.publicationName}" FOR TABLE ${tableList} ${ this.options.publicationActions ? 
`WITH (publish = '${this.options.publicationActions.join(", ")}')` : "" @@ -483,32 +506,47 @@ export class LogicalReplicationClient { return res.rows[0].exists; } - async #validatePublicationConfiguration(): Promise { + async #ensurePublicationConfiguration(): Promise { if (!this.client) { - return "Cannot validate publication configuration: client not connected"; + return "Cannot ensure publication configuration: client not connected"; } - // Check if the publication has the correct table + // Which public tables the publication already carries. const tablesRes = await this.client.query( - `SELECT schemaname, tablename - FROM pg_publication_tables + `SELECT schemaname, tablename + FROM pg_publication_tables WHERE pubname = '${this.options.publicationName}';` ); - const tables = tablesRes.rows; - const expectedTable = this.options.table; - - // Check if the table is in the publication - const hasTable = tables.some( - (row) => row.tablename === expectedTable && row.schemaname === "public" + const currentTables = new Set( + tablesRes.rows + .filter((row) => row.schemaname === "public") + .map((row) => row.tablename as string) ); - if (!hasTable) { - if (tables.length === 0) { - return `Publication '${this.options.publicationName}' exists but has NO TABLES configured. Expected table: "public.${expectedTable}". Run: ALTER PUBLICATION ${this.options.publicationName} ADD TABLE "${expectedTable}";`; - } else { - const tableList = tables.map((t) => `"${t.schemaname}"."${t.tablename}"`).join(", "); - return `Publication '${this.options.publicationName}' exists but does not include the required table "public.${expectedTable}". Current tables: ${tableList}. Run: ALTER PUBLICATION ${this.options.publicationName} ADD TABLE "${expectedTable}";`; + // Reconcile rather than reject: add any configured table the publication is + // missing. ALTER PUBLICATION ... ADD TABLE is online and leaves the slot + // position intact, so an existing publication can gain a table (e.g. 
+ // task_run_v2 alongside TaskRun) without a drop/recreate. ADD TABLE on a + // table already published raises duplicate_object (42710); treat that as a + // benign race (another instance won) rather than a failure. + const missingTables = this.#allTables().filter((table) => !currentTables.has(table)); + + for (const table of missingTables) { + this.logger.info("Adding table to existing publication", { + name: this.options.name, + publicationName: this.options.publicationName, + table, + }); + + const [addError] = await tryCatch( + this.client.query( + `ALTER PUBLICATION "${this.options.publicationName}" ADD TABLE "${table}";` + ) + ); + + if (addError && (addError as { code?: string }).code !== "42710") { + return `Failed to add table "public.${table}" to publication '${this.options.publicationName}': ${addError.message}`; } } From 3549341eef35c2cf4809eae3bf591abf0102036f Mon Sep 17 00:00:00 2001 From: Dan Sutton Date: Fri, 19 Jun 2026 17:41:46 +0100 Subject: [PATCH 44/83] feat(webapp): stream task_run_v2 into ClickHouse runsReplicationService co-publishes task_run_v2 alongside TaskRun. It is a column-identical clone, so its WAL rows flow through the same transform into the same ClickHouse table, keeping the mirror complete once orgs cut over to v2 run ids. task_run_v2 needs REPLICA IDENTITY FULL, applied the same out-of-band way as TaskRun, so update and delete events carry the old row. 
--- .../services/runsReplicationService.server.ts | 5 + .../runsReplicationService.taskRunV2.test.ts | 125 ++++++++++++++++++ apps/webapp/test/utils/replicationUtils.ts | 4 + 3 files changed, 134 insertions(+) create mode 100644 apps/webapp/test/runsReplicationService.taskRunV2.test.ts diff --git a/apps/webapp/app/services/runsReplicationService.server.ts b/apps/webapp/app/services/runsReplicationService.server.ts index 31d8a3844cf..604056de8e7 100644 --- a/apps/webapp/app/services/runsReplicationService.server.ts +++ b/apps/webapp/app/services/runsReplicationService.server.ts @@ -227,6 +227,11 @@ export class RunsReplicationService { slotName: options.slotName, publicationName: options.publicationName, table: "TaskRun", + // task_run_v2 is a column-identical clone of TaskRun, so its WAL rows + // flow through the same handler/transform into the same ClickHouse table. + // Co-publishing it keeps the ClickHouse mirror complete once orgs cut over + // to v2 run ids; until then the table is empty and this is a no-op. 
+ additionalTables: ["task_run_v2"], redisOptions: options.redisOptions, autoAcknowledge: false, publicationActions: ["insert", "update", "delete"], diff --git a/apps/webapp/test/runsReplicationService.taskRunV2.test.ts b/apps/webapp/test/runsReplicationService.taskRunV2.test.ts new file mode 100644 index 00000000000..bf31d97ffb3 --- /dev/null +++ b/apps/webapp/test/runsReplicationService.taskRunV2.test.ts @@ -0,0 +1,125 @@ +import { ClickHouse } from "@internal/clickhouse"; +import { replicationContainerTest } from "@internal/testcontainers"; +import { RunId } from "@trigger.dev/core/v3/isomorphic"; +import { setTimeout } from "node:timers/promises"; +import { z } from "zod"; +import { RunsReplicationService } from "~/services/runsReplicationService.server"; +import { createInMemoryTracing } from "./utils/tracing"; +import { TestReplicationClickhouseFactory } from "./utils/testReplicationClickhouseFactory"; + +vi.setConfig({ testTimeout: 60_000 }); + +describe("RunsReplicationService (task_run_v2)", () => { + replicationContainerTest( + "co-publishes task_run_v2 and streams its rows to the same ClickHouse table", + async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => { + // Both tables are in the publication; both need FULL identity so the + // delete transform can read the old row. INSERTs (this test) carry the + // full new tuple regardless, but we mirror the production setup. 
+ await prisma.$executeRawUnsafe(`ALTER TABLE public."TaskRun" REPLICA IDENTITY FULL;`); + await prisma.$executeRawUnsafe(`ALTER TABLE public."task_run_v2" REPLICA IDENTITY FULL;`); + + const clickhouse = new ClickHouse({ + url: clickhouseContainer.getConnectionUrl(), + name: "runs-replication", + compression: { request: true }, + logLevel: "warn", + }); + + const { tracer } = createInMemoryTracing(); + + const runsReplicationService = new RunsReplicationService({ + clickhouseFactory: new TestReplicationClickhouseFactory(clickhouse), + pgConnectionUrl: postgresContainer.getConnectionUri(), + serviceName: "runs-replication", + slotName: "task_runs_to_clickhouse_v1", + publicationName: "task_runs_to_clickhouse_v1_publication", + redisOptions, + maxFlushConcurrency: 1, + flushIntervalMs: 100, + flushBatchSize: 1, + leaderLockTimeoutMs: 5000, + leaderLockExtendIntervalMs: 1000, + ackIntervalSeconds: 5, + tracer, + logLevel: "warn", + }); + + await runsReplicationService.start(); + + const organization = await prisma.organization.create({ + data: { title: "test", slug: "test" }, + }); + const project = await prisma.project.create({ + data: { + name: "test", + slug: "test", + organizationId: organization.id, + externalRef: "test", + }, + }); + const runtimeEnvironment = await prisma.runtimeEnvironment.create({ + data: { + slug: "test", + type: "DEVELOPMENT", + projectId: project.id, + organizationId: organization.id, + apiKey: "test", + pkApiKey: "test", + shortcode: "test", + }, + }); + + // A v2 run lives in task_run_v2, keyed by a KSUID id. 
+ const ksuid = RunId.generateKsuid(); + const run = await prisma.taskRunV2.create({ + data: { + id: ksuid.id, + friendlyId: ksuid.friendlyId, + taskIdentifier: "my-task", + payload: JSON.stringify({ foo: "bar" }), + payloadType: "application/json", + traceId: "v2trace", + spanId: "v2span", + queue: "test", + workerQueue: "us-east-1-next", + region: "us-east-1", + planType: "free", + runtimeEnvironmentId: runtimeEnvironment.id, + projectId: project.id, + organizationId: organization.id, + environmentType: "DEVELOPMENT", + engine: "V2", + }, + }); + + await setTimeout(1000); + + const queryRuns = clickhouse.reader.query({ + name: "runs-replication", + query: "SELECT * FROM trigger_dev.task_runs_v2 WHERE run_id = {runId: String}", + schema: z.any(), + params: z.object({ runId: z.string() }), + }); + + const [queryError, result] = await queryRuns({ runId: run.id }); + + expect(queryError).toBeNull(); + expect(result?.length).toBe(1); + expect(result?.[0]).toEqual( + expect.objectContaining({ + run_id: run.id, + friendly_id: run.friendlyId, + task_identifier: "my-task", + environment_id: runtimeEnvironment.id, + project_id: project.id, + organization_id: organization.id, + environment_type: "DEVELOPMENT", + engine: "V2", + }) + ); + + await runsReplicationService.stop(); + } + ); +}); diff --git a/apps/webapp/test/utils/replicationUtils.ts b/apps/webapp/test/utils/replicationUtils.ts index 358da0c2cf6..713bd242892 100644 --- a/apps/webapp/test/utils/replicationUtils.ts +++ b/apps/webapp/test/utils/replicationUtils.ts @@ -17,6 +17,10 @@ export async function setupClickhouseReplication({ redisOptions: RedisOptions; }) { await prisma.$executeRawUnsafe(`ALTER TABLE public."TaskRun" REPLICA IDENTITY FULL;`); + // task_run_v2 is co-published with TaskRun; it needs FULL identity too so + // UPDATE/DELETE WAL events carry the old row (the delete transform reads + // organizationId/environmentType off it). Mirrors the TaskRun line above. 
+ await prisma.$executeRawUnsafe(`ALTER TABLE public."task_run_v2" REPLICA IDENTITY FULL;`); const clickhouse = new ClickHouse({ url: clickhouseUrl, From 4410999c8ecc376bb8d52ee9ee0825178bd3d442 Mon Sep 17 00:00:00 2001 From: Dan Sutton Date: Mon, 22 Jun 2026 09:17:12 +0100 Subject: [PATCH 45/83] test(webapp): de-flake the task_run_v2 replication streaming test Poll for the ClickHouse row with a bounded deadline instead of a fixed sleep, which is flaky under replication lag variance, and stop the replication service in a finally block so a failing assertion cannot leak it into later tests. --- .../runsReplicationService.taskRunV2.test.ts | 147 ++++++++++-------- 1 file changed, 78 insertions(+), 69 deletions(-) diff --git a/apps/webapp/test/runsReplicationService.taskRunV2.test.ts b/apps/webapp/test/runsReplicationService.taskRunV2.test.ts index bf31d97ffb3..9f0c6249feb 100644 --- a/apps/webapp/test/runsReplicationService.taskRunV2.test.ts +++ b/apps/webapp/test/runsReplicationService.taskRunV2.test.ts @@ -47,79 +47,88 @@ describe("RunsReplicationService (task_run_v2)", () => { await runsReplicationService.start(); - const organization = await prisma.organization.create({ - data: { title: "test", slug: "test" }, - }); - const project = await prisma.project.create({ - data: { - name: "test", - slug: "test", - organizationId: organization.id, - externalRef: "test", - }, - }); - const runtimeEnvironment = await prisma.runtimeEnvironment.create({ - data: { - slug: "test", - type: "DEVELOPMENT", - projectId: project.id, - organizationId: organization.id, - apiKey: "test", - pkApiKey: "test", - shortcode: "test", - }, - }); - - // A v2 run lives in task_run_v2, keyed by a KSUID id. 
- const ksuid = RunId.generateKsuid(); - const run = await prisma.taskRunV2.create({ - data: { - id: ksuid.id, - friendlyId: ksuid.friendlyId, - taskIdentifier: "my-task", - payload: JSON.stringify({ foo: "bar" }), - payloadType: "application/json", - traceId: "v2trace", - spanId: "v2span", - queue: "test", - workerQueue: "us-east-1-next", - region: "us-east-1", - planType: "free", - runtimeEnvironmentId: runtimeEnvironment.id, - projectId: project.id, - organizationId: organization.id, - environmentType: "DEVELOPMENT", - engine: "V2", - }, - }); - - await setTimeout(1000); + try { + const organization = await prisma.organization.create({ + data: { title: "test", slug: "test" }, + }); + const project = await prisma.project.create({ + data: { + name: "test", + slug: "test", + organizationId: organization.id, + externalRef: "test", + }, + }); + const runtimeEnvironment = await prisma.runtimeEnvironment.create({ + data: { + slug: "test", + type: "DEVELOPMENT", + projectId: project.id, + organizationId: organization.id, + apiKey: "test", + pkApiKey: "test", + shortcode: "test", + }, + }); - const queryRuns = clickhouse.reader.query({ - name: "runs-replication", - query: "SELECT * FROM trigger_dev.task_runs_v2 WHERE run_id = {runId: String}", - schema: z.any(), - params: z.object({ runId: z.string() }), - }); + // A v2 run lives in task_run_v2, keyed by a KSUID id. 
+ const ksuid = RunId.generateKsuid(); + const run = await prisma.taskRunV2.create({ + data: { + id: ksuid.id, + friendlyId: ksuid.friendlyId, + taskIdentifier: "my-task", + payload: JSON.stringify({ foo: "bar" }), + payloadType: "application/json", + traceId: "v2trace", + spanId: "v2span", + queue: "test", + workerQueue: "us-east-1-next", + region: "us-east-1", + planType: "free", + runtimeEnvironmentId: runtimeEnvironment.id, + projectId: project.id, + organizationId: organization.id, + environmentType: "DEVELOPMENT", + engine: "V2", + }, + }); - const [queryError, result] = await queryRuns({ runId: run.id }); + const queryRuns = clickhouse.reader.query({ + name: "runs-replication", + query: "SELECT * FROM trigger_dev.task_runs_v2 WHERE run_id = {runId: String}", + schema: z.any(), + params: z.object({ runId: z.string() }), + }); - expect(queryError).toBeNull(); - expect(result?.length).toBe(1); - expect(result?.[0]).toEqual( - expect.objectContaining({ - run_id: run.id, - friendly_id: run.friendlyId, - task_identifier: "my-task", - environment_id: runtimeEnvironment.id, - project_id: project.id, - organization_id: organization.id, - environment_type: "DEVELOPMENT", - engine: "V2", - }) - ); + // ClickHouse replication is asynchronous: poll until the row lands + // (bounded) instead of a fixed sleep, which is flaky under lag variance. 
+ let queryError: unknown = null; + let result: Array> | undefined; + const deadline = Date.now() + 10_000; + do { + [queryError, result] = await queryRuns({ runId: run.id }); + if (!queryError && result?.length === 1) break; + await setTimeout(200); + } while (Date.now() < deadline); - await runsReplicationService.stop(); + expect(queryError).toBeNull(); + expect(result?.length).toBe(1); + expect(result?.[0]).toEqual( + expect.objectContaining({ + run_id: run.id, + friendly_id: run.friendlyId, + task_identifier: "my-task", + environment_id: runtimeEnvironment.id, + project_id: project.id, + organization_id: organization.id, + environment_type: "DEVELOPMENT", + engine: "V2", + }) + ); + } finally { + await runsReplicationService.stop(); + } } ); }); From fd06ef4bd10d082e0cd5a128f1d80efcf8405bd6 Mon Sep 17 00:00:00 2001 From: Dan Sutton Date: Mon, 22 Jun 2026 10:29:04 +0100 Subject: [PATCH 46/83] fix(run-store): guard findRuns skip and skip the non-candidate table on id-list reads findRuns now throws when given skip: offset pagination cannot span the two run tables, where each would independently skip N rows from its own result rather than N from the merged result. For an id-list predicate (id in [...]), it now queries only the table whose id format can contain those ids, avoiding a wasted query against an empty task_run_v2 while it is unpopulated during rollout. 
--- .../run-store/src/PostgresRunStore.test.ts | 64 +++++++++++++++++- .../run-store/src/PostgresRunStore.ts | 65 ++++++++++++++++--- 2 files changed, 120 insertions(+), 9 deletions(-) diff --git a/internal-packages/run-store/src/PostgresRunStore.test.ts b/internal-packages/run-store/src/PostgresRunStore.test.ts index 1a982d87dc6..8d9fa62fd76 100644 --- a/internal-packages/run-store/src/PostgresRunStore.test.ts +++ b/internal-packages/run-store/src/PostgresRunStore.test.ts @@ -1,7 +1,7 @@ import { postgresTest } from "@internal/testcontainers"; import { isKsuidId, RunId } from "@trigger.dev/core/v3/isomorphic"; import type { PrismaClient } from "@trigger.dev/database"; -import { describe, expect } from "vitest"; +import { describe, expect, vi } from "vitest"; import { PostgresRunStore } from "./PostgresRunStore.js"; import type { CreateCancelledRunInput, CreateFailedRunInput, CreateRunInput } from "./types.js"; @@ -2378,6 +2378,68 @@ describe("PostgresRunStore — table routing by id format", () => { ).rejects.toThrow(/cursor/i); } ); + + postgresTest( + "findRuns rejects `skip` (offset pagination cannot span the two tables)", + async ({ prisma }) => { + const { environment } = await seedEnvironment(prisma); + const store = new PostgresRunStore({ prisma, readOnlyPrisma: prisma }); + + await expect( + store.findRuns({ + where: { runtimeEnvironmentId: environment.id }, + select: { id: true }, + skip: 10, + take: 5, + }) + ).rejects.toThrow(/skip/i); + } + ); + + postgresTest( + "findRuns with an id-list partitions by id format and skips the table with no candidate ids", + async ({ prisma }) => { + const { organization, project, environment } = await seedEnvironment(prisma); + const store = new PostgresRunStore({ prisma, readOnlyPrisma: prisma }); + + const cuid = RunId.generate(); + const ksuid = RunId.generateKsuid(); + for (const r of [cuid, ksuid]) { + await seedRoutedRun(prisma, { + id: r.id, + friendlyId: r.friendlyId, + organizationId: organization.id, + 
projectId: project.id, + runtimeEnvironmentId: environment.id, + }); + } + + const ids = (rows: unknown) => + (rows as Array<{ id: string }>).map((r) => r.id).sort(); + + // Mixed list: both tables queried, both runs returned. + expect( + ids(await store.findRuns({ where: { id: { in: [cuid.id, ksuid.id] } }, select: { id: true } })) + ).toEqual([cuid.id, ksuid.id].sort()); + + // cuid-only list: the task_run_v2 query is skipped, legacy run still returned. + const v2Spy = vi.spyOn(prisma.taskRunV2, "findMany"); + const legacyOnly = await store.findRuns({ where: { id: { in: [cuid.id] } }, select: { id: true } }); + expect(ids(legacyOnly)).toEqual([cuid.id]); + expect(v2Spy).not.toHaveBeenCalled(); + v2Spy.mockRestore(); + + // ksuid-only list: the TaskRun query is skipped, v2 run still returned. + const legacySpy = vi.spyOn(prisma.taskRun, "findMany"); + const v2Only = await store.findRuns({ where: { id: { in: [ksuid.id] } }, select: { id: true } }); + expect(ids(v2Only)).toEqual([ksuid.id]); + expect(legacySpy).not.toHaveBeenCalled(); + legacySpy.mockRestore(); + + // Empty list matches nothing. + expect(ids(await store.findRuns({ where: { id: { in: [] } }, select: { id: true } }))).toEqual([]); + } + ); }); describe("PostgresRunStore — v2 nested writes (run + related rows via nested Prisma create)", () => { diff --git a/internal-packages/run-store/src/PostgresRunStore.ts b/internal-packages/run-store/src/PostgresRunStore.ts index 6829b711c2a..7dce3bc39b7 100644 --- a/internal-packages/run-store/src/PostgresRunStore.ts +++ b/internal-packages/run-store/src/PostgresRunStore.ts @@ -844,14 +844,31 @@ export class PostgresRunStore implements RunStore { ): Promise { const prisma = client ?? this.readOnlyPrisma; + // Offset pagination can't be expressed across two tables: applying `skip` + // to each table independently skips N rows from each, not N from the merged + // result. Reject it rather than silently double-skip. 
No caller uses it; + // cross-table reads keyset-paginate on a where + (createdAt, id) orderBy. + if (args.skip !== undefined) { + throw new Error( + "RunStore.findRuns: `skip` (offset pagination) is not supported across the legacy TaskRun " + + "and task_run_v2 tables. Use a where-based keyset (createdAt + id) instead." + ); + } + // A run lives in exactly one physical table, chosen by its id format, so a - // multi-row read must hit BOTH `TaskRun` (legacy cuid) and `task_run_v2` - // (new ksuid) and combine. `task_run_v2` is an identical clone of `TaskRun` - // (same relation surface), so the SAME `args` — crucially the SAME `where`, - // which is the security scope — run unchanged against either delegate. + // multi-row read generally hits BOTH `TaskRun` (legacy cuid) and + // `task_run_v2` (new ksuid) and combines. `task_run_v2` is an identical + // clone of `TaskRun` (same relation surface), so the SAME `args` (crucially + // the SAME `where`, which is the security scope) run unchanged against + // either delegate. When the predicate is an `id: { in: [...] }` list, the + // table with no candidate ids is skipped (a cuid can't live in task_run_v2, + // nor a ksuid in TaskRun), avoiding an empty query while task_run_v2 is + // unpopulated during rollout. const legacyModel = prisma.taskRun; const v2Model = prisma.taskRunV2 as unknown as typeof prisma.taskRun; + const { queryLegacy, queryV2 } = this.#tablesForWhere(args.where); + const ordered = this.#normalizeOrderBy(args.orderBy); // ORDERED + LIMITED → bounded 2-way merge. @@ -881,8 +898,8 @@ export class PostgresRunStore implements RunStore { const perTableArgs = { ...queryArgs, take: args.take }; const [legacyRows, v2Rows] = (await Promise.all([ - legacyModel.findMany(perTableArgs), - v2Model.findMany(perTableArgs), + queryLegacy ? legacyModel.findMany(perTableArgs) : Promise.resolve([]), + queryV2 ? 
v2Model.findMany(perTableArgs) : Promise.resolve([]), ])) as [Array>, Array>]; const merged = this.#mergeOrdered(legacyRows, v2Rows, comparator, args.take); @@ -901,8 +918,8 @@ export class PostgresRunStore implements RunStore { : { args, addedKeys: [] as string[] }; const [legacyRows, v2Rows] = (await Promise.all([ - legacyModel.findMany(queryArgs), - v2Model.findMany(queryArgs), + queryLegacy ? legacyModel.findMany(queryArgs) : Promise.resolve([]), + queryV2 ? v2Model.findMany(queryArgs) : Promise.resolve([]), ])) as [Array>, Array>]; let combined = legacyRows.concat(v2Rows); @@ -924,6 +941,38 @@ export class PostgresRunStore implements RunStore { return this.#stripAddedKeys(combined, addedKeys); } + /** + * Which physical tables a `findRuns` predicate can match. A run id encodes + * its table, so an `id: { in: [...] }` list containing only cuids cannot match + * `task_run_v2` (and a ksuid-only list cannot match `TaskRun`): the table with + * no candidate ids is skipped, avoiding a wasted query against an empty + * `task_run_v2` during rollout. An empty `in` list matches nothing, so both + * are skipped. Any other predicate must consult both tables. + */ + #tablesForWhere(where: Prisma.TaskRunWhereInput): { queryLegacy: boolean; queryV2: boolean } { + const idFilter = where.id; + const idIn = + idFilter !== null && typeof idFilter === "object" && "in" in idFilter + ? (idFilter as { in?: unknown }).in + : undefined; + + if (Array.isArray(idIn)) { + let queryLegacy = false; + let queryV2 = false; + for (const id of idIn) { + if (typeof id === "string" && isKsuidId(id)) { + queryV2 = true; + } else { + queryLegacy = true; + } + if (queryLegacy && queryV2) break; + } + return { queryLegacy, queryV2 }; + } + + return { queryLegacy: true, queryV2: true }; + } + /** * The cross-table merge/sort compares order-key VALUES read off each returned * row, so every scalar order key must be present in the projection. 
When the From 5ebea983a9a8bde9ea55cbc1b6c236a1e8d46ab4 Mon Sep 17 00:00:00 2001 From: Dan Sutton Date: Mon, 22 Jun 2026 11:32:14 +0100 Subject: [PATCH 47/83] fix(run-store): correct single-table findRuns ordering and cross-table merge collation A single-format id-list narrows findRuns to one physical table, but the ordered+limited path still built the cross-table comparator and threw the time-key guard; it now delegates natively to the one table (Postgres orders within a single table fine). Separately, the in-memory merge comparator ordered strings by code unit while the Postgres keyset continuation orders by the database collation (en_US); switching the comparator to localeCompare makes them agree, so a tied-createdAt boundary spanning both tables no longer skips or duplicates a row. --- .../run-store/src/PostgresRunStore.test.ts | 175 ++++++++++++++++++ .../run-store/src/PostgresRunStore.ts | 58 ++++-- 2 files changed, 214 insertions(+), 19 deletions(-) diff --git a/internal-packages/run-store/src/PostgresRunStore.test.ts b/internal-packages/run-store/src/PostgresRunStore.test.ts index 8d9fa62fd76..821fb53cc7d 100644 --- a/internal-packages/run-store/src/PostgresRunStore.test.ts +++ b/internal-packages/run-store/src/PostgresRunStore.test.ts @@ -1789,6 +1789,8 @@ describe("PostgresRunStore — table routing by id format", () => { idempotencyKey?: string; taskIdentifier?: string; createdAt?: Date; + parentTaskRunId?: string; + rootTaskRunId?: string; } ) { const delegate = isKsuidId(params.id) @@ -1817,6 +1819,8 @@ describe("PostgresRunStore — table routing by id format", () => { depth: 0, ...(params.idempotencyKey !== undefined && { idempotencyKey: params.idempotencyKey }), ...(params.createdAt !== undefined && { createdAt: params.createdAt }), + ...(params.parentTaskRunId !== undefined && { parentTaskRunId: params.parentTaskRunId }), + ...(params.rootTaskRunId !== undefined && { rootTaskRunId: params.rootTaskRunId }), }, }); } @@ -2440,6 +2444,177 @@ 
describe("PostgresRunStore — table routing by id format", () => { expect(ids(await store.findRuns({ where: { id: { in: [] } }, select: { id: true } }))).toEqual([]); } ); + + postgresTest( + "findRuns with a single-format id-list + non-time orderBy + take orders natively without the cross-table guard", + async ({ prisma }) => { + const { organization, project, environment } = await seedEnvironment(prisma); + const store = new PostgresRunStore({ prisma, readOnlyPrisma: prisma }); + + // Two v2 (ksuid) runs. + const k1 = RunId.generateKsuid(); + const k2 = RunId.generateKsuid(); + for (const r of [k1, k2]) { + await seedRoutedRun(prisma, { + id: r.id, + friendlyId: r.friendlyId, + organizationId: organization.id, + projectId: project.id, + runtimeEnvironmentId: environment.id, + }); + } + + // An all-ksuid id-list addresses task_run_v2 alone, so ordering by `id` + // (or any non-time key) with `take` must NOT trip the cross-table + // time-key guard — id is a valid total order within a single table. + const rows = (await store.findRuns({ + where: { id: { in: [k1.id, k2.id] } }, + select: { id: true }, + orderBy: { id: "asc" }, + take: 10, + })) as Array<{ id: string }>; + expect(rows.map((r) => r.id)).toEqual([k1.id, k2.id].sort()); + + // Same for an all-cuid id-list (legacy table only). 
+ const c1 = RunId.generate(); + const c2 = RunId.generate(); + for (const r of [c1, c2]) { + await seedRoutedRun(prisma, { + id: r.id, + friendlyId: r.friendlyId, + organizationId: organization.id, + projectId: project.id, + runtimeEnvironmentId: environment.id, + }); + } + const legacyRows = (await store.findRuns({ + where: { id: { in: [c1.id, c2.id] } }, + select: { id: true }, + orderBy: { id: "desc" }, + take: 10, + })) as Array<{ id: string }>; + expect(legacyRows.map((r) => r.id)).toEqual([c1.id, c2.id].sort().reverse()); + } + ); + + postgresTest( + "merged keyset cursor enumerates every row exactly once at a tied createdAt across both tables (collation boundary)", + async ({ prisma }) => { + const { organization, project, environment } = await seedEnvironment(prisma); + const store = new PostgresRunStore({ prisma, readOnlyPrisma: prisma }); + + // All four rows share the SAME createdAt, so pagination relies entirely on + // the id tiebreak. Hand-crafted ids straddle the collation divergence: a + // 27-char ksuid leading with an UPPERCASE letter routes to task_run_v2, + // and a lowercase cuid routes to TaskRun. Under the DB's en_US collation + // "c" < "Z", but by raw code unit "Z" < "c" — if the in-memory merge and + // the Postgres keyset disagree, a row is skipped or duplicated here. + const sameTime = new Date("2026-06-01T00:00:00.000Z"); + const seeds = [ + "Z" + "0".repeat(26), // ksuid -> task_run_v2 (uppercase lead) + "A" + "1".repeat(26), // ksuid -> task_run_v2 + "c" + "z".repeat(24), // cuid -> TaskRun (25 chars) + "c" + "a".repeat(24), // cuid -> TaskRun + ]; + for (const id of seeds) { + await seedRoutedRun(prisma, { + id, + friendlyId: `run_${id}`, + organizationId: organization.id, + projectId: project.id, + runtimeEnvironmentId: environment.id, + createdAt: sameTime, + }); + } + + // Paginate exactly like runsBackfiller: orderBy [createdAt asc, id asc], + // take 1 (forces the tie boundary on every page), cursor = (createdAt, id). 
+ const seen: string[] = []; + let cursor: { createdAt: Date; id: string } | undefined; + for (let guard = 0; guard < 25; guard++) { + const page = (await store.findRuns({ + where: { + runtimeEnvironmentId: environment.id, + ...(cursor + ? { + OR: [ + { createdAt: { gt: cursor.createdAt } }, + { createdAt: cursor.createdAt, id: { gt: cursor.id } }, + ], + } + : {}), + }, + select: { id: true, createdAt: true }, + orderBy: [{ createdAt: "asc" }, { id: "asc" }], + take: 1, + })) as Array<{ id: string; createdAt: Date }>; + if (page.length === 0) break; + seen.push(page[0].id); + cursor = { createdAt: page[0].createdAt, id: page[0].id }; + } + + // Every seeded row enumerated exactly once: no skip, no duplicate. + expect(seen.slice().sort()).toEqual(seeds.slice().sort()); + expect(new Set(seen).size).toBe(seeds.length); + } + ); + + postgresTest( + "cross-table run hierarchy resolves parent by id and children by predicate across both tables", + async ({ prisma }) => { + const { organization, project, environment } = await seedEnvironment(prisma); + const store = new PostgresRunStore({ prisma, readOnlyPrisma: prisma }); + const base = { + organizationId: organization.id, + projectId: project.id, + runtimeEnvironmentId: environment.id, + }; + + // Legacy cuid PARENT in TaskRun, v2 ksuid CHILD in task_run_v2 pointing at + // it (a hierarchy straddling a runTableV2 flip). This is what the + // presenters resolve via hydrateParentAndRoot / hydrateChildRuns. + const parent = RunId.generate(); + const child = RunId.generateKsuid(); + await seedRoutedRun(prisma, { ...base, id: parent.id, friendlyId: parent.friendlyId }); + await seedRoutedRun(prisma, { + ...base, + id: child.id, + friendlyId: child.friendlyId, + parentTaskRunId: parent.id, + rootTaskRunId: parent.id, + }); + + // child -> parent: by-id read routes to the legacy table. 
+ const resolvedParent = await store.findRun({ id: parent.id }, { select: { id: true } }); + expect(resolvedParent?.id).toBe(parent.id); + // parent -> children: a parentTaskRunId predicate spans both tables and + // finds the v2 child of the legacy parent. + const children = (await store.findRuns({ + where: { parentTaskRunId: parent.id }, + select: { id: true }, + })) as Array<{ id: string }>; + expect(children.map((c) => c.id)).toEqual([child.id]); + + // Mirror: ksuid parent in task_run_v2, cuid child in TaskRun. + const parent2 = RunId.generateKsuid(); + const child2 = RunId.generate(); + await seedRoutedRun(prisma, { ...base, id: parent2.id, friendlyId: parent2.friendlyId }); + await seedRoutedRun(prisma, { + ...base, + id: child2.id, + friendlyId: child2.friendlyId, + parentTaskRunId: parent2.id, + rootTaskRunId: parent2.id, + }); + const resolvedParent2 = await store.findRun({ id: parent2.id }, { select: { id: true } }); + expect(resolvedParent2?.id).toBe(parent2.id); + const children2 = (await store.findRuns({ + where: { parentTaskRunId: parent2.id }, + select: { id: true }, + })) as Array<{ id: string }>; + expect(children2.map((c) => c.id)).toEqual([child2.id]); + } + ); }); describe("PostgresRunStore — v2 nested writes (run + related rows via nested Prisma create)", () => { diff --git a/internal-packages/run-store/src/PostgresRunStore.ts b/internal-packages/run-store/src/PostgresRunStore.ts index 7dce3bc39b7..908137d4fab 100644 --- a/internal-packages/run-store/src/PostgresRunStore.ts +++ b/internal-packages/run-store/src/PostgresRunStore.ts @@ -844,10 +844,36 @@ export class PostgresRunStore implements RunStore { ): Promise { const prisma = client ?? this.readOnlyPrisma; - // Offset pagination can't be expressed across two tables: applying `skip` - // to each table independently skips N rows from each, not N from the merged - // result. Reject it rather than silently double-skip. 
No caller uses it; - // cross-table reads keyset-paginate on a where + (createdAt, id) orderBy. + // A run lives in exactly one physical table, chosen by its id format. An + // `id: { in: [...] }` predicate of a single id format addresses ONE table; + // any other predicate may span both `TaskRun` (legacy cuid) and + // `task_run_v2` (new ksuid). `task_run_v2` is an identical clone of + // `TaskRun` (same relation surface), so the SAME `args` (crucially the SAME + // `where`, the security scope) run unchanged against either delegate. + const legacyModel = prisma.taskRun; + const v2Model = prisma.taskRunV2 as unknown as typeof prisma.taskRun; + + const { queryLegacy, queryV2 } = this.#tablesForWhere(args.where); + + // No candidate table (e.g. an empty `id: { in: [] }`) → matches nothing. + if (!queryLegacy && !queryV2) { + return []; + } + + // Exactly one physical table is in play. There's no cross-table merge, so + // delegate to that table's `findMany` with the args verbatim: Postgres + // orders natively (ordering by any column, incl. `id`, is a valid total + // order WITHIN one table) and `skip`/`cursor`/`take` are all + // single-table-valid. Only the both-table path below needs the in-memory + // comparator/merge and its keyset restrictions. + if (queryLegacy !== queryV2) { + const model = queryLegacy ? legacyModel : v2Model; + return model.findMany(args as Prisma.TaskRunFindManyArgs); + } + + // BOTH tables in play. Offset pagination can't be expressed across two + // tables (applying `skip` to each skips N rows from its own result, not N + // from the merged result), so reject it rather than silently double-skip. 
if (args.skip !== undefined) { throw new Error( "RunStore.findRuns: `skip` (offset pagination) is not supported across the legacy TaskRun " + @@ -855,20 +881,6 @@ export class PostgresRunStore implements RunStore { ); } - // A run lives in exactly one physical table, chosen by its id format, so a - // multi-row read generally hits BOTH `TaskRun` (legacy cuid) and - // `task_run_v2` (new ksuid) and combines. `task_run_v2` is an identical - // clone of `TaskRun` (same relation surface), so the SAME `args` (crucially - // the SAME `where`, which is the security scope) run unchanged against - // either delegate. When the predicate is an `id: { in: [...] }` list, the - // table with no candidate ids is skipped (a cuid can't live in task_run_v2, - // nor a ksuid in TaskRun), avoiding an empty query while task_run_v2 is - // unpopulated during rollout. - const legacyModel = prisma.taskRun; - const v2Model = prisma.taskRunV2 as unknown as typeof prisma.taskRun; - - const { queryLegacy, queryV2 } = this.#tablesForWhere(args.where); - const ordered = this.#normalizeOrderBy(args.orderBy); // ORDERED + LIMITED → bounded 2-way merge. @@ -1153,7 +1165,15 @@ export class PostgresRunStore implements RunStore { if (typeof a === "number" && typeof b === "number") { return a - b; } - return String(a) < String(b) ? -1 : String(a) > String(b) ? 1 : 0; + // String (id) order MUST match Postgres's collation: this comparator merges + // the two per-table streams IN MEMORY, but the keyset continuation + // (`id > cursor`) that fetches the next page is evaluated BY Postgres. If + // the two disagree, a tied-createdAt boundary across the tables silently + // skips or duplicates a row. The run-table id columns use the database + // collation (en_US.utf8), whose ordering of the id charset [0-9A-Za-z] + // matches `localeCompare("en-US")` (verified) but NOT raw code-unit order + // (e.g. "c" < "Z" under en_US, yet "Z" < "c" by code unit). 
+ return String(a).localeCompare(String(b), "en-US"); } /** From 6a2b4e3cca316009db149e7894109d7d346f7bbf Mon Sep 17 00:00:00 2001 From: Dan Sutton Date: Mon, 22 Jun 2026 11:32:14 +0100 Subject: [PATCH 48/83] fix(webapp): serialise idempotency claims for v2-cutover orgs The pre-gate idempotency claim was eligible only when the org was on the mollifier. Concurrent same-key triggers that straddle a runTableV2 flip can mint into different physical tables, whose per-table unique constraints can't see each other, so two runs could share one key. The claim is now also eligible when the org is cut over to the v2 run table, serialising those triggers through Redis. --- .../concerns/idempotencyKeys.server.ts | 23 +++++++++++++------ 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/apps/webapp/app/runEngine/concerns/idempotencyKeys.server.ts b/apps/webapp/app/runEngine/concerns/idempotencyKeys.server.ts index 02d0ec957f2..fb054e862f7 100644 --- a/apps/webapp/app/runEngine/concerns/idempotencyKeys.server.ts +++ b/apps/webapp/app/runEngine/concerns/idempotencyKeys.server.ts @@ -11,6 +11,7 @@ import { findRunByIdWithMollifierFallback } from "~/v3/mollifier/readFallback.se import { claimOrAwait } from "~/v3/mollifier/idempotencyClaim.server"; import { makeResolveMollifierFlag } from "~/v3/mollifier/mollifierGate.server"; import { runStore } from "~/v3/runStore.server"; +import { shouldUseV2RunTable } from "~/v3/runTableV2.server"; import type { TraceEventConcern, TriggerTaskRequest } from "../types"; // In-memory per-org mollifier-enabled check, shared with `evaluateGate` @@ -298,20 +299,28 @@ export class IdempotencyKeyConcern { // trigger hot path. Excluding them keeps the claim aligned with the // gate — if the gate would never mollify the request, there's no // buffer to serialise against. + // Also serialise when the org is cut over to the v2 run table, even if it + // isn't on the mollifier. 
Concurrent same-key triggers that straddle a + // `runTableV2` flag flip can mint into DIFFERENT physical tables (cuid -> + // TaskRun, ksuid -> task_run_v2); the per-table idempotency unique + // constraints can't see each other, so neither INSERT raises P2002 and two + // runs share one key. The Redis claim is the only backstop in that window. + const orgFeatureFlags = + (request.environment.organization?.featureFlags as + | Record + | null + | undefined) ?? null; const claimEligible = !request.body.options?.resumeParentOnCompletion && !request.body.options?.debounce && !request.options?.oneTimeUseToken && - (await resolveOrgMollifierFlag({ + ((await resolveOrgMollifierFlag({ envId: request.environment.id, orgId: request.environment.organizationId, taskId: request.taskId, - orgFeatureFlags: - ((request.environment.organization?.featureFlags as - | Record - | null - | undefined) ?? null), - })); + orgFeatureFlags, + })) || + shouldUseV2RunTable(orgFeatureFlags)); if (claimEligible) { const ttlSeconds = Math.max( 1, From b925f25984ce7252c04e48415ff6ed4055fdb491 Mon Sep 17 00:00:00 2001 From: Dan Sutton Date: Mon, 22 Jun 2026 11:32:14 +0100 Subject: [PATCH 49/83] fix(database,replication): pin task_run_v2 REPLICA IDENTITY FULL and warn when missing A v2 run DELETE needs the full old row so its ClickHouse soft-delete tombstone carries organization and environment ids; under the default replica identity those are dropped and the tombstone is lost. A migration sets REPLICA IDENTITY FULL on task_run_v2 rather than relying on an out-of-band step, and the replication client now warns when any co-published table that publishes UPDATE/DELETE lacks FULL. Adds a replication test for the v2 DELETE tombstone. 
--- .../runsReplicationService.taskRunV2.test.ts | 123 ++++++++++++++++++ .../migration.sql | 9 ++ internal-packages/replication/src/client.ts | 56 ++++++++ 3 files changed, 188 insertions(+) create mode 100644 internal-packages/database/prisma/migrations/20260622120000_task_run_v2_replica_identity_full/migration.sql diff --git a/apps/webapp/test/runsReplicationService.taskRunV2.test.ts b/apps/webapp/test/runsReplicationService.taskRunV2.test.ts index 9f0c6249feb..af11bf906a7 100644 --- a/apps/webapp/test/runsReplicationService.taskRunV2.test.ts +++ b/apps/webapp/test/runsReplicationService.taskRunV2.test.ts @@ -131,4 +131,127 @@ describe("RunsReplicationService (task_run_v2)", () => { } } ); + + replicationContainerTest( + "streams a task_run_v2 DELETE with a complete old row (REPLICA IDENTITY FULL) so the tombstone carries org id", + async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => { + await prisma.$executeRawUnsafe(`ALTER TABLE public."TaskRun" REPLICA IDENTITY FULL;`); + // The migration sets this in production; the testcontainer builds via + // db push, so apply it here. Without FULL, the DELETE's old tuple is just + // the PK and organization_id below would be empty (tombstone dropped). 
+ await prisma.$executeRawUnsafe(`ALTER TABLE public."task_run_v2" REPLICA IDENTITY FULL;`); + + const clickhouse = new ClickHouse({ + url: clickhouseContainer.getConnectionUrl(), + name: "runs-replication", + compression: { request: true }, + logLevel: "warn", + }); + + const { tracer } = createInMemoryTracing(); + + const runsReplicationService = new RunsReplicationService({ + clickhouseFactory: new TestReplicationClickhouseFactory(clickhouse), + pgConnectionUrl: postgresContainer.getConnectionUri(), + serviceName: "runs-replication", + slotName: "task_runs_to_clickhouse_v1", + publicationName: "task_runs_to_clickhouse_v1_publication", + redisOptions, + maxFlushConcurrency: 1, + flushIntervalMs: 100, + flushBatchSize: 1, + leaderLockTimeoutMs: 5000, + leaderLockExtendIntervalMs: 1000, + ackIntervalSeconds: 5, + tracer, + logLevel: "warn", + }); + + await runsReplicationService.start(); + + try { + const organization = await prisma.organization.create({ + data: { title: "test", slug: "test" }, + }); + const project = await prisma.project.create({ + data: { name: "test", slug: "test", organizationId: organization.id, externalRef: "test" }, + }); + const runtimeEnvironment = await prisma.runtimeEnvironment.create({ + data: { + slug: "test", + type: "DEVELOPMENT", + projectId: project.id, + organizationId: organization.id, + apiKey: "test", + pkApiKey: "test", + shortcode: "test", + }, + }); + + const ksuid = RunId.generateKsuid(); + const run = await prisma.taskRunV2.create({ + data: { + id: ksuid.id, + friendlyId: ksuid.friendlyId, + taskIdentifier: "my-task", + payload: "{}", + payloadType: "application/json", + traceId: "v2del", + spanId: "v2del", + queue: "test", + workerQueue: "us-east-1-next", + region: "us-east-1", + planType: "free", + runtimeEnvironmentId: runtimeEnvironment.id, + projectId: project.id, + organizationId: organization.id, + environmentType: "DEVELOPMENT", + engine: "V2", + }, + }); + + const latestRow = clickhouse.reader.query({ + name: 
"runs-replication", + query: + "SELECT run_id, organization_id, environment_id, _is_deleted FROM trigger_dev.task_runs_v2 WHERE run_id = {runId: String} ORDER BY _version DESC LIMIT 1", + schema: z.any(), + params: z.object({ runId: z.string() }), + }); + + // Wait for the INSERT to land. + let result: Array> | undefined; + let insertDeadline = Date.now() + 10_000; + do { + const [, rows] = await latestRow({ runId: run.id }); + result = rows; + if (result?.length === 1 && Number(result[0]._is_deleted) === 0) break; + await setTimeout(200); + } while (Date.now() < insertDeadline); + expect(result?.length).toBe(1); + + // Delete the v2 run and wait for the tombstone. + await prisma.taskRunV2.delete({ where: { id: run.id } }); + + const deleteDeadline = Date.now() + 10_000; + do { + const [, rows] = await latestRow({ runId: run.id }); + result = rows; + if (result?.length === 1 && Number(result[0]._is_deleted) === 1) break; + await setTimeout(200); + } while (Date.now() < deleteDeadline); + + // The tombstone must carry the full old row (org/env), not just the PK. + expect(Number(result?.[0]?._is_deleted)).toBe(1); + expect(result?.[0]).toEqual( + expect.objectContaining({ + run_id: run.id, + organization_id: organization.id, + environment_id: runtimeEnvironment.id, + }) + ); + } finally { + await runsReplicationService.stop(); + } + } + ); }); diff --git a/internal-packages/database/prisma/migrations/20260622120000_task_run_v2_replica_identity_full/migration.sql b/internal-packages/database/prisma/migrations/20260622120000_task_run_v2_replica_identity_full/migration.sql new file mode 100644 index 00000000000..56f189efa99 --- /dev/null +++ b/internal-packages/database/prisma/migrations/20260622120000_task_run_v2_replica_identity_full/migration.sql @@ -0,0 +1,9 @@ +-- task_run_v2 is co-published to ClickHouse alongside TaskRun via logical +-- replication. 
Replication needs REPLICA IDENTITY FULL so UPDATE/DELETE WAL +-- events carry the full OLD row (organizationId, environmentType, ...) that the +-- ClickHouse transform requires. Without it, a v2 run DELETE ships only the +-- primary key, organizationId is undefined, and the run's ClickHouse +-- soft-delete tombstone is silently dropped (the deleted run lingers in +-- analytics). TaskRun is configured the same way; this pins it deterministically +-- for task_run_v2 rather than relying on an out-of-band ops step. +ALTER TABLE "public"."task_run_v2" REPLICA IDENTITY FULL; diff --git a/internal-packages/replication/src/client.ts b/internal-packages/replication/src/client.ts index 044c5e19320..562bccb5d6d 100644 --- a/internal-packages/replication/src/client.ts +++ b/internal-packages/replication/src/client.ts @@ -307,6 +307,8 @@ export class LogicalReplicationClient { startLsn, }); + await this.#warnOnWeakReplicaIdentity(); + const slotCreated = await this.#createSlot(); if (!slotCreated) { @@ -605,6 +607,60 @@ export class LogicalReplicationClient { return null; } + /** + * Warn (never fail) when a co-published table lacks REPLICA IDENTITY FULL while + * the publication emits UPDATE/DELETE. Under the default primary-key identity, + * a DELETE's WAL `old` tuple carries only the key, so a consumer that needs + * other columns of the deleted row (e.g. to build a ClickHouse soft-delete + * tombstone with organization/environment ids) silently loses them. This only + * surfaces a misconfiguration (a forgotten ops step or a db-push'd table); it + * never blocks startup. 
+ */ + async #warnOnWeakReplicaIdentity(): Promise { + if (!this.client) { + return; + } + + const publishesOldTuple = + !this.options.publicationActions || + this.options.publicationActions.includes("update") || + this.options.publicationActions.includes("delete"); + if (!publishesOldTuple) { + return; + } + + const tableList = this.#allTables() + .map((table) => `'${table}'`) + .join(", "); + + const [error, res] = await tryCatch( + this.client.query( + `SELECT c.relname, c.relreplident + FROM pg_class c + JOIN pg_namespace n ON n.oid = c.relnamespace + WHERE n.nspname = 'public' AND c.relname IN (${tableList})` + ) + ); + if (error || !res) { + return; // best-effort diagnostic; never block startup + } + + for (const row of res.rows as Array<{ relname: string; relreplident: string }>) { + if (row.relreplident !== "f") { + this.logger.warn( + "Co-published table lacks REPLICA IDENTITY FULL; UPDATE/DELETE WAL events will omit non-key columns of the old row", + { + name: this.options.name, + publicationName: this.options.publicationName, + table: row.relname, + replicaIdentity: row.relreplident, + fix: `ALTER TABLE "public"."${row.relname}" REPLICA IDENTITY FULL;`, + } + ); + } + } + } + async #createSlot(): Promise { if (!this.client) { this.events.emit("error", new LogicalReplicationClientError("Cannot create slot")); From 5282e019dba07e193599f7b951c3d4d32fd1357d Mon Sep 17 00:00:00 2001 From: Dan Sutton Date: Mon, 22 Jun 2026 11:32:14 +0100 Subject: [PATCH 50/83] fix(webapp): resolve cross-table run parent/root/children in presenters A v2 run can reference a legacy parent/root, or have legacy children, when a hierarchy straddles a runTableV2 flip. Prisma relation selects are bound to one table, so the run, span, and API-retrieve presenters returned null parent/root and dropped cross-table children. They now resolve parent/root by id (RunStore routes by id format) and children by a both-table predicate, via a shared hydrateParentAndRoot/hydrateChildRuns helper. 
--- .../v3/ApiRetrieveRunPresenter.server.ts | 28 ++++++---- .../app/presenters/v3/RunPresenter.server.ts | 32 +++++------ .../app/presenters/v3/SpanPresenter.server.ts | 45 +++++++++------ apps/webapp/app/v3/runHierarchy.server.ts | 56 +++++++++++++++++++ 4 files changed, 117 insertions(+), 44 deletions(-) create mode 100644 apps/webapp/app/v3/runHierarchy.server.ts diff --git a/apps/webapp/app/presenters/v3/ApiRetrieveRunPresenter.server.ts b/apps/webapp/app/presenters/v3/ApiRetrieveRunPresenter.server.ts index 68e3643f9e9..a21d0b112fd 100644 --- a/apps/webapp/app/presenters/v3/ApiRetrieveRunPresenter.server.ts +++ b/apps/webapp/app/presenters/v3/ApiRetrieveRunPresenter.server.ts @@ -23,6 +23,7 @@ import { } from "~/v3/mollifier/readFallback.server"; import { generatePresignedUrl } from "~/v3/objectStore.server"; import { runStore } from "~/v3/runStore.server"; +import { hydrateParentAndRoot, hydrateChildRuns } from "~/v3/runHierarchy.server"; import { tracer } from "~/v3/tracer.server"; import { startSpanWithEnv } from "~/v3/tracing.server"; @@ -133,21 +134,28 @@ export class ApiRetrieveRunPresenter { attemptNumber: true, engine: true, taskEventStore: true, - parentTaskRun: { - select: commonRunSelect, - }, - rootTaskRun: { - select: commonRunSelect, - }, - childRuns: { - select: commonRunSelect, - }, + parentTaskRunId: true, + rootTaskRunId: true, }, }, $replica ); - if (pgRow) return { ...pgRow, isBuffered: false }; + if (pgRow) { + // Resolve parent/root/children across both run tables. A single Prisma + // relation select is table-bound, so a v2 run's legacy parent (or a + // legacy run's v2 children), which arise in the mixed window, would come + // back null/empty. Resolve parent/root by id (RunStore routes by format) + // and children by a both-table predicate. 
+ const { parentTaskRun, rootTaskRun } = await hydrateParentAndRoot( + { parentTaskRunId: pgRow.parentTaskRunId, rootTaskRunId: pgRow.rootTaskRunId }, + commonRunSelect, + $replica + ); + const childRuns = await hydrateChildRuns(pgRow.id, commonRunSelect, $replica); + + return { ...pgRow, parentTaskRun, rootTaskRun, childRuns, isBuffered: false }; + } // Postgres miss → fall back to the mollifier buffer. When the gate // diverted a trigger, the run lives in Redis until the drainer replays diff --git a/apps/webapp/app/presenters/v3/RunPresenter.server.ts b/apps/webapp/app/presenters/v3/RunPresenter.server.ts index c4c3ac88c48..bb2daecf208 100644 --- a/apps/webapp/app/presenters/v3/RunPresenter.server.ts +++ b/apps/webapp/app/presenters/v3/RunPresenter.server.ts @@ -9,6 +9,7 @@ import { isFinalRunStatus } from "~/v3/taskStatus"; import { env } from "~/env.server"; import { getEventRepositoryForStore } from "~/v3/eventRepository/index.server"; import { runStore } from "~/v3/runStore.server"; +import { hydrateParentAndRoot } from "~/v3/runHierarchy.server"; type Result = Awaited>; export type Run = Result["run"]; @@ -93,20 +94,8 @@ export class RunPresenter { completedAt: true, logsDeletedAt: true, annotations: true, - rootTaskRun: { - select: { - friendlyId: true, - spanId: true, - createdAt: true, - }, - }, - parentTaskRun: { - select: { - friendlyId: true, - spanId: true, - createdAt: true, - }, - }, + rootTaskRunId: true, + parentTaskRunId: true, runtimeEnvironment: { select: { id: true, @@ -143,6 +132,15 @@ export class RunPresenter { const showLogs = showDeletedLogs || !run.logsDeletedAt; + // Resolve parent/root across both physical run tables: a v2 run can have a + // legacy parent/root (or vice versa) in the mixed window, which a + // table-bound Prisma relation select would miss. 
+ const { parentTaskRun, rootTaskRun } = await hydrateParentAndRoot( + { parentTaskRunId: run.parentTaskRunId, rootTaskRunId: run.rootTaskRunId }, + { friendlyId: true, spanId: true, createdAt: true }, + this.#prismaClient + ); + const runData = { id: run.id, number: run.number, @@ -154,8 +152,8 @@ export class RunPresenter { startedAt: run.startedAt, completedAt: run.completedAt, logsDeletedAt: showDeletedLogs ? null : run.logsDeletedAt, - rootTaskRun: run.rootTaskRun, - parentTaskRun: run.parentTaskRun, + rootTaskRun, + parentTaskRun, environment: { id: run.runtimeEnvironment.id, organizationId: run.runtimeEnvironment.organizationId, @@ -184,7 +182,7 @@ export class RunPresenter { getTaskEventStoreTableForRun(run), run.runtimeEnvironment.id, run.traceId, - run.rootTaskRun?.createdAt ?? run.createdAt, + rootTaskRun?.createdAt ?? run.createdAt, run.completedAt ?? undefined, { includeDebugLogs: showDebug } ); diff --git a/apps/webapp/app/presenters/v3/SpanPresenter.server.ts b/apps/webapp/app/presenters/v3/SpanPresenter.server.ts index 49d8f303560..e202b20fbf2 100644 --- a/apps/webapp/app/presenters/v3/SpanPresenter.server.ts +++ b/apps/webapp/app/presenters/v3/SpanPresenter.server.ts @@ -587,22 +587,9 @@ export class SpanPresenter extends BasePresenter { filePath: true, }, }, - //relationships - rootTaskRun: { - select: { - taskIdentifier: true, - friendlyId: true, - spanId: true, - createdAt: true, - }, - }, - parentTaskRun: { - select: { - taskIdentifier: true, - friendlyId: true, - spanId: true, - }, - }, + //relationships (resolved across both run tables after the fetch) + rootTaskRunId: true, + parentTaskRunId: true, batch: { select: { friendlyId: true, @@ -626,7 +613,31 @@ export class SpanPresenter extends BasePresenter { this._replica ); - return run; + if (!run) { + return run; + } + + // Resolve parent/root across both run tables: a v2 run can reference a + // legacy parent/root (or vice versa) in the mixed window, which a + // table-bound Prisma relation 
select on a single table would miss. + const [parentTaskRun, rootTaskRun] = await Promise.all([ + run.parentTaskRunId + ? runStore.findRun( + { id: run.parentTaskRunId }, + { select: { taskIdentifier: true, friendlyId: true, spanId: true } }, + this._replica + ) + : Promise.resolve(null), + run.rootTaskRunId + ? runStore.findRun( + { id: run.rootTaskRunId }, + { select: { taskIdentifier: true, friendlyId: true, spanId: true, createdAt: true } }, + this._replica + ) + : Promise.resolve(null), + ]); + + return { ...run, parentTaskRun, rootTaskRun }; } async #getSpan({ diff --git a/apps/webapp/app/v3/runHierarchy.server.ts b/apps/webapp/app/v3/runHierarchy.server.ts new file mode 100644 index 00000000000..d2f1489364b --- /dev/null +++ b/apps/webapp/app/v3/runHierarchy.server.ts @@ -0,0 +1,56 @@ +import type { Prisma, PrismaClientOrTransaction, PrismaReplicaClient } from "@trigger.dev/database"; +import { runStore } from "~/v3/runStore.server"; + +type ReadClient = PrismaClientOrTransaction | PrismaReplicaClient; + +/** + * Resolve a run's parent and root runs across BOTH physical run tables. + * + * A run's `parentTaskRunId`/`rootTaskRunId` are plain scalar ids whose target + * may live in either `TaskRun` (legacy cuid) or `task_run_v2` (new ksuid) — for + * example a v2 child of a legacy parent, created while the org's `runTableV2` + * flag was mid-flip. A single Prisma relation select (`parentTaskRun { ... }`) + * is bound to one table and silently returns `null` for such a cross-table + * parent/root. Resolving each by id instead lets RunStore route to the correct + * table by id format. Pass the same `select` the caller would have used on the + * relation. 
+ */ +export async function hydrateParentAndRoot( + ids: { parentTaskRunId: string | null; rootTaskRunId: string | null }, + select: S, + client?: ReadClient +): Promise<{ + parentTaskRun: Prisma.TaskRunGetPayload<{ select: S }> | null; + rootTaskRun: Prisma.TaskRunGetPayload<{ select: S }> | null; +}> { + const [parentTaskRun, rootTaskRun] = await Promise.all([ + ids.parentTaskRunId + ? runStore.findRun({ id: ids.parentTaskRunId }, { select }, client) + : Promise.resolve(null), + ids.rootTaskRunId + ? runStore.findRun({ id: ids.rootTaskRunId }, { select }, client) + : Promise.resolve(null), + ]); + + return { + parentTaskRun: parentTaskRun as Prisma.TaskRunGetPayload<{ select: S }> | null, + rootTaskRun: rootTaskRun as Prisma.TaskRunGetPayload<{ select: S }> | null, + }; +} + +/** + * A run's direct child runs across BOTH physical tables. Children reference the + * parent by the scalar `parentTaskRunId`, and a v2 parent can have legacy cuid + * children (or vice versa) in the mixed window, so this is a non-id predicate + * read that `findRuns` resolves against both tables. + */ +export async function hydrateChildRuns( + parentRunId: string, + select: S, + client?: ReadClient +): Promise[]> { + return runStore.findRuns( + { where: { parentTaskRunId: parentRunId }, select }, + client + ) as Promise[]>; +} From 24b0f8769a817b568326ef46c7adcec4006c9b9b Mon Sep 17 00:00:00 2001 From: Dan Sutton Date: Mon, 22 Jun 2026 11:42:30 +0100 Subject: [PATCH 51/83] fix(run-store): prefer task_run_v2 on cross-table single-row reads When a non-id predicate matches a row in both physical tables, findFirstAcrossTables now returns the v2 copy instead of legacy. Under this PR a run is in exactly one table (createRun routes by id format), so this is a no-op today; it forward-aligns with the later slow legacy to v2 migration, which copies a run into task_run_v2 (the canonical, operated-on copy) before operating. 
A comment in findRuns marks the matching dedup-by-id work for that migration PR. --- .../run-store/src/PostgresRunStore.ts | 40 ++++++++++++++----- 1 file changed, 29 insertions(+), 11 deletions(-) diff --git a/internal-packages/run-store/src/PostgresRunStore.ts b/internal-packages/run-store/src/PostgresRunStore.ts index 908137d4fab..5e0f38b828a 100644 --- a/internal-packages/run-store/src/PostgresRunStore.ts +++ b/internal-packages/run-store/src/PostgresRunStore.ts @@ -80,13 +80,18 @@ export class PostgresRunStore implements RunStore { /** * Read a single row matching a non-id predicate from BOTH physical tables. - * A run lives in exactly one table (chosen by its id format), so a key-based - * predicate (idempotency key, "has this env any runs") can match a row in - * either. Query both in parallel and return the first match — at most one - * side is non-null, and legacy is preferred for a stable result if a - * predicate ever matches both. `task_run_v2` is an identical clone of - * `TaskRun`, so the SAME args (select/include and the security-scoping - * `where`) run unchanged against either delegate. + * A key-based predicate (idempotency key, "has this env any runs") can match + * a row in either table. Query both in parallel and return the match, + * preferring `task_run_v2` when both are non-null. + * + * Today a run lives in exactly one table (createRun routes by id format), so + * at most one side is non-null and the preference never bites. The later + * slow legacy->v2 migration copies a run into task_run_v2 before operating on + * it, so it transiently lives in BOTH tables with the v2 copy as the + * canonical/operated-on one; preferring v2 returns the current row, not the + * stale legacy source. `task_run_v2` is an identical clone of `TaskRun`, so + * the SAME args (select/include and the security-scoping `where`) run + * unchanged against either delegate. 
*/ async #findFirstAcrossTables( prisma: ReadClient, @@ -100,7 +105,7 @@ export class PostgresRunStore implements RunStore { v2Model.findFirst({ where, ...args }), ]); - return legacyRun ?? v2Run; + return v2Run ?? legacyRun; } async createRun( @@ -871,9 +876,22 @@ export class PostgresRunStore implements RunStore { return model.findMany(args as Prisma.TaskRunFindManyArgs); } - // BOTH tables in play. Offset pagination can't be expressed across two - // tables (applying `skip` to each skips N rows from its own result, not N - // from the merged result), so reject it rather than silently double-skip. + // BOTH tables in play. + // + // FORWARD-LOOKING (slow legacy->v2 migration, a later stage): that migration + // copies a run into task_run_v2 before operating on it, so a run can briefly + // live in BOTH tables. When that lands, the cross-table reads below (both the + // ordered #mergeOrdered path AND the unordered concat) must DEDUP BY id, + // keeping the canonical v2 copy, or a doubly-present run is returned twice. + // Dedup needs `id` forced into the projection (and stripped when the caller + // didn't select it), and the "v2 wins" policy is part of the copy protocol, + // so it belongs with the migration PR that introduces the overlap. Today + // createRun routes by id format, so no run is in both tables and concatenation + // is already duplicate-free. + // + // Offset pagination can't be expressed across two tables (applying `skip` to + // each skips N rows from its own result, not N from the merged result), so + // reject it rather than silently double-skip. 
if (args.skip !== undefined) { throw new Error( "RunStore.findRuns: `skip` (offset pagination) is not supported across the legacy TaskRun " + From df8a7a8a7faaa5b401ef013c3351ae4cf82c6a83 Mon Sep 17 00:00:00 2001 From: Dan Sutton Date: Mon, 22 Jun 2026 14:20:39 +0100 Subject: [PATCH 52/83] fix(database): drop unused task_run_v2 m2m relations TaskRunV2 declared implicit many-to-many relations (tags, connectedWaitpoints) whose join tables were never created by any migration and are absent from the database. Nothing reads them (v2 run tags use the scalar runTags array), so they were pure schema-vs-migration drift. Removing them makes the schema match the database with no migration. --- internal-packages/database/prisma/schema.prisma | 6 ------ 1 file changed, 6 deletions(-) diff --git a/internal-packages/database/prisma/schema.prisma b/internal-packages/database/prisma/schema.prisma index 5668a5ac93c..4244b43d602 100644 --- a/internal-packages/database/prisma/schema.prisma +++ b/internal-packages/database/prisma/schema.prisma @@ -1168,7 +1168,6 @@ model TaskRunV2 { updatedAt DateTime @updatedAt attempts TaskRunAttempt[] @relation("attemptsV2") - tags TaskRunTag[] @relation("taskRunTagsV2") /// Denormized column that holds the raw tags runTags String[] @@ -1220,9 +1219,6 @@ model TaskRunV2 { ///If there are any blocked waitpoints, the run won't be executed blockedByWaitpoints TaskRunWaitpoint[] @relation("taskRunWaitpointsV2") - /// All waitpoints that blocked this run at some point, used for display purposes - connectedWaitpoints Waitpoint[] @relation("WaitpointRunConnectionsV2") - /// Where the logs are stored taskEventStore String @default("taskEvent") @@ -1596,7 +1592,6 @@ model Waitpoint { /// All runs that have ever been blocked by this waitpoint, used for display purposes connectedRuns TaskRun[] @relation("WaitpointRunConnections") - connectedRunsV2 TaskRunV2[] @relation("WaitpointRunConnectionsV2") /// When a waitpoint is complete completedExecutionSnapshots 
TaskRunExecutionSnapshot[] @relation("completedWaitpoints") @@ -1814,7 +1809,6 @@ model TaskRunTag { friendlyId String @unique runs TaskRun[] - runsV2 TaskRunV2[] @relation("taskRunTagsV2") project Project @relation(fields: [projectId], references: [id], onDelete: Cascade, onUpdate: Cascade) projectId String From 18a67c2d91c732b2df100cc80205556139747710 Mon Sep 17 00:00:00 2001 From: Dan Sutton Date: Mon, 22 Jun 2026 14:20:39 +0100 Subject: [PATCH 53/83] fix(run-store): guard cross-table cursor/take and route plain id reads findRuns rejects a Prisma cursor or a negative take on a both-tables read (neither can span two tables) instead of silently returning a wrong or empty result, and tablesForWhere now routes a plain id or friendlyId equality to the single matching table by id format, not just id:{in} lists. Also documents that the cross-table merge comparator assumes the en_US database collation and the COLLATE C fix needed for other collations. --- .../run-store/src/PostgresRunStore.ts | 89 +++++++++++++++---- 1 file changed, 72 insertions(+), 17 deletions(-) diff --git a/internal-packages/run-store/src/PostgresRunStore.ts b/internal-packages/run-store/src/PostgresRunStore.ts index 5e0f38b828a..242daf19fb6 100644 --- a/internal-packages/run-store/src/PostgresRunStore.ts +++ b/internal-packages/run-store/src/PostgresRunStore.ts @@ -23,6 +23,24 @@ import type { import type { TaskRunError } from "@trigger.dev/core/v3/schemas"; import { isKsuidId } from "@trigger.dev/core/v3/isomorphic"; +// Extract a plain string equality from a Prisma string filter — a bare string +// or `{ equals: "..." }`. Returns undefined for any other operator shape (in, +// not, contains, etc.), which callers treat as "can't narrow to one table". 
+function stringEquality(filter: unknown): string | undefined { + if (typeof filter === "string") { + return filter; + } + if ( + filter !== null && + typeof filter === "object" && + "equals" in filter && + typeof (filter as { equals?: unknown }).equals === "string" + ) { + return (filter as { equals: string }).equals; + } + return undefined; +} + export type PostgresRunStoreOptions = { prisma: PrismaClient; readOnlyPrisma: PrismaReplicaClient; @@ -901,20 +919,26 @@ export class PostgresRunStore implements RunStore { const ordered = this.#normalizeOrderBy(args.orderBy); + // Both tables are queried here (single-table reads were delegated earlier). + // A Prisma `cursor` addresses one row in one table, and a negative `take` + // (Prisma "last N") is meaningless across a 2-way merge — neither can span + // both tables. No caller pairs either with a cross-table read; reject + // loudly rather than silently returning a wrong or empty result. Keyset + // callers carry their cursor in `where`, which both per-table queries honor. + if (args.cursor !== undefined) { + throw new Error( + "RunStore.findRuns: a Prisma `cursor` cannot span both run tables. " + + "Use a where-based keyset (e.g. `where: { createdAt: { lt: X } }`) instead." + ); + } + if (typeof args.take === "number" && args.take < 0) { + throw new Error( + "RunStore.findRuns: a negative `take` (Prisma 'last N') is not supported across both run tables." + ); + } + // ORDERED + LIMITED → bounded 2-way merge. - // - // A single Prisma `cursor` addresses one table's row and cannot span two - // tables, so reject it on this path rather than silently paginating one - // table. (No current caller pairs `cursor` with `orderBy`+`take`; keyset - // callers carry the cursor in `where`, which both queries honor.) if (ordered.length > 0 && args.take !== undefined) { - if (args.cursor !== undefined) { - throw new Error( - "RunStore.findRuns: a Prisma `cursor` cannot address two tables on an ordered+limited read. 
" + - "Use a where-based keyset (e.g. `where: { createdAt: { lt: X } }`) instead." - ); - } - const comparator = this.#buildCrossTableComparator(ordered); // The in-memory comparator reads the order keys off each row, so they @@ -1000,6 +1024,27 @@ export class PostgresRunStore implements RunStore { return { queryLegacy, queryV2 }; } + // Plain id equality (string or `{ equals: string }`) also pins the table: + // a single id encodes its format, so route to the matching table and skip + // the other (which can't contain it). Mirrors the `id: { in }` partition. + const idEquals = stringEquality(idFilter); + if (idEquals !== undefined) { + return isKsuidId(idEquals) + ? { queryLegacy: false, queryV2: true } + : { queryLegacy: true, queryV2: false }; + } + + // friendlyId equality (`run_`) likewise pins the table by id format. + const friendlyEquals = stringEquality(where.friendlyId); + if (friendlyEquals !== undefined) { + const rawId = friendlyEquals.startsWith("run_") + ? friendlyEquals.slice("run_".length) + : friendlyEquals; + return isKsuidId(rawId) + ? { queryLegacy: false, queryV2: true } + : { queryLegacy: true, queryV2: false }; + } + return { queryLegacy: true, queryV2: true }; } @@ -1186,11 +1231,21 @@ export class PostgresRunStore implements RunStore { // String (id) order MUST match Postgres's collation: this comparator merges // the two per-table streams IN MEMORY, but the keyset continuation // (`id > cursor`) that fetches the next page is evaluated BY Postgres. If - // the two disagree, a tied-createdAt boundary across the tables silently - // skips or duplicates a row. The run-table id columns use the database - // collation (en_US.utf8), whose ordering of the id charset [0-9A-Za-z] - // matches `localeCompare("en-US")` (verified) but NOT raw code-unit order - // (e.g. "c" < "Z" under en_US, yet "Z" < "c" by code unit). + // the two disagree, a tied-createdAt boundary that straddles BOTH tables can + // silently skip or duplicate a row. 
The run-table id columns inherit the + // database collation, which on Trigger.dev Cloud (and the default Postgres + // locale on most systems) is en_US.utf8 — whose ordering of the id charset + // [0-9A-Za-z] matches `localeCompare("en-US")` (verified exhaustively over + // every base62 2-gram) but NOT raw code-unit order (e.g. "c" < "Z" under + // en_US, yet "Z" < "c" by code unit). + // + // CAVEAT (self-hosters): this hard-codes the en_US assumption. A database + // with a different collation ("C"/"POSIX" byte order, or another locale) can + // disagree with localeCompare("en-US") and skip/duplicate a run at the + // narrow tied-createdAt cross-table boundary. The collation-independent fix + // is to force `COLLATE "C"` on the id in BOTH the per-table keyset ORDER BY + // and this comparator (byte order on both sides); deferred because it needs + // the keyset expressed as raw SQL rather than a Prisma `orderBy`. return String(a).localeCompare(String(b), "en-US"); } From f6410917c6dc3b784f1ec8d0fbb60790bbe00108 Mon Sep 17 00:00:00 2001 From: Dan Sutton Date: Mon, 22 Jun 2026 14:20:39 +0100 Subject: [PATCH 54/83] fix(webapp): back idempotency claims with Redis when the mollifier is off Concurrent same-key triggers that straddle a runTableV2 flag flip can mint into different physical tables (cuid to TaskRun, ksuid to task_run_v2), whose per-table unique constraints cannot see each other, so neither insert conflicts and two runs share one key. The pre-gate claim now resolves its backend through a claim-only Redis buffer when the mollifier buffer is absent, so it serialises these triggers instead of falling open. v2-cutover orgs are claim-eligible for every idempotency-keyed trigger, including triggerAndWait, debounce, and one-time-use tokens, and the claim-resolved path blocks the parent on the winner's waitpoint. 
--- .../concerns/idempotencyKeys.server.ts | 174 +++++++++++------- .../v3/mollifier/idempotencyClaim.server.ts | 19 +- .../v3/mollifier/mollifierBuffer.server.ts | 40 ++++ .../test/mollifierClaimResolution.test.ts | 14 +- .../test/mollifierResetIdempotencyKey.test.ts | 13 ++ 5 files changed, 182 insertions(+), 78 deletions(-) diff --git a/apps/webapp/app/runEngine/concerns/idempotencyKeys.server.ts b/apps/webapp/app/runEngine/concerns/idempotencyKeys.server.ts index fb054e862f7..e8dd61c5c87 100644 --- a/apps/webapp/app/runEngine/concerns/idempotencyKeys.server.ts +++ b/apps/webapp/app/runEngine/concerns/idempotencyKeys.server.ts @@ -1,5 +1,5 @@ import { RunId } from "@trigger.dev/core/v3/isomorphic"; -import type { PrismaClientOrTransaction, TaskRun } from "@trigger.dev/database"; +import type { Prisma, PrismaClientOrTransaction, TaskRun } from "@trigger.dev/database"; import { env } from "~/env.server"; import { logger } from "~/services/logger.server"; import { resolveIdempotencyKeyTTL } from "~/utils/idempotencyKeys.server"; @@ -137,6 +137,73 @@ export class IdempotencyKeyConcern { return synthetic as unknown as TaskRun; } + // Return an already-resolved idempotent run as a cache hit, blocking the + // parent on the run's waitpoint when this is a triggerAndWait + // (`resumeParentOnCompletion`). Shared by the direct PG/buffer existing-run + // path and the claim-`resolved` path (a concurrent same-key trigger that won + // the claim): a v2-cutover triggerAndWait that loses the claim must still + // block its parent, because the per-table unique constraints don't dedup + // across TaskRun/task_run_v2 — the claim is what serialises these. 
+ private async returnCachedIdempotentRun( + request: TriggerTaskRequest, + parentStore: string | undefined, + existingRun: Prisma.TaskRunGetPayload<{ include: { associatedWaitpoint: true } }>, + idempotencyKey: string + ): Promise { + const parentRunId = request.body.options?.parentRunId; + const resumeParentOnCompletion = request.body.options?.resumeParentOnCompletion; + + //We're using `andWait` so we need to block the parent run with a waitpoint + if (resumeParentOnCompletion && parentRunId) { + // Get or create waitpoint lazily (existing run may not have one if it was standalone) + let associatedWaitpoint = existingRun.associatedWaitpoint; + if (!associatedWaitpoint) { + associatedWaitpoint = await this.engine.getOrCreateRunWaitpoint({ + runId: existingRun.id, + projectId: request.environment.projectId, + environmentId: request.environment.id, + }); + } + + await this.traceEventConcern.traceIdempotentRun( + request, + parentStore, + { + existingRun, + idempotencyKey, + incomplete: associatedWaitpoint.status === "PENDING", + isError: associatedWaitpoint.outputIsError, + }, + async (event) => { + const spanId = + request.options?.parentAsLinkType === "replay" + ? event.spanId + : event.traceparent?.spanId + ? `${event.traceparent.spanId}:${event.spanId}` + : event.spanId; + + //block run with waitpoint + await this.engine.blockRunWithWaitpoint({ + runId: RunId.fromFriendlyId(parentRunId), + waitpoints: associatedWaitpoint!.id, + spanIdToComplete: spanId, + batch: request.options?.batchId + ? { + id: request.options.batchId, + index: request.options.batchIndex ?? 
0, + } + : undefined, + projectId: request.environment.projectId, + organizationId: request.environment.organizationId, + tx: this.prisma, + }); + } + ); + } + + return { isCached: true, run: existingRun }; + } + async handleTriggerRequest( request: TriggerTaskRequest, parentStore: string | undefined @@ -220,66 +287,18 @@ export class IdempotencyKeyConcern { return { isCached: false, idempotencyKey, idempotencyKeyExpiresAt }; } - // We have an idempotent run, so we return it - const parentRunId = request.body.options?.parentRunId; - const resumeParentOnCompletion = request.body.options?.resumeParentOnCompletion; - - //We're using `andWait` so we need to block the parent run with a waitpoint - if (resumeParentOnCompletion && parentRunId) { - // Get or create waitpoint lazily (existing run may not have one if it was standalone) - let associatedWaitpoint = existingRun.associatedWaitpoint; - if (!associatedWaitpoint) { - associatedWaitpoint = await this.engine.getOrCreateRunWaitpoint({ - runId: existingRun.id, - projectId: request.environment.projectId, - environmentId: request.environment.id, - }); - } - - await this.traceEventConcern.traceIdempotentRun( - request, - parentStore, - { - existingRun, - idempotencyKey, - incomplete: associatedWaitpoint.status === "PENDING", - isError: associatedWaitpoint.outputIsError, - }, - async (event) => { - const spanId = - request.options?.parentAsLinkType === "replay" - ? event.spanId - : event.traceparent?.spanId - ? `${event.traceparent.spanId}:${event.spanId}` - : event.spanId; - - //block run with waitpoint - await this.engine.blockRunWithWaitpoint({ - runId: RunId.fromFriendlyId(parentRunId), - waitpoints: associatedWaitpoint!.id, - spanIdToComplete: spanId, - batch: request.options?.batchId - ? { - id: request.options.batchId, - index: request.options.batchIndex ?? 
0, - } - : undefined, - projectId: request.environment.projectId, - organizationId: request.environment.organizationId, - tx: this.prisma, - }); - } - ); - } - - return { isCached: true, run: existingRun }; + // We have an idempotent run, so we return it (blocking the parent on its + // waitpoint for triggerAndWait). + return this.returnCachedIdempotentRun(request, parentStore, existingRun, idempotencyKey); } // Pre-gate claim — closes the PG+buffer race during gate transition. // All same-key triggers serialise here before evaluateGate decides - // PG-pass-through vs mollify. Skipped for triggerAndWait - // (resumeParentOnCompletion) — that path bypasses the gate entirely - // and its existing PG-side dedup is sufficient. + // PG-pass-through vs mollify. For mollifier-only orgs this is skipped for + // triggerAndWait (resumeParentOnCompletion) — that path bypasses the gate + // and its PG-side dedup is sufficient there. v2-cutover orgs do NOT skip it + // (see the claimEligible comment below): cross-table dedup has no shared + // unique constraint, so the claim must cover triggerAndWait too. // // Also gated on the same per-org mollifier flag the gate uses: when // `TRIGGER_MOLLIFIER_ENABLED=1` globally for staged rollout, the buffer @@ -310,17 +329,28 @@ export class IdempotencyKeyConcern { | Record | null | undefined) ?? null; + // v2-cutover orgs: ANY idempotency-keyed trigger can straddle a + // `runTableV2` flag flip into different physical tables (cuid -> TaskRun, + // ksuid -> task_run_v2), so the claim must serialise all of them — + // including triggerAndWait (resumeParentOnCompletion), debounce, and + // oneTimeUseToken, whose per-table unique constraints (idempotencyKey, + // oneTimeUseToken) can't see across the two tables. 
The + // resumeParentOnCompletion/debounce/oneTimeUseToken exclusions below are + // mollifier-gate alignment optimisations (those requests always return + // pass_through from the gate, so there's no buffer to serialise against) + // and don't apply to the cross-table concern. shouldUseV2RunTable is + // checked first so a v2 org skips the mollifier-flag resolve entirely. const claimEligible = - !request.body.options?.resumeParentOnCompletion && - !request.body.options?.debounce && - !request.options?.oneTimeUseToken && - ((await resolveOrgMollifierFlag({ - envId: request.environment.id, - orgId: request.environment.organizationId, - taskId: request.taskId, - orgFeatureFlags, - })) || - shouldUseV2RunTable(orgFeatureFlags)); + shouldUseV2RunTable(orgFeatureFlags) || + (!request.body.options?.resumeParentOnCompletion && + !request.body.options?.debounce && + !request.options?.oneTimeUseToken && + (await resolveOrgMollifierFlag({ + envId: request.environment.id, + orgId: request.environment.organizationId, + taskId: request.taskId, + orgFeatureFlags, + }))); if (claimEligible) { const ttlSeconds = Math.max( 1, @@ -351,7 +381,15 @@ export class IdempotencyKeyConcern { this.prisma ); if (writerRun) { - return { isCached: true, run: writerRun }; + // The concurrent winner already committed. Return it as a cache hit, + // and for triggerAndWait block our parent on the winner's waitpoint + // (the claim is what serialises v2 cross-table triggerAndWait). 
+ return this.returnCachedIdempotentRun( + request, + parentStore, + writerRun, + idempotencyKey + ); } const buffered = await this.findBufferedRunWithIdempotency( request.environment.id, diff --git a/apps/webapp/app/v3/mollifier/idempotencyClaim.server.ts b/apps/webapp/app/v3/mollifier/idempotencyClaim.server.ts index 47c9733c927..b8a629d4240 100644 --- a/apps/webapp/app/v3/mollifier/idempotencyClaim.server.ts +++ b/apps/webapp/app/v3/mollifier/idempotencyClaim.server.ts @@ -5,7 +5,7 @@ import type { MollifierBuffer, } from "@trigger.dev/redis-worker"; import { logger } from "~/services/logger.server"; -import { getMollifierBuffer } from "./mollifierBuffer.server"; +import { getIdempotencyClaimBuffer } from "./mollifierBuffer.server"; // Tunables. The TTL on the claim key is bounded by typical trigger-pipeline // dwell; long enough that a slow PG insert doesn't expire mid-flight, @@ -58,13 +58,14 @@ export type ClaimOrAwaitInput = IdempotencyLookupInput & { // attempt sees the eventual PG/buffer state via existing // IdempotencyKeyConcern PG-first lookup. export async function claimOrAwait(input: ClaimOrAwaitInput): Promise { - const buffer = input.buffer === undefined ? getMollifierBuffer() : input.buffer; + const buffer = input.buffer === undefined ? getIdempotencyClaimBuffer() : input.buffer; if (!buffer) { - // Mollifier disabled / buffer construction failed. Fall open — - // caller proceeds with the trigger pipeline (PG unique constraint - // backstop). The token is never read in this case (publish/release - // are buffer-null no-ops downstream), so we skip the default - // `randomUUID()` to keep the mollifier-OFF hot path allocation-free + // No claim backend at all — both the mollifier buffer and the + // standalone claim buffer are unavailable (the general Redis host is + // unconfigured). Fall open: the caller proceeds with the trigger + // pipeline (PG unique constraint backstop). 
The token is never read in + // this case (publish/release are buffer-null no-ops downstream), so we + // skip the default `randomUUID()` to keep this hot path allocation-free // for idempotency-keyed triggers — `triggerTask` is the // highest-throughput code path in the system. A test-injected // generator is still honoured for deterministic assertions. @@ -164,7 +165,7 @@ export async function publishClaim(input: { ttlSeconds?: number; buffer?: MollifierBuffer | null; }): Promise { - const buffer = input.buffer === undefined ? getMollifierBuffer() : input.buffer; + const buffer = input.buffer === undefined ? getIdempotencyClaimBuffer() : input.buffer; if (!buffer) return; const ttlSeconds = input.ttlSeconds ?? DEFAULT_CLAIM_TTL_SECONDS; try { @@ -197,7 +198,7 @@ export async function releaseClaim(input: { token: string; buffer?: MollifierBuffer | null; }): Promise { - const buffer = input.buffer === undefined ? getMollifierBuffer() : input.buffer; + const buffer = input.buffer === undefined ? getIdempotencyClaimBuffer() : input.buffer; if (!buffer) return; try { await buffer.releaseClaim({ diff --git a/apps/webapp/app/v3/mollifier/mollifierBuffer.server.ts b/apps/webapp/app/v3/mollifier/mollifierBuffer.server.ts index 2f7af70d0f2..d89bc8a8a94 100644 --- a/apps/webapp/app/v3/mollifier/mollifierBuffer.server.ts +++ b/apps/webapp/app/v3/mollifier/mollifierBuffer.server.ts @@ -33,3 +33,43 @@ export function getMollifierBuffer(): MollifierBuffer | null { if (env.TRIGGER_MOLLIFIER_ENABLED !== "1") return null; return singleton("mollifierBuffer", initializeMollifierBuffer); } + +// A claim-only buffer for the pre-gate idempotency claim when the mollifier +// itself is disabled. The mollifier Redis may be unprovisioned in deployments +// that don't run the mollifier, so this points at the general webapp Redis. 
+// Only the claim methods (claimIdempotency / readClaim / publishClaim / +// releaseClaim) are exercised; they live under the distinct `mollifier:claim:*` +// namespace and carry their own short TTLs, so sharing the general Redis is safe. +function initializeIdempotencyClaimBuffer(): MollifierBuffer { + logger.debug("Initializing standalone idempotency-claim buffer", { + host: env.REDIS_HOST, + }); + + return new MollifierBuffer({ + redisOptions: { + keyPrefix: "", + host: env.REDIS_HOST, + port: env.REDIS_PORT, + username: env.REDIS_USERNAME, + password: env.REDIS_PASSWORD, + enableAutoPipelining: true, + ...(env.REDIS_TLS_DISABLED === "true" ? {} : { tls: {} }), + }, + }); +} + +// Resolve the buffer backing the pre-gate idempotency claim. When the +// mollifier is enabled, reuse its buffer so claims share the mollifier's Redis. +// Otherwise return a claim-only buffer on the general Redis: a `runTableV2` +// cutover org needs the claim to serialise concurrent same-key triggers that +// would otherwise straddle the flag flip into different physical tables (cuid +// -> TaskRun, ksuid -> task_run_v2), whose per-table unique constraints can't +// see each other. Returns null only when the general Redis host is +// unconfigured, in which case the claim falls open (no coordination) exactly +// as before. 
+export function getIdempotencyClaimBuffer(): MollifierBuffer | null { + const mollifier = getMollifierBuffer(); + if (mollifier) return mollifier; + if (!env.REDIS_HOST) return null; + return singleton("idempotencyClaimBuffer", initializeIdempotencyClaimBuffer); +} diff --git a/apps/webapp/test/mollifierClaimResolution.test.ts b/apps/webapp/test/mollifierClaimResolution.test.ts index f61cda0d04e..e9115570af9 100644 --- a/apps/webapp/test/mollifierClaimResolution.test.ts +++ b/apps/webapp/test/mollifierClaimResolution.test.ts @@ -13,6 +13,11 @@ vi.mock("~/db.server", () => ({ prisma: {}, $replica: {} })); const h = vi.hoisted(() => ({ buffer: null as unknown, orgFlag: true })); vi.mock("~/v3/mollifier/mollifierBuffer.server", () => ({ getMollifierBuffer: () => h.buffer, + // claimOrAwait/publishClaim/releaseClaim resolve their backend through + // getIdempotencyClaimBuffer (the mollifier buffer when enabled, else a + // standalone Redis claim buffer). In tests both resolve to the scripted + // buffer handle so the claim path is fully controllable. + getIdempotencyClaimBuffer: () => h.buffer, })); // Stub `mollifierGate.server` so loading the concern doesn't drag in // `env.server` (which fails to parse without a populated environment in @@ -29,7 +34,14 @@ import type { TriggerTaskRequest } from "~/runEngine/types"; function makeConcern(prisma: { findFirst: () => Promise }) { return new IdempotencyKeyConcern( - { taskRun: { findFirst: prisma.findFirst } } as never, + { + taskRun: { findFirst: prisma.findFirst }, + // The cross-table existing-run lookup reads BOTH physical tables. These + // tests use legacy ids that never match a v2 row, so task_run_v2 always + // misses and findFirstAcrossTables returns the scripted taskRun result — + // keeping the per-call scripting on `prisma.findFirst` intact. 
+ taskRunV2: { findFirst: async () => null }, + } as never, {} as never, // engine — unused on this path {} as never, // traceEventConcern — unused on this path ); diff --git a/apps/webapp/test/mollifierResetIdempotencyKey.test.ts b/apps/webapp/test/mollifierResetIdempotencyKey.test.ts index 4909087d70c..5f0abd81f65 100644 --- a/apps/webapp/test/mollifierResetIdempotencyKey.test.ts +++ b/apps/webapp/test/mollifierResetIdempotencyKey.test.ts @@ -22,6 +22,7 @@ import { ServiceValidationError } from "~/v3/services/baseService.server"; type FakePrisma = { taskRun: { updateMany: (...args: unknown[]) => Promise<{ count: number }> }; + taskRunV2: { updateMany: (...args: unknown[]) => Promise<{ count: number }> }; }; function makePrisma(pgCount: number): FakePrisma { @@ -29,6 +30,12 @@ function makePrisma(pgCount: number): FakePrisma { taskRun: { updateMany: vi.fn(async () => ({ count: pgCount })), }, + // clearIdempotencyKey(byPredicate) clears across BOTH physical run tables. + // These tests use a legacy key that only ever matches TaskRun, so + // task_run_v2 always clears nothing. + taskRunV2: { + updateMany: vi.fn(async () => ({ count: 0 })), + }, }; } @@ -138,6 +145,12 @@ describe("ResetIdempotencyKeyService — buffer-outage handling", () => { return updateManyCalls === 1 ? { count: 0 } : { count: 1 }; }), }, + // task_run_v2 side of the both-tables byPredicate clear; never matches + // here, so it stays at 0 and the updateManyCalls assertion tracks only + // the legacy delegate. 
+ taskRunV2: { + updateMany: vi.fn(async () => ({ count: 0 })), + }, }; const resetIdempotency = vi.fn(async () => ({ clearedRunId: null as string | null })); bufferMock.current = { resetIdempotency }; From 388dd667998555ed4d7c039ffcd96c6f047f9635 Mon Sep 17 00:00:00 2001 From: Dan Sutton Date: Mon, 22 Jun 2026 14:20:39 +0100 Subject: [PATCH 55/83] fix(webapp): serve realtime run feeds across both run tables A run routed to task_run_v2 was invisible to the Electric realtime feed, whose shapes were bound to the TaskRun table, so subscribeToRun, useRealtimeRun, and run polling returned nothing for those runs. Single-run subscriptions now route the shape to the correct table by id format, and the tag and batch feeds run two upstream shapes (TaskRun and task_run_v2) merged under one composite cursor the client round-trips opaquely, so no SDK change is needed. --- .../realtime/electricShapeMerge.server.ts | 167 +++++++++++ .../app/services/realtimeClient.server.ts | 272 +++++++++++++++++- apps/webapp/test/electricShapeMerge.test.ts | 201 +++++++++++++ apps/webapp/test/realtimeClient.test.ts | 7 +- 4 files changed, 642 insertions(+), 5 deletions(-) create mode 100644 apps/webapp/app/services/realtime/electricShapeMerge.server.ts create mode 100644 apps/webapp/test/electricShapeMerge.test.ts diff --git a/apps/webapp/app/services/realtime/electricShapeMerge.server.ts b/apps/webapp/app/services/realtime/electricShapeMerge.server.ts new file mode 100644 index 00000000000..cfe494e4a47 --- /dev/null +++ b/apps/webapp/app/services/realtime/electricShapeMerge.server.ts @@ -0,0 +1,167 @@ +/** + * Pure helpers for merging TWO upstream Electric shapes (one per physical run + * table — `TaskRun` and `task_run_v2`) into a single shape the realtime client + * consumes. 
A tag-list or batch feed matches runs in both tables during/after a + * `runTableV2` cutover, but an Electric shape is bound to one table, so the + * proxy fans out to two shapes and presents one composite continuation + * (`handle` / `offset` / `cursor`) that the client round-trips opaquely. + * + * Kept dependency-free (no DB/Redis/fetch) so the merge logic is unit-testable. + */ + +// Separator packing the two per-table continuation values into one opaque +// token. Electric's handle/offset/cursor values are alphanumeric plus `_`/`-` +// (UUID-ish handles, `N_M` offsets such as `26800552_0`, numeric cursors) and never contain +// `~`, so it is collision-free for this charset. +export const COMPOSITE_SEP = "~"; + +export const UP_TO_DATE_MESSAGE = { headers: { control: "up-to-date" } } as const; +export const MUST_REFETCH_MESSAGE = { headers: { control: "must-refetch" } } as const; + +/** A parsed per-table shape response: continuation headers + the change rows. */ +export type ParsedShape = { + status: number; + handle?: string; + offset?: string; + cursor?: string; + schema?: string; + /** Change messages only (control messages stripped). */ + changes: unknown[]; + upToDate: boolean; + mustRefetch: boolean; +}; + +/** The prior per-table continuation the client sent (used when a shape is left + * un-polled because the other returned first). */ +export type PriorContinuation = { + handleA?: string; + offsetA: string; + cursorA?: string; + handleB?: string; + offsetB: string; + cursorB?: string; +}; + +export type MergedShape = + | { mustRefetch: true } + | { + mustRefetch: false; + changes: unknown[]; + handle: string; + offset: string; + cursor?: string; + schema?: string; + }; + +/** + * Split a composite "a~b" value back into its per-table parts. A value with + * no separator (or null/empty) means the client hasn't been handed a composite + * yet (the initial request before any shape exists) -> both undefined.
+ */ +export function decodeCompositePart(value: string | null | undefined): { + a: string | undefined; + b: string | undefined; +} { + if (!value) return { a: undefined, b: undefined }; + const idx = value.indexOf(COMPOSITE_SEP); + if (idx === -1) return { a: undefined, b: undefined }; + return { + a: value.slice(0, idx) || undefined, + b: value.slice(idx + COMPOSITE_SEP.length) || undefined, + }; +} + +/** + * The offset is never absent — Electric uses "-1" for the initial request — so + * a bare value applies to BOTH shapes (initial), and a composite splits. + */ +export function decodeCompositeOffset(offset: string): { a: string; b: string } { + const idx = offset.indexOf(COMPOSITE_SEP); + if (idx === -1) return { a: offset, b: offset }; + return { a: offset.slice(0, idx), b: offset.slice(idx + COMPOSITE_SEP.length) }; +} + +export function encodeComposite(a: string, b: string): string { + return `${a}${COMPOSITE_SEP}${b}`; +} + +/** Parse the raw body + headers of one upstream shape response. */ +export function parseShapeMessages( + status: number, + headers: { + handle?: string; + offset?: string; + cursor?: string; + schema?: string; + }, + bodyText: string +): ParsedShape { + const base = { status, ...headers }; + if (status >= 400) { + return { ...base, changes: [], upToDate: false, mustRefetch: status === 409 }; + } + let parsed: unknown; + try { + parsed = bodyText.trim() ? JSON.parse(bodyText) : []; + } catch { + // Unparseable body — safest is to make the client refetch the shape. 
+ return { ...base, changes: [], upToDate: false, mustRefetch: true }; + } + if (!Array.isArray(parsed)) { + return { ...base, changes: [], upToDate: false, mustRefetch: true }; + } + const messages = parsed as Array<{ headers?: { control?: string } }>; + const changes = messages.filter((m) => !m?.headers?.control); + const mustRefetch = messages.some((m) => m?.headers?.control === "must-refetch"); + const upToDate = messages.some((m) => m?.headers?.control === "up-to-date"); + return { ...base, changes, upToDate, mustRefetch }; +} + +/** + * Merge two parsed per-table shapes into one composite payload. If either shape + * needs a refetch (409 / must-refetch / unparseable), the whole composite is + * reset. Otherwise the change rows are concatenated (the client merges by key, + * so order across tables doesn't matter) and the continuation values are packed + * per table, falling back to the client's prior value for a shape that wasn't + * re-polled this round. + */ +export function mergeParsedShapes( + a: ParsedShape, + b: ParsedShape, + prior: PriorContinuation +): MergedShape { + if (a.mustRefetch || b.mustRefetch || a.status >= 400 || b.status >= 400) { + return { mustRefetch: true }; + } + const cursorA = a.cursor ?? prior.cursorA; + const cursorB = b.cursor ?? prior.cursorB; + const cursor = + cursorA !== undefined || cursorB !== undefined + ? encodeComposite(cursorA ?? "", cursorB ?? "") + : undefined; + return { + mustRefetch: false, + changes: [...a.changes, ...b.changes], + handle: encodeComposite(a.handle ?? prior.handleA ?? "", b.handle ?? prior.handleB ?? ""), + offset: encodeComposite(a.offset ?? prior.offsetA, b.offset ?? prior.offsetB), + cursor, + schema: a.schema ?? b.schema, + }; +} + +/** A synthetic "no change this round" result for a shape left un-polled because + * the other returned changes first; carries its prior continuation forward. 
*/ +export function unpolledShape( + which: "a" | "b", + prior: PriorContinuation +): ParsedShape { + return { + status: 200, + handle: which === "a" ? prior.handleA : prior.handleB, + offset: which === "a" ? prior.offsetA : prior.offsetB, + cursor: which === "a" ? prior.cursorA : prior.cursorB, + changes: [], + upToDate: true, + mustRefetch: false, + }; +} diff --git a/apps/webapp/app/services/realtimeClient.server.ts b/apps/webapp/app/services/realtimeClient.server.ts index 12b93f1996d..20ceb84c4cb 100644 --- a/apps/webapp/app/services/realtimeClient.server.ts +++ b/apps/webapp/app/services/realtimeClient.server.ts @@ -1,6 +1,18 @@ import { json } from "@remix-run/server-runtime"; import { tryCatch } from "@trigger.dev/core/utils"; -import { safeParseNaturalLanguageDurationAgo } from "@trigger.dev/core/v3/isomorphic"; +import { isKsuidId, safeParseNaturalLanguageDurationAgo } from "@trigger.dev/core/v3/isomorphic"; +import { + decodeCompositeOffset, + decodeCompositePart, + mergeParsedShapes, + MUST_REFETCH_MESSAGE, + parseShapeMessages, + unpolledShape, + UP_TO_DATE_MESSAGE, + type MergedShape, + type ParsedShape, + type PriorContinuation, +} from "./realtime/electricShapeMerge.server"; import { Callback, Result } from "ioredis"; import { randomUUID } from "node:crypto"; import { createRedisClient, RedisClient, RedisWithClusterOptions } from "~/redis.server"; @@ -49,6 +61,11 @@ const DEFAULT_ELECTRIC_COLUMNS = [ const RESERVED_COLUMNS = ["id", "taskIdentifier", "friendlyId", "status", "createdAt"]; const RESERVED_SEARCH_PARAMS = ["createdAt", "tags", "skipColumns"]; +// The two physical run tables a realtime shape can target. A run lives in +// exactly one, keyed by id format (ksuid -> task_run_v2, cuid -> TaskRun). 
+const TASK_RUN_TABLE = 'public."TaskRun"'; +const TASK_RUN_V2_TABLE = 'public."task_run_v2"'; + export type RealtimeClientOptions = { electricOrigin: string | string[]; redis: RedisWithClusterOptions; @@ -118,10 +135,15 @@ export class RealtimeClient { clientVersion?: string, signal?: AbortSignal ) { + // Route the shape to the physical table the run lives in: a v2 run's id is + // a KSUID (task_run_v2), a legacy run's a cuid (TaskRun). The run was + // already resolved by the route, so this id is authoritative. + const table = isKsuidId(runId) ? TASK_RUN_V2_TABLE : TASK_RUN_TABLE; return this.#streamRunsWhere( url, environment, `id='${runId}'`, + table, apiVersion, requestOptions, clientVersion, @@ -145,7 +167,7 @@ export class RealtimeClient { const whereClause = whereClauses.join(" AND "); - return this.#streamRunsWhere( + return this.#streamRunsAcrossTables( url, environment, whereClause, @@ -179,7 +201,7 @@ export class RealtimeClient { const whereClause = whereClauses.join(" AND "); - const response = await this.#streamRunsWhere( + const response = await this.#streamRunsAcrossTables( url, environment, whereClause, @@ -278,6 +300,7 @@ export class RealtimeClient { url: URL | string, environment: RealtimeEnvironment, whereClause: string, + table: string, apiVersion: API_VERSIONS, requestOptions?: RealtimeRequestOptions, clientVersion?: string, @@ -287,6 +310,7 @@ export class RealtimeClient { url, environment, whereClause, + table, requestOptions, clientVersion ); @@ -300,10 +324,250 @@ export class RealtimeClient { ); } + // Stream a feed that spans BOTH physical run tables (the tag-list and batch + // feeds) by running two upstream Electric shapes — public."TaskRun" and + // public."task_run_v2" — under a single composite continuation the client + // round-trips opaquely. A run lives in exactly one table, so the union of the + // two shapes is the full feed; the client merges by row key and never learns + // there are two shapes. 
See electricShapeMerge.server.ts for the pure logic. + async #streamRunsAcrossTables( + url: URL | string, + environment: RealtimeEnvironment, + whereClause: string, + apiVersion: API_VERSIONS, + requestOptions?: RealtimeRequestOptions, + clientVersion?: string, + signal?: AbortSignal + ): Promise { + const $url = new URL(url.toString()); + const isLive = isLiveRequestUrl($url); + const incomingHandle = extractShapeId($url); + const incomingOffset = $url.searchParams.get("offset") ?? "-1"; + const incomingCursor = $url.searchParams.get("cursor"); + + const handles = decodeCompositePart(incomingHandle); + const offsets = decodeCompositeOffset(incomingOffset); + const cursors = decodeCompositePart(incomingCursor); + + const prior: PriorContinuation = { + handleA: handles.a, + offsetA: offsets.a, + cursorA: cursors.a, + handleB: handles.b, + offsetB: offsets.b, + cursorB: cursors.b, + }; + + const urlA = this.#constructMergeShapeUrl( + $url, + environment, + whereClause, + TASK_RUN_TABLE, + { handle: handles.a, offset: offsets.a, cursor: cursors.a }, + requestOptions, + clientVersion + ); + const urlB = this.#constructMergeShapeUrl( + $url, + environment, + whereClause, + TASK_RUN_V2_TABLE, + { handle: handles.b, offset: offsets.b, cursor: cursors.b }, + requestOptions, + clientVersion + ); + + // One concurrency slot for the composite live request: it maps to a single + // client request even though we fan out to two upstream long-polls. 
+ let requestId: string | undefined; + if (isLive && incomingHandle) { + const concurrencyLimit = await this.cachedLimitProvider.getCachedLimit( + environment.organizationId, + 100_000 + ); + if (!concurrencyLimit) { + logger.error("Failed to get concurrency limit", { + organizationId: environment.organizationId, + }); + return json({ error: "Failed to get concurrency limit" }, { status: 500 }); + } + requestId = randomUUID(); + if (!(await this.#incrementAndCheck(environment.id, requestId, concurrencyLimit))) { + return json({ error: "Too many concurrent requests" }, { status: 429 }); + } + } + + try { + const merged = await this.#raceAndMergeShapes(urlA, urlB, isLive, prior, signal); + return this.#buildMergeResponse(merged, isLive, apiVersion, clientVersion); + } finally { + if (requestId) { + await this.#decrementConcurrency(environment.id, requestId); + } + } + } + + // Build the per-table Electric URL, replacing the composite continuation the + // client sent with this table's decoded part. + #constructMergeShapeUrl( + baseUrl: URL, + environment: RealtimeEnvironment, + whereClause: string, + table: string, + perTable: { handle?: string; offset: string; cursor?: string }, + requestOptions?: RealtimeRequestOptions, + clientVersion?: string + ): URL { + const electricUrl = this.#constructRunsElectricUrl( + baseUrl, + environment, + whereClause, + table, + requestOptions, + clientVersion + ); + // Upstream always speaks current Electric (handle, not shape_id). + electricUrl.searchParams.delete("shape_id"); + if (perTable.handle !== undefined) { + electricUrl.searchParams.set("handle", perTable.handle); + } else { + electricUrl.searchParams.delete("handle"); + } + electricUrl.searchParams.set("offset", perTable.offset); + if (perTable.cursor !== undefined) { + electricUrl.searchParams.set("cursor", perTable.cursor); + } else { + electricUrl.searchParams.delete("cursor"); + } + return electricUrl; + } + + // Fetch both shapes. 
For a live request, return as soon as ONE yields changes + // (or needs a refetch) and carry the other's prior continuation forward — so a + // change on either table isn't delayed by the other's idle long-poll. If the + // first to settle had nothing, wait for the other before responding. + async #raceAndMergeShapes( + urlA: URL, + urlB: URL, + isLive: boolean, + prior: PriorContinuation, + signal?: AbortSignal + ): Promise { + const ctlA = new AbortController(); + const ctlB = new AbortController(); + const link = (ctl: AbortController) => + signal ? AbortSignal.any([signal, ctl.signal]) : ctl.signal; + + let aRes: ParsedShape | undefined; + let bRes: ParsedShape | undefined; + const pA = this.#fetchShape(urlA, link(ctlA)).then((r) => { + aRes = r; + return "a" as const; + }); + const pB = this.#fetchShape(urlB, link(ctlB)).then((r) => { + bRes = r; + return "b" as const; + }); + + try { + if (!isLive) { + await Promise.all([pA, pB]); + return mergeParsedShapes(aRes!, bRes!, prior); + } + + const actionable = (r: ParsedShape) => + r.mustRefetch || r.status >= 400 || r.changes.length > 0; + + const first = await Promise.race([pA, pB]); + const firstRes = first === "a" ? aRes! : bRes!; + if (actionable(firstRes)) { + (first === "a" ? ctlB : ctlA).abort(); + return first === "a" + ? mergeParsedShapes(aRes!, unpolledShape("b", prior), prior) + : mergeParsedShapes(unpolledShape("a", prior), bRes!, prior); + } + + // First settled empty (idle timeout) — wait for the other. + await (first === "a" ? pB : pA); + return mergeParsedShapes(aRes!, bRes!, prior); + } catch (error) { + ctlA.abort(); + ctlB.abort(); + throw error; + } + } + + async #fetchShape(electricUrl: URL, signal?: AbortSignal): Promise { + const resp = await longPollingFetch(electricUrl.toString(), { signal }); + const headers = { + handle: + resp.headers.get("electric-handle") ?? resp.headers.get("electric-shape-id") ?? undefined, + offset: + resp.headers.get("electric-offset") ?? 
+ resp.headers.get("electric-chunk-last-offset") ?? + undefined, + cursor: resp.headers.get("electric-cursor") ?? undefined, + schema: resp.headers.get("electric-schema") ?? undefined, + }; + if (resp.status >= 400) { + try { + await resp.body?.cancel(); + } catch {} + return parseShapeMessages(resp.status, headers, ""); + } + const bodyText = await resp.text(); + return parseShapeMessages(resp.status, headers, bodyText); + } + + #buildMergeResponse( + merged: MergedShape, + isLive: boolean, + apiVersion: API_VERSIONS, + clientVersion?: string + ): Response { + const responseHeaders = new Headers(); + responseHeaders.set("content-type", "application/json"); + responseHeaders.set("cache-control", "no-store"); + // Match the native client: expose electric-* headers cross-origin or the + // deployed react-hooks fail with MissingHeadersError. + responseHeaders.set("access-control-allow-origin", "*"); + responseHeaders.set("access-control-expose-headers", "*"); + + if (merged.mustRefetch) { + // Reset the client's shape state; it refetches both tables from scratch. + return new Response(JSON.stringify([MUST_REFETCH_MESSAGE, UP_TO_DATE_MESSAGE]), { + status: 409, + headers: responseHeaders, + }); + } + + if (clientVersion) { + responseHeaders.set("electric-handle", merged.handle); + responseHeaders.set("electric-offset", merged.offset); + } else { + responseHeaders.set("electric-shape-id", merged.handle); + responseHeaders.set("electric-chunk-last-offset", merged.offset); + } + if (isLive) { + // The client requires electric-cursor on every live response (its live + // cache-buster). Fall back to the offset if neither shape provided one. + responseHeaders.set("electric-cursor", merged.cursor ?? merged.offset); + } else if (merged.schema !== undefined) { + // Non-live responses require electric-schema. 
+ responseHeaders.set("electric-schema", merged.schema); + } + + const body = JSON.stringify([...merged.changes, UP_TO_DATE_MESSAGE]); + const finalBody = + apiVersion === CURRENT_API_VERSION ? body : this.#rewriteResponseBodyForNoneApiVersion(body); + return new Response(finalBody, { status: 200, headers: responseHeaders }); + } + #constructRunsElectricUrl( url: URL | string, environment: RealtimeEnvironment, whereClause: string, + table: string, requestOptions?: RealtimeRequestOptions, clientVersion?: string ): URL { @@ -322,7 +586,7 @@ export class RealtimeClient { }); electricUrl.searchParams.set("where", whereClause); - electricUrl.searchParams.set("table", 'public."TaskRun"'); + electricUrl.searchParams.set("table", table); if (!clientVersion) { // If the client version is not provided, that means we're using an older client diff --git a/apps/webapp/test/electricShapeMerge.test.ts b/apps/webapp/test/electricShapeMerge.test.ts new file mode 100644 index 00000000000..7f0bf9e0b5b --- /dev/null +++ b/apps/webapp/test/electricShapeMerge.test.ts @@ -0,0 +1,201 @@ +import { describe, expect, it } from "vitest"; +import { + decodeCompositeOffset, + decodeCompositePart, + encodeComposite, + mergeParsedShapes, + parseShapeMessages, + unpolledShape, + type ParsedShape, + type PriorContinuation, +} from "~/services/realtime/electricShapeMerge.server"; + +const INSERT = { + key: '"public"."TaskRun"/"r1"', + value: { id: "r1" }, + headers: { operation: "insert" }, +}; +const UPDATE = { + key: '"public"."task_run_v2"/"r2"', + value: { id: "r2" }, + headers: { operation: "update" }, +}; + +function shape(overrides: Partial = {}): ParsedShape { + return { + status: 200, + handle: "h", + offset: "o", + cursor: "c", + schema: '{"id":{"type":"text"}}', + changes: [], + upToDate: true, + mustRefetch: false, + ...overrides, + }; +} + +const PRIOR: PriorContinuation = { + handleA: "HA", + offsetA: "OA", + cursorA: "CA", + handleB: "HB", + offsetB: "OB", + cursorB: "CB", +}; + 
+describe("decodeCompositePart", () => { + it("returns both undefined for null / no separator", () => { + expect(decodeCompositePart(null)).toEqual({ a: undefined, b: undefined }); + expect(decodeCompositePart(undefined)).toEqual({ a: undefined, b: undefined }); + expect(decodeCompositePart("")).toEqual({ a: undefined, b: undefined }); + // A bare value with no separator means "not a composite yet" -> initial. + expect(decodeCompositePart("solo")).toEqual({ a: undefined, b: undefined }); + }); + + it("splits a composite into its two parts", () => { + expect(decodeCompositePart("hA~hB")).toEqual({ a: "hA", b: "hB" }); + }); + + it("treats an empty side as undefined", () => { + expect(decodeCompositePart("hA~")).toEqual({ a: "hA", b: undefined }); + expect(decodeCompositePart("~hB")).toEqual({ a: undefined, b: "hB" }); + }); +}); + +describe("decodeCompositeOffset", () => { + it("applies a bare offset (e.g. the initial -1) to both shapes", () => { + expect(decodeCompositeOffset("-1")).toEqual({ a: "-1", b: "-1" }); + }); + + it("splits a composite offset", () => { + expect(decodeCompositeOffset("26800552_0~26800999_2")).toEqual({ + a: "26800552_0", + b: "26800999_2", + }); + }); + + it("round-trips through encodeComposite", () => { + expect(decodeCompositeOffset(encodeComposite("x_1", "y_2"))).toEqual({ a: "x_1", b: "y_2" }); + }); +}); + +describe("parseShapeMessages", () => { + const headers = { handle: "h", offset: "o", cursor: "c", schema: "s" }; + + it("extracts change rows and the up-to-date flag", () => { + const body = JSON.stringify([INSERT, { headers: { control: "up-to-date" } }]); + const parsed = parseShapeMessages(200, headers, body); + expect(parsed.changes).toEqual([INSERT]); + expect(parsed.upToDate).toBe(true); + expect(parsed.mustRefetch).toBe(false); + }); + + it("treats a bare up-to-date as no changes", () => { + const parsed = parseShapeMessages( + 200, + headers, + JSON.stringify([{ headers: { control: "up-to-date" } }]) + ); + 
expect(parsed.changes).toEqual([]); + expect(parsed.upToDate).toBe(true); + }); + + it("flags must-refetch from a 409 status", () => { + const parsed = parseShapeMessages(409, headers, ""); + expect(parsed.mustRefetch).toBe(true); + expect(parsed.changes).toEqual([]); + }); + + it("flags must-refetch from a control message", () => { + const body = JSON.stringify([ + { headers: { control: "must-refetch" } }, + { headers: { control: "up-to-date" } }, + ]); + expect(parseShapeMessages(200, headers, body).mustRefetch).toBe(true); + }); + + it("flags must-refetch for an unparseable / non-array body", () => { + expect(parseShapeMessages(200, headers, "not json").mustRefetch).toBe(true); + expect(parseShapeMessages(200, headers, "{}").mustRefetch).toBe(true); + }); + + it("treats an empty body as no changes (not up-to-date)", () => { + const parsed = parseShapeMessages(200, headers, ""); + expect(parsed.changes).toEqual([]); + expect(parsed.upToDate).toBe(false); + expect(parsed.mustRefetch).toBe(false); + }); +}); + +describe("mergeParsedShapes", () => { + it("concatenates change rows from both tables", () => { + const merged = mergeParsedShapes( + shape({ changes: [INSERT], handle: "hA", offset: "oA", cursor: "cA" }), + shape({ changes: [UPDATE], handle: "hB", offset: "oB", cursor: "cB" }), + PRIOR + ); + expect(merged.mustRefetch).toBe(false); + if (merged.mustRefetch) return; + expect(merged.changes).toEqual([INSERT, UPDATE]); + expect(merged.handle).toBe(encodeComposite("hA", "hB")); + expect(merged.offset).toBe(encodeComposite("oA", "oB")); + expect(merged.cursor).toBe(encodeComposite("cA", "cB")); + }); + + it("resets when either shape needs a refetch", () => { + expect(mergeParsedShapes(shape({ mustRefetch: true }), shape(), PRIOR)).toEqual({ + mustRefetch: true, + }); + expect(mergeParsedShapes(shape(), shape({ status: 409 }), PRIOR)).toEqual({ + mustRefetch: true, + }); + }); + + it("falls back to the prior continuation for a shape that returned nothing", () => 
{ + // B was left un-polled (the other table returned changes first). + const merged = mergeParsedShapes( + shape({ changes: [INSERT], handle: "hA2", offset: "oA2", cursor: "cA2" }), + unpolledShape("b", PRIOR), + PRIOR + ); + expect(merged.mustRefetch).toBe(false); + if (merged.mustRefetch) return; + expect(merged.changes).toEqual([INSERT]); + expect(merged.handle).toBe(encodeComposite("hA2", "HB")); + expect(merged.offset).toBe(encodeComposite("oA2", "OB")); + expect(merged.cursor).toBe(encodeComposite("cA2", "CB")); + }); + + it("uses the prior cursor when a returned shape omits it", () => { + const merged = mergeParsedShapes( + shape({ cursor: undefined, handle: "hA", offset: "oA" }), + shape({ cursor: "cB", handle: "hB", offset: "oB" }), + PRIOR + ); + if (merged.mustRefetch) throw new Error("unexpected refetch"); + // a omitted cursor -> prior.cursorA ("CA"); b returned "cB". + expect(merged.cursor).toBe(encodeComposite("CA", "cB")); + }); + + it("omits the cursor entirely when neither shape nor prior has one (initial snapshot)", () => { + const initialPrior: PriorContinuation = { offsetA: "-1", offsetB: "-1" }; + const merged = mergeParsedShapes( + shape({ cursor: undefined, handle: "hA", offset: "oA" }), + shape({ cursor: undefined, handle: "hB", offset: "oB" }), + initialPrior + ); + if (merged.mustRefetch) throw new Error("unexpected refetch"); + expect(merged.cursor).toBeUndefined(); + }); + + it("carries schema from whichever shape supplied it", () => { + const merged = mergeParsedShapes( + shape({ schema: undefined }), + shape({ schema: '{"id":{"type":"text"}}' }), + PRIOR + ); + if (merged.mustRefetch) throw new Error("unexpected refetch"); + expect(merged.schema).toBe('{"id":{"type":"text"}}'); + }); +}); diff --git a/apps/webapp/test/realtimeClient.test.ts b/apps/webapp/test/realtimeClient.test.ts index d98213e5b17..cdff50e3d18 100644 --- a/apps/webapp/test/realtimeClient.test.ts +++ b/apps/webapp/test/realtimeClient.test.ts @@ -237,8 +237,13 @@ 
describe.skipIf(process.env.GITHUB_ACTIONS)("RealtimeClient", () => { const chunkOffset = headers["electric-offset"]; expect(response.status).toBe(200); + // The tag/list feed spans both physical run tables, so streamRuns merges + // two upstream Electric shapes (TaskRun + task_run_v2) under one composite + // cursor: handle and offset each pack the two per-table values joined by + // "~". Both shapes are at "0_0" for the initial snapshot. expect(shapeId).toBeDefined(); - expect(chunkOffset).toBe("0_0"); + expect(shapeId).toContain("~"); + expect(chunkOffset).toBe("0_0~0_0"); } ); From eeb1079a6c17f5b75b1d635278f79a76ef03de4a Mon Sep 17 00:00:00 2001 From: Dan Sutton Date: Mon, 22 Jun 2026 14:20:39 +0100 Subject: [PATCH 56/83] fix(webapp): lock runTableV2 on the global flags page runTableV2 is resolved per organization only, so a global toggle on the admin flags page did nothing. Mark it read-only there to remove the misleading control; per-org control stays on the org dialog. --- apps/webapp/app/v3/featureFlags.ts | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/apps/webapp/app/v3/featureFlags.ts b/apps/webapp/app/v3/featureFlags.ts index 000013f6d23..a0a6146b485 100644 --- a/apps/webapp/app/v3/featureFlags.ts +++ b/apps/webapp/app/v3/featureFlags.ts @@ -59,6 +59,11 @@ export type FeatureFlagKey = keyof typeof FeatureFlagCatalog; export const GLOBAL_LOCKED_FLAGS: FeatureFlagKey[] = [ FEATURE_FLAG.defaultWorkerInstanceGroupId, FEATURE_FLAG.taskEventRepository, + // runTableV2 is resolved per-org only (`shouldUseV2RunTable` reads + // `Organization.featureFlags`, never the global FeatureFlag table), so a + // global toggle would be a silent no-op. Lock it on the global page to + // avoid that footgun; per-org control stays on the org dialog. + FEATURE_FLAG.runTableV2, ]; // Flags that are read-only on the org-level dialog. 
From 3d4ca9e5feb8630282462ed704ed29b3dee0b61f Mon Sep 17 00:00:00 2001 From: Dan Sutton Date: Mon, 22 Jun 2026 14:38:23 +0100 Subject: [PATCH 57/83] fix(webapp): scope cross-table run hierarchy hydration to the environment The parent/root/child hydration that resolves a run's hierarchy across both run tables looked runs up by id alone. Those pointers are now plain scalars with no foreign-key enforcement, so a stale or malformed pointer could resolve to a run in another environment and leak its metadata through the run and span presenters. Scope every lookup to the run's runtimeEnvironmentId, restoring the same-environment guarantee the table-bound relation select used to provide. --- .../v3/ApiRetrieveRunPresenter.server.ts | 8 ++++- .../app/presenters/v3/RunPresenter.server.ts | 1 + .../app/presenters/v3/SpanPresenter.server.ts | 4 +-- apps/webapp/app/v3/runHierarchy.server.ts | 31 ++++++++++++++++--- 4 files changed, 37 insertions(+), 7 deletions(-) diff --git a/apps/webapp/app/presenters/v3/ApiRetrieveRunPresenter.server.ts b/apps/webapp/app/presenters/v3/ApiRetrieveRunPresenter.server.ts index a21d0b112fd..b2f7b46d554 100644 --- a/apps/webapp/app/presenters/v3/ApiRetrieveRunPresenter.server.ts +++ b/apps/webapp/app/presenters/v3/ApiRetrieveRunPresenter.server.ts @@ -149,10 +149,16 @@ export class ApiRetrieveRunPresenter { // and children by a both-table predicate. 
const { parentTaskRun, rootTaskRun } = await hydrateParentAndRoot( { parentTaskRunId: pgRow.parentTaskRunId, rootTaskRunId: pgRow.rootTaskRunId }, + { runtimeEnvironmentId: env.id }, + commonRunSelect, + $replica + ); + const childRuns = await hydrateChildRuns( + pgRow.id, + { runtimeEnvironmentId: env.id }, commonRunSelect, $replica ); - const childRuns = await hydrateChildRuns(pgRow.id, commonRunSelect, $replica); return { ...pgRow, parentTaskRun, rootTaskRun, childRuns, isBuffered: false }; } diff --git a/apps/webapp/app/presenters/v3/RunPresenter.server.ts b/apps/webapp/app/presenters/v3/RunPresenter.server.ts index bb2daecf208..9b37448b88e 100644 --- a/apps/webapp/app/presenters/v3/RunPresenter.server.ts +++ b/apps/webapp/app/presenters/v3/RunPresenter.server.ts @@ -137,6 +137,7 @@ export class RunPresenter { // table-bound Prisma relation select would miss. const { parentTaskRun, rootTaskRun } = await hydrateParentAndRoot( { parentTaskRunId: run.parentTaskRunId, rootTaskRunId: run.rootTaskRunId }, + { runtimeEnvironmentId: run.runtimeEnvironment.id }, { friendlyId: true, spanId: true, createdAt: true }, this.#prismaClient ); diff --git a/apps/webapp/app/presenters/v3/SpanPresenter.server.ts b/apps/webapp/app/presenters/v3/SpanPresenter.server.ts index e202b20fbf2..30541e0c2c1 100644 --- a/apps/webapp/app/presenters/v3/SpanPresenter.server.ts +++ b/apps/webapp/app/presenters/v3/SpanPresenter.server.ts @@ -623,14 +623,14 @@ export class SpanPresenter extends BasePresenter { const [parentTaskRun, rootTaskRun] = await Promise.all([ run.parentTaskRunId ? runStore.findRun( - { id: run.parentTaskRunId }, + { id: run.parentTaskRunId, runtimeEnvironmentId: environmentId }, { select: { taskIdentifier: true, friendlyId: true, spanId: true } }, this._replica ) : Promise.resolve(null), run.rootTaskRunId ? 
runStore.findRun( - { id: run.rootTaskRunId }, + { id: run.rootTaskRunId, runtimeEnvironmentId: environmentId }, { select: { taskIdentifier: true, friendlyId: true, spanId: true, createdAt: true } }, this._replica ) diff --git a/apps/webapp/app/v3/runHierarchy.server.ts b/apps/webapp/app/v3/runHierarchy.server.ts index d2f1489364b..5d96eb01247 100644 --- a/apps/webapp/app/v3/runHierarchy.server.ts +++ b/apps/webapp/app/v3/runHierarchy.server.ts @@ -14,9 +14,15 @@ type ReadClient = PrismaClientOrTransaction | PrismaReplicaClient; * parent/root. Resolving each by id instead lets RunStore route to the correct * table by id format. Pass the same `select` the caller would have used on the * relation. + * + * The lookups are scoped to the run's `runtimeEnvironmentId`: the parent/root + * pointers are plain scalars with no FK enforcement, so a stale or malformed + * pointer could otherwise resolve to a run in another environment and leak its + * metadata. The relation select this replaces was implicitly same-environment. */ export async function hydrateParentAndRoot( ids: { parentTaskRunId: string | null; rootTaskRunId: string | null }, + scope: { runtimeEnvironmentId: string }, select: S, client?: ReadClient ): Promise<{ @@ -25,10 +31,18 @@ export async function hydrateParentAndRoot( }> { const [parentTaskRun, rootTaskRun] = await Promise.all([ ids.parentTaskRunId - ? runStore.findRun({ id: ids.parentTaskRunId }, { select }, client) + ? runStore.findRun( + { id: ids.parentTaskRunId, runtimeEnvironmentId: scope.runtimeEnvironmentId }, + { select }, + client + ) : Promise.resolve(null), ids.rootTaskRunId - ? runStore.findRun({ id: ids.rootTaskRunId }, { select }, client) + ? runStore.findRun( + { id: ids.rootTaskRunId, runtimeEnvironmentId: scope.runtimeEnvironmentId }, + { select }, + client + ) : Promise.resolve(null), ]); @@ -42,15 +56,24 @@ export async function hydrateParentAndRoot( * A run's direct child runs across BOTH physical tables. 
Children reference the * parent by the scalar `parentTaskRunId`, and a v2 parent can have legacy cuid * children (or vice versa) in the mixed window, so this is a non-id predicate - * read that `findRuns` resolves against both tables. + * read that `findRuns` resolves against both tables. Scoped to the run's + * `runtimeEnvironmentId` so a stale/malformed `parentTaskRunId` pointer can't + * surface children from another environment. */ export async function hydrateChildRuns( parentRunId: string, + scope: { runtimeEnvironmentId: string }, select: S, client?: ReadClient ): Promise[]> { return runStore.findRuns( - { where: { parentTaskRunId: parentRunId }, select }, + { + where: { + parentTaskRunId: parentRunId, + runtimeEnvironmentId: scope.runtimeEnvironmentId, + }, + select, + }, client ) as Promise[]>; } From 59dd560feea06e63b1fc55e66c57af3fb9df2067 Mon Sep 17 00:00:00 2001 From: Dan Sutton Date: Mon, 22 Jun 2026 14:38:23 +0100 Subject: [PATCH 58/83] fix(webapp): swallow the aborted sibling fetch in the realtime merge When the two-table realtime shape merge returns as soon as one upstream shape yields, it aborts the other fetch and returns immediately. That promise was left without a rejection handler, so the abort could surface as an unhandled rejection on the server. Attach a no-op catch to the aborted fetch. --- apps/webapp/app/services/realtimeClient.server.ts | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/apps/webapp/app/services/realtimeClient.server.ts b/apps/webapp/app/services/realtimeClient.server.ts index 20ceb84c4cb..db20712a228 100644 --- a/apps/webapp/app/services/realtimeClient.server.ts +++ b/apps/webapp/app/services/realtimeClient.server.ts @@ -482,6 +482,10 @@ export class RealtimeClient { const firstRes = first === "a" ? aRes! : bRes!; if (actionable(firstRes)) { (first === "a" ? 
ctlB : ctlA).abort(); + // The aborted sibling fetch rejects once the abort propagates; attach a + // no-op catch so it doesn't surface as an unhandled rejection after we + // have already returned. + void (first === "a" ? pB : pA).catch(() => {}); return first === "a" ? mergeParsedShapes(aRes!, unpolledShape("b", prior), prior) : mergeParsedShapes(unpolledShape("a", prior), bRes!, prior); From 59866a982ae74ec3d9b126ee7318bf8f2267a542 Mon Sep 17 00:00:00 2001 From: Dan Sutton Date: Mon, 22 Jun 2026 15:08:32 +0100 Subject: [PATCH 59/83] fix(webapp): harden the realtime merge against orphaned fetch rejections The two-table shape merge could leave one upstream fetch pending without a rejection handler when it aborts the race loser or rethrows from the catch block. Attach a detached no-op catch to both fetches up front so an abandoned fetch can never surface as an unhandled rejection on any path. Also document that a tag/batch subscription opens two upstream Electric connections while an org spans both run tables. --- .../app/services/realtimeClient.server.ts | 20 +++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/apps/webapp/app/services/realtimeClient.server.ts b/apps/webapp/app/services/realtimeClient.server.ts index db20712a228..d2e68c64e4e 100644 --- a/apps/webapp/app/services/realtimeClient.server.ts +++ b/apps/webapp/app/services/realtimeClient.server.ts @@ -330,6 +330,12 @@ export class RealtimeClient { // round-trips opaquely. A run lives in exactly one table, so the union of the // two shapes is the full feed; the client merges by row key and never learns // there are two shapes. See electricShapeMerge.server.ts for the pure logic. + // + // Cost: this opens TWO upstream Electric long-polls per tag/batch + // subscription (vs one for a single-table feed), so these feeds use ~2x + // Electric connections while an org has runs across both tables. 
Single-run + // subscriptions are unaffected — one shape, routed to the run's table by id + // format. async #streamRunsAcrossTables( url: URL | string, environment: RealtimeEnvironment, @@ -468,6 +474,13 @@ export class RealtimeClient { bRes = r; return "b" as const; }); + // A shape we don't end up awaiting (the race loser we abort, or the sibling + // left pending when the catch below rethrows) must not surface as an + // unhandled rejection. Attach detached no-op catches up front; the + // race/await paths still observe the original rejections through their own + // reactions, so this only swallows an otherwise-orphaned rejection. + void pA.catch(() => {}); + void pB.catch(() => {}); try { if (!isLive) { @@ -481,11 +494,10 @@ export class RealtimeClient { const first = await Promise.race([pA, pB]); const firstRes = first === "a" ? aRes! : bRes!; if (actionable(firstRes)) { + // Got changes/refetch from one shape; abort the other and return + // immediately. Its rejection is already swallowed by the catch attached + // above, so the abort can't surface as an unhandled rejection. (first === "a" ? ctlB : ctlA).abort(); - // The aborted sibling fetch rejects once the abort propagates; attach a - // no-op catch so it doesn't surface as an unhandled rejection after we - // have already returned. - void (first === "a" ? pB : pA).catch(() => {}); return first === "a" ? mergeParsedShapes(aRes!, unpolledShape("b", prior), prior) : mergeParsedShapes(unpolledShape("a", prior), bRes!, prior); From 0084704ed2b60898d167af35be4bc359eeae25ae Mon Sep 17 00:00:00 2001 From: Dan Sutton Date: Mon, 22 Jun 2026 16:01:37 +0100 Subject: [PATCH 60/83] fix(webapp): gate runTableV2 on native realtime instead of merging Electric shapes Electric realtime shapes are bound to a single table, so a task_run_v2 run was invisible to realtime subscriptions. 
The previous approach merged two Electric shapes per tag/batch feed under a composite cursor, which doubled Electric long-poll connections for those feeds. Electric is being retired in favor of the native realtime backend, which is table-agnostic and already observes both run tables, so that merge is throwaway. Drop the Electric dual-shape merge (revert realtimeClient to its single-table form, remove the merge module) and instead gate runTableV2 on the native backend: a run only routes to task_run_v2 when the deployment has native realtime enabled and the org's realtimeBackend flag is native. This keeps v2 runs realtime-observable without touching Electric, and the gate auto-satisfies once Electric is removed and native is the default. The idempotency pre-gate claim inherits the same gate. --- .../realtime/electricShapeMerge.server.ts | 167 ---------- .../app/services/realtimeClient.server.ts | 288 +----------------- apps/webapp/test/electricShapeMerge.test.ts | 201 ------------ apps/webapp/test/realtimeClient.test.ts | 7 +- 4 files changed, 5 insertions(+), 658 deletions(-) delete mode 100644 apps/webapp/app/services/realtime/electricShapeMerge.server.ts delete mode 100644 apps/webapp/test/electricShapeMerge.test.ts diff --git a/apps/webapp/app/services/realtime/electricShapeMerge.server.ts b/apps/webapp/app/services/realtime/electricShapeMerge.server.ts deleted file mode 100644 index cfe494e4a47..00000000000 --- a/apps/webapp/app/services/realtime/electricShapeMerge.server.ts +++ /dev/null @@ -1,167 +0,0 @@ -/** - * Pure helpers for merging TWO upstream Electric shapes (one per physical run - * table — `TaskRun` and `task_run_v2`) into a single shape the realtime client - * consumes. 
A tag-list or batch feed matches runs in both tables during/after a - * `runTableV2` cutover, but an Electric shape is bound to one table, so the - * proxy fans out to two shapes and presents one composite continuation - * (`handle` / `offset` / `cursor`) that the client round-trips opaquely. - * - * Kept dependency-free (no DB/Redis/fetch) so the merge logic is unit-testable. - */ - -// Separator packing the two per-table continuation values into one opaque -// token. Electric's handle/offset/cursor values are alphanumeric plus `_`/`-` -// (UUID-ish handles, `_` offsets, numeric cursors) and never contain -// `~`, so it is collision-free for this charset. -export const COMPOSITE_SEP = "~"; - -export const UP_TO_DATE_MESSAGE = { headers: { control: "up-to-date" } } as const; -export const MUST_REFETCH_MESSAGE = { headers: { control: "must-refetch" } } as const; - -/** A parsed per-table shape response: continuation headers + the change rows. */ -export type ParsedShape = { - status: number; - handle?: string; - offset?: string; - cursor?: string; - schema?: string; - /** Change messages only (control messages stripped). */ - changes: unknown[]; - upToDate: boolean; - mustRefetch: boolean; -}; - -/** The prior per-table continuation the client sent (used when a shape is left - * un-polled because the other returned first). */ -export type PriorContinuation = { - handleA?: string; - offsetA: string; - cursorA?: string; - handleB?: string; - offsetB: string; - cursorB?: string; -}; - -export type MergedShape = - | { mustRefetch: true } - | { - mustRefetch: false; - changes: unknown[]; - handle: string; - offset: string; - cursor?: string; - schema?: string; - }; - -/** - * Split a composite "~" value back into its per-table parts. A value with - * no separator (or null/empty) means the client hasn't been handed a composite - * yet (the initial request before any shape exists) -> both undefined. 
- */ -export function decodeCompositePart(value: string | null | undefined): { - a: string | undefined; - b: string | undefined; -} { - if (!value) return { a: undefined, b: undefined }; - const idx = value.indexOf(COMPOSITE_SEP); - if (idx === -1) return { a: undefined, b: undefined }; - return { - a: value.slice(0, idx) || undefined, - b: value.slice(idx + COMPOSITE_SEP.length) || undefined, - }; -} - -/** - * The offset is never absent — Electric uses "-1" for the initial request — so - * a bare value applies to BOTH shapes (initial), and a composite splits. - */ -export function decodeCompositeOffset(offset: string): { a: string; b: string } { - const idx = offset.indexOf(COMPOSITE_SEP); - if (idx === -1) return { a: offset, b: offset }; - return { a: offset.slice(0, idx), b: offset.slice(idx + COMPOSITE_SEP.length) }; -} - -export function encodeComposite(a: string, b: string): string { - return `${a}${COMPOSITE_SEP}${b}`; -} - -/** Parse the raw body + headers of one upstream shape response. */ -export function parseShapeMessages( - status: number, - headers: { - handle?: string; - offset?: string; - cursor?: string; - schema?: string; - }, - bodyText: string -): ParsedShape { - const base = { status, ...headers }; - if (status >= 400) { - return { ...base, changes: [], upToDate: false, mustRefetch: status === 409 }; - } - let parsed: unknown; - try { - parsed = bodyText.trim() ? JSON.parse(bodyText) : []; - } catch { - // Unparseable body — safest is to make the client refetch the shape. 
- return { ...base, changes: [], upToDate: false, mustRefetch: true }; - } - if (!Array.isArray(parsed)) { - return { ...base, changes: [], upToDate: false, mustRefetch: true }; - } - const messages = parsed as Array<{ headers?: { control?: string } }>; - const changes = messages.filter((m) => !m?.headers?.control); - const mustRefetch = messages.some((m) => m?.headers?.control === "must-refetch"); - const upToDate = messages.some((m) => m?.headers?.control === "up-to-date"); - return { ...base, changes, upToDate, mustRefetch }; -} - -/** - * Merge two parsed per-table shapes into one composite payload. If either shape - * needs a refetch (409 / must-refetch / unparseable), the whole composite is - * reset. Otherwise the change rows are concatenated (the client merges by key, - * so order across tables doesn't matter) and the continuation values are packed - * per table, falling back to the client's prior value for a shape that wasn't - * re-polled this round. - */ -export function mergeParsedShapes( - a: ParsedShape, - b: ParsedShape, - prior: PriorContinuation -): MergedShape { - if (a.mustRefetch || b.mustRefetch || a.status >= 400 || b.status >= 400) { - return { mustRefetch: true }; - } - const cursorA = a.cursor ?? prior.cursorA; - const cursorB = b.cursor ?? prior.cursorB; - const cursor = - cursorA !== undefined || cursorB !== undefined - ? encodeComposite(cursorA ?? "", cursorB ?? "") - : undefined; - return { - mustRefetch: false, - changes: [...a.changes, ...b.changes], - handle: encodeComposite(a.handle ?? prior.handleA ?? "", b.handle ?? prior.handleB ?? ""), - offset: encodeComposite(a.offset ?? prior.offsetA, b.offset ?? prior.offsetB), - cursor, - schema: a.schema ?? b.schema, - }; -} - -/** A synthetic "no change this round" result for a shape left un-polled because - * the other returned changes first; carries its prior continuation forward. 
*/ -export function unpolledShape( - which: "a" | "b", - prior: PriorContinuation -): ParsedShape { - return { - status: 200, - handle: which === "a" ? prior.handleA : prior.handleB, - offset: which === "a" ? prior.offsetA : prior.offsetB, - cursor: which === "a" ? prior.cursorA : prior.cursorB, - changes: [], - upToDate: true, - mustRefetch: false, - }; -} diff --git a/apps/webapp/app/services/realtimeClient.server.ts b/apps/webapp/app/services/realtimeClient.server.ts index d2e68c64e4e..12b93f1996d 100644 --- a/apps/webapp/app/services/realtimeClient.server.ts +++ b/apps/webapp/app/services/realtimeClient.server.ts @@ -1,18 +1,6 @@ import { json } from "@remix-run/server-runtime"; import { tryCatch } from "@trigger.dev/core/utils"; -import { isKsuidId, safeParseNaturalLanguageDurationAgo } from "@trigger.dev/core/v3/isomorphic"; -import { - decodeCompositeOffset, - decodeCompositePart, - mergeParsedShapes, - MUST_REFETCH_MESSAGE, - parseShapeMessages, - unpolledShape, - UP_TO_DATE_MESSAGE, - type MergedShape, - type ParsedShape, - type PriorContinuation, -} from "./realtime/electricShapeMerge.server"; +import { safeParseNaturalLanguageDurationAgo } from "@trigger.dev/core/v3/isomorphic"; import { Callback, Result } from "ioredis"; import { randomUUID } from "node:crypto"; import { createRedisClient, RedisClient, RedisWithClusterOptions } from "~/redis.server"; @@ -61,11 +49,6 @@ const DEFAULT_ELECTRIC_COLUMNS = [ const RESERVED_COLUMNS = ["id", "taskIdentifier", "friendlyId", "status", "createdAt"]; const RESERVED_SEARCH_PARAMS = ["createdAt", "tags", "skipColumns"]; -// The two physical run tables a realtime shape can target. A run lives in -// exactly one, keyed by id format (ksuid -> task_run_v2, cuid -> TaskRun). 
-const TASK_RUN_TABLE = 'public."TaskRun"'; -const TASK_RUN_V2_TABLE = 'public."task_run_v2"'; - export type RealtimeClientOptions = { electricOrigin: string | string[]; redis: RedisWithClusterOptions; @@ -135,15 +118,10 @@ export class RealtimeClient { clientVersion?: string, signal?: AbortSignal ) { - // Route the shape to the physical table the run lives in: a v2 run's id is - // a KSUID (task_run_v2), a legacy run's a cuid (TaskRun). The run was - // already resolved by the route, so this id is authoritative. - const table = isKsuidId(runId) ? TASK_RUN_V2_TABLE : TASK_RUN_TABLE; return this.#streamRunsWhere( url, environment, `id='${runId}'`, - table, apiVersion, requestOptions, clientVersion, @@ -167,7 +145,7 @@ export class RealtimeClient { const whereClause = whereClauses.join(" AND "); - return this.#streamRunsAcrossTables( + return this.#streamRunsWhere( url, environment, whereClause, @@ -201,7 +179,7 @@ export class RealtimeClient { const whereClause = whereClauses.join(" AND "); - const response = await this.#streamRunsAcrossTables( + const response = await this.#streamRunsWhere( url, environment, whereClause, @@ -300,7 +278,6 @@ export class RealtimeClient { url: URL | string, environment: RealtimeEnvironment, whereClause: string, - table: string, apiVersion: API_VERSIONS, requestOptions?: RealtimeRequestOptions, clientVersion?: string, @@ -310,7 +287,6 @@ export class RealtimeClient { url, environment, whereClause, - table, requestOptions, clientVersion ); @@ -324,266 +300,10 @@ export class RealtimeClient { ); } - // Stream a feed that spans BOTH physical run tables (the tag-list and batch - // feeds) by running two upstream Electric shapes — public."TaskRun" and - // public."task_run_v2" — under a single composite continuation the client - // round-trips opaquely. A run lives in exactly one table, so the union of the - // two shapes is the full feed; the client merges by row key and never learns - // there are two shapes. 
See electricShapeMerge.server.ts for the pure logic. - // - // Cost: this opens TWO upstream Electric long-polls per tag/batch - // subscription (vs one for a single-table feed), so these feeds use ~2x - // Electric connections while an org has runs across both tables. Single-run - // subscriptions are unaffected — one shape, routed to the run's table by id - // format. - async #streamRunsAcrossTables( - url: URL | string, - environment: RealtimeEnvironment, - whereClause: string, - apiVersion: API_VERSIONS, - requestOptions?: RealtimeRequestOptions, - clientVersion?: string, - signal?: AbortSignal - ): Promise { - const $url = new URL(url.toString()); - const isLive = isLiveRequestUrl($url); - const incomingHandle = extractShapeId($url); - const incomingOffset = $url.searchParams.get("offset") ?? "-1"; - const incomingCursor = $url.searchParams.get("cursor"); - - const handles = decodeCompositePart(incomingHandle); - const offsets = decodeCompositeOffset(incomingOffset); - const cursors = decodeCompositePart(incomingCursor); - - const prior: PriorContinuation = { - handleA: handles.a, - offsetA: offsets.a, - cursorA: cursors.a, - handleB: handles.b, - offsetB: offsets.b, - cursorB: cursors.b, - }; - - const urlA = this.#constructMergeShapeUrl( - $url, - environment, - whereClause, - TASK_RUN_TABLE, - { handle: handles.a, offset: offsets.a, cursor: cursors.a }, - requestOptions, - clientVersion - ); - const urlB = this.#constructMergeShapeUrl( - $url, - environment, - whereClause, - TASK_RUN_V2_TABLE, - { handle: handles.b, offset: offsets.b, cursor: cursors.b }, - requestOptions, - clientVersion - ); - - // One concurrency slot for the composite live request: it maps to a single - // client request even though we fan out to two upstream long-polls. 
- let requestId: string | undefined; - if (isLive && incomingHandle) { - const concurrencyLimit = await this.cachedLimitProvider.getCachedLimit( - environment.organizationId, - 100_000 - ); - if (!concurrencyLimit) { - logger.error("Failed to get concurrency limit", { - organizationId: environment.organizationId, - }); - return json({ error: "Failed to get concurrency limit" }, { status: 500 }); - } - requestId = randomUUID(); - if (!(await this.#incrementAndCheck(environment.id, requestId, concurrencyLimit))) { - return json({ error: "Too many concurrent requests" }, { status: 429 }); - } - } - - try { - const merged = await this.#raceAndMergeShapes(urlA, urlB, isLive, prior, signal); - return this.#buildMergeResponse(merged, isLive, apiVersion, clientVersion); - } finally { - if (requestId) { - await this.#decrementConcurrency(environment.id, requestId); - } - } - } - - // Build the per-table Electric URL, replacing the composite continuation the - // client sent with this table's decoded part. - #constructMergeShapeUrl( - baseUrl: URL, - environment: RealtimeEnvironment, - whereClause: string, - table: string, - perTable: { handle?: string; offset: string; cursor?: string }, - requestOptions?: RealtimeRequestOptions, - clientVersion?: string - ): URL { - const electricUrl = this.#constructRunsElectricUrl( - baseUrl, - environment, - whereClause, - table, - requestOptions, - clientVersion - ); - // Upstream always speaks current Electric (handle, not shape_id). - electricUrl.searchParams.delete("shape_id"); - if (perTable.handle !== undefined) { - electricUrl.searchParams.set("handle", perTable.handle); - } else { - electricUrl.searchParams.delete("handle"); - } - electricUrl.searchParams.set("offset", perTable.offset); - if (perTable.cursor !== undefined) { - electricUrl.searchParams.set("cursor", perTable.cursor); - } else { - electricUrl.searchParams.delete("cursor"); - } - return electricUrl; - } - - // Fetch both shapes. 
For a live request, return as soon as ONE yields changes - // (or needs a refetch) and carry the other's prior continuation forward — so a - // change on either table isn't delayed by the other's idle long-poll. If the - // first to settle had nothing, wait for the other before responding. - async #raceAndMergeShapes( - urlA: URL, - urlB: URL, - isLive: boolean, - prior: PriorContinuation, - signal?: AbortSignal - ): Promise { - const ctlA = new AbortController(); - const ctlB = new AbortController(); - const link = (ctl: AbortController) => - signal ? AbortSignal.any([signal, ctl.signal]) : ctl.signal; - - let aRes: ParsedShape | undefined; - let bRes: ParsedShape | undefined; - const pA = this.#fetchShape(urlA, link(ctlA)).then((r) => { - aRes = r; - return "a" as const; - }); - const pB = this.#fetchShape(urlB, link(ctlB)).then((r) => { - bRes = r; - return "b" as const; - }); - // A shape we don't end up awaiting (the race loser we abort, or the sibling - // left pending when the catch below rethrows) must not surface as an - // unhandled rejection. Attach detached no-op catches up front; the - // race/await paths still observe the original rejections through their own - // reactions, so this only swallows an otherwise-orphaned rejection. - void pA.catch(() => {}); - void pB.catch(() => {}); - - try { - if (!isLive) { - await Promise.all([pA, pB]); - return mergeParsedShapes(aRes!, bRes!, prior); - } - - const actionable = (r: ParsedShape) => - r.mustRefetch || r.status >= 400 || r.changes.length > 0; - - const first = await Promise.race([pA, pB]); - const firstRes = first === "a" ? aRes! : bRes!; - if (actionable(firstRes)) { - // Got changes/refetch from one shape; abort the other and return - // immediately. Its rejection is already swallowed by the catch attached - // above, so the abort can't surface as an unhandled rejection. - (first === "a" ? ctlB : ctlA).abort(); - return first === "a" - ? 
mergeParsedShapes(aRes!, unpolledShape("b", prior), prior) - : mergeParsedShapes(unpolledShape("a", prior), bRes!, prior); - } - - // First settled empty (idle timeout) — wait for the other. - await (first === "a" ? pB : pA); - return mergeParsedShapes(aRes!, bRes!, prior); - } catch (error) { - ctlA.abort(); - ctlB.abort(); - throw error; - } - } - - async #fetchShape(electricUrl: URL, signal?: AbortSignal): Promise { - const resp = await longPollingFetch(electricUrl.toString(), { signal }); - const headers = { - handle: - resp.headers.get("electric-handle") ?? resp.headers.get("electric-shape-id") ?? undefined, - offset: - resp.headers.get("electric-offset") ?? - resp.headers.get("electric-chunk-last-offset") ?? - undefined, - cursor: resp.headers.get("electric-cursor") ?? undefined, - schema: resp.headers.get("electric-schema") ?? undefined, - }; - if (resp.status >= 400) { - try { - await resp.body?.cancel(); - } catch {} - return parseShapeMessages(resp.status, headers, ""); - } - const bodyText = await resp.text(); - return parseShapeMessages(resp.status, headers, bodyText); - } - - #buildMergeResponse( - merged: MergedShape, - isLive: boolean, - apiVersion: API_VERSIONS, - clientVersion?: string - ): Response { - const responseHeaders = new Headers(); - responseHeaders.set("content-type", "application/json"); - responseHeaders.set("cache-control", "no-store"); - // Match the native client: expose electric-* headers cross-origin or the - // deployed react-hooks fail with MissingHeadersError. - responseHeaders.set("access-control-allow-origin", "*"); - responseHeaders.set("access-control-expose-headers", "*"); - - if (merged.mustRefetch) { - // Reset the client's shape state; it refetches both tables from scratch. 
- return new Response(JSON.stringify([MUST_REFETCH_MESSAGE, UP_TO_DATE_MESSAGE]), { - status: 409, - headers: responseHeaders, - }); - } - - if (clientVersion) { - responseHeaders.set("electric-handle", merged.handle); - responseHeaders.set("electric-offset", merged.offset); - } else { - responseHeaders.set("electric-shape-id", merged.handle); - responseHeaders.set("electric-chunk-last-offset", merged.offset); - } - if (isLive) { - // The client requires electric-cursor on every live response (its live - // cache-buster). Fall back to the offset if neither shape provided one. - responseHeaders.set("electric-cursor", merged.cursor ?? merged.offset); - } else if (merged.schema !== undefined) { - // Non-live responses require electric-schema. - responseHeaders.set("electric-schema", merged.schema); - } - - const body = JSON.stringify([...merged.changes, UP_TO_DATE_MESSAGE]); - const finalBody = - apiVersion === CURRENT_API_VERSION ? body : this.#rewriteResponseBodyForNoneApiVersion(body); - return new Response(finalBody, { status: 200, headers: responseHeaders }); - } - #constructRunsElectricUrl( url: URL | string, environment: RealtimeEnvironment, whereClause: string, - table: string, requestOptions?: RealtimeRequestOptions, clientVersion?: string ): URL { @@ -602,7 +322,7 @@ export class RealtimeClient { }); electricUrl.searchParams.set("where", whereClause); - electricUrl.searchParams.set("table", table); + electricUrl.searchParams.set("table", 'public."TaskRun"'); if (!clientVersion) { // If the client version is not provided, that means we're using an older client diff --git a/apps/webapp/test/electricShapeMerge.test.ts b/apps/webapp/test/electricShapeMerge.test.ts deleted file mode 100644 index 7f0bf9e0b5b..00000000000 --- a/apps/webapp/test/electricShapeMerge.test.ts +++ /dev/null @@ -1,201 +0,0 @@ -import { describe, expect, it } from "vitest"; -import { - decodeCompositeOffset, - decodeCompositePart, - encodeComposite, - mergeParsedShapes, - 
parseShapeMessages, - unpolledShape, - type ParsedShape, - type PriorContinuation, -} from "~/services/realtime/electricShapeMerge.server"; - -const INSERT = { - key: '"public"."TaskRun"/"r1"', - value: { id: "r1" }, - headers: { operation: "insert" }, -}; -const UPDATE = { - key: '"public"."task_run_v2"/"r2"', - value: { id: "r2" }, - headers: { operation: "update" }, -}; - -function shape(overrides: Partial = {}): ParsedShape { - return { - status: 200, - handle: "h", - offset: "o", - cursor: "c", - schema: '{"id":{"type":"text"}}', - changes: [], - upToDate: true, - mustRefetch: false, - ...overrides, - }; -} - -const PRIOR: PriorContinuation = { - handleA: "HA", - offsetA: "OA", - cursorA: "CA", - handleB: "HB", - offsetB: "OB", - cursorB: "CB", -}; - -describe("decodeCompositePart", () => { - it("returns both undefined for null / no separator", () => { - expect(decodeCompositePart(null)).toEqual({ a: undefined, b: undefined }); - expect(decodeCompositePart(undefined)).toEqual({ a: undefined, b: undefined }); - expect(decodeCompositePart("")).toEqual({ a: undefined, b: undefined }); - // A bare value with no separator means "not a composite yet" -> initial. - expect(decodeCompositePart("solo")).toEqual({ a: undefined, b: undefined }); - }); - - it("splits a composite into its two parts", () => { - expect(decodeCompositePart("hA~hB")).toEqual({ a: "hA", b: "hB" }); - }); - - it("treats an empty side as undefined", () => { - expect(decodeCompositePart("hA~")).toEqual({ a: "hA", b: undefined }); - expect(decodeCompositePart("~hB")).toEqual({ a: undefined, b: "hB" }); - }); -}); - -describe("decodeCompositeOffset", () => { - it("applies a bare offset (e.g. 
the initial -1) to both shapes", () => { - expect(decodeCompositeOffset("-1")).toEqual({ a: "-1", b: "-1" }); - }); - - it("splits a composite offset", () => { - expect(decodeCompositeOffset("26800552_0~26800999_2")).toEqual({ - a: "26800552_0", - b: "26800999_2", - }); - }); - - it("round-trips through encodeComposite", () => { - expect(decodeCompositeOffset(encodeComposite("x_1", "y_2"))).toEqual({ a: "x_1", b: "y_2" }); - }); -}); - -describe("parseShapeMessages", () => { - const headers = { handle: "h", offset: "o", cursor: "c", schema: "s" }; - - it("extracts change rows and the up-to-date flag", () => { - const body = JSON.stringify([INSERT, { headers: { control: "up-to-date" } }]); - const parsed = parseShapeMessages(200, headers, body); - expect(parsed.changes).toEqual([INSERT]); - expect(parsed.upToDate).toBe(true); - expect(parsed.mustRefetch).toBe(false); - }); - - it("treats a bare up-to-date as no changes", () => { - const parsed = parseShapeMessages( - 200, - headers, - JSON.stringify([{ headers: { control: "up-to-date" } }]) - ); - expect(parsed.changes).toEqual([]); - expect(parsed.upToDate).toBe(true); - }); - - it("flags must-refetch from a 409 status", () => { - const parsed = parseShapeMessages(409, headers, ""); - expect(parsed.mustRefetch).toBe(true); - expect(parsed.changes).toEqual([]); - }); - - it("flags must-refetch from a control message", () => { - const body = JSON.stringify([ - { headers: { control: "must-refetch" } }, - { headers: { control: "up-to-date" } }, - ]); - expect(parseShapeMessages(200, headers, body).mustRefetch).toBe(true); - }); - - it("flags must-refetch for an unparseable / non-array body", () => { - expect(parseShapeMessages(200, headers, "not json").mustRefetch).toBe(true); - expect(parseShapeMessages(200, headers, "{}").mustRefetch).toBe(true); - }); - - it("treats an empty body as no changes (not up-to-date)", () => { - const parsed = parseShapeMessages(200, headers, ""); - expect(parsed.changes).toEqual([]); - 
expect(parsed.upToDate).toBe(false); - expect(parsed.mustRefetch).toBe(false); - }); -}); - -describe("mergeParsedShapes", () => { - it("concatenates change rows from both tables", () => { - const merged = mergeParsedShapes( - shape({ changes: [INSERT], handle: "hA", offset: "oA", cursor: "cA" }), - shape({ changes: [UPDATE], handle: "hB", offset: "oB", cursor: "cB" }), - PRIOR - ); - expect(merged.mustRefetch).toBe(false); - if (merged.mustRefetch) return; - expect(merged.changes).toEqual([INSERT, UPDATE]); - expect(merged.handle).toBe(encodeComposite("hA", "hB")); - expect(merged.offset).toBe(encodeComposite("oA", "oB")); - expect(merged.cursor).toBe(encodeComposite("cA", "cB")); - }); - - it("resets when either shape needs a refetch", () => { - expect(mergeParsedShapes(shape({ mustRefetch: true }), shape(), PRIOR)).toEqual({ - mustRefetch: true, - }); - expect(mergeParsedShapes(shape(), shape({ status: 409 }), PRIOR)).toEqual({ - mustRefetch: true, - }); - }); - - it("falls back to the prior continuation for a shape that returned nothing", () => { - // B was left un-polled (the other table returned changes first). - const merged = mergeParsedShapes( - shape({ changes: [INSERT], handle: "hA2", offset: "oA2", cursor: "cA2" }), - unpolledShape("b", PRIOR), - PRIOR - ); - expect(merged.mustRefetch).toBe(false); - if (merged.mustRefetch) return; - expect(merged.changes).toEqual([INSERT]); - expect(merged.handle).toBe(encodeComposite("hA2", "HB")); - expect(merged.offset).toBe(encodeComposite("oA2", "OB")); - expect(merged.cursor).toBe(encodeComposite("cA2", "CB")); - }); - - it("uses the prior cursor when a returned shape omits it", () => { - const merged = mergeParsedShapes( - shape({ cursor: undefined, handle: "hA", offset: "oA" }), - shape({ cursor: "cB", handle: "hB", offset: "oB" }), - PRIOR - ); - if (merged.mustRefetch) throw new Error("unexpected refetch"); - // a omitted cursor -> prior.cursorA ("CA"); b returned "cB". 
- expect(merged.cursor).toBe(encodeComposite("CA", "cB")); - }); - - it("omits the cursor entirely when neither shape nor prior has one (initial snapshot)", () => { - const initialPrior: PriorContinuation = { offsetA: "-1", offsetB: "-1" }; - const merged = mergeParsedShapes( - shape({ cursor: undefined, handle: "hA", offset: "oA" }), - shape({ cursor: undefined, handle: "hB", offset: "oB" }), - initialPrior - ); - if (merged.mustRefetch) throw new Error("unexpected refetch"); - expect(merged.cursor).toBeUndefined(); - }); - - it("carries schema from whichever shape supplied it", () => { - const merged = mergeParsedShapes( - shape({ schema: undefined }), - shape({ schema: '{"id":{"type":"text"}}' }), - PRIOR - ); - if (merged.mustRefetch) throw new Error("unexpected refetch"); - expect(merged.schema).toBe('{"id":{"type":"text"}}'); - }); -}); diff --git a/apps/webapp/test/realtimeClient.test.ts b/apps/webapp/test/realtimeClient.test.ts index cdff50e3d18..d98213e5b17 100644 --- a/apps/webapp/test/realtimeClient.test.ts +++ b/apps/webapp/test/realtimeClient.test.ts @@ -237,13 +237,8 @@ describe.skipIf(process.env.GITHUB_ACTIONS)("RealtimeClient", () => { const chunkOffset = headers["electric-offset"]; expect(response.status).toBe(200); - // The tag/list feed spans both physical run tables, so streamRuns merges - // two upstream Electric shapes (TaskRun + task_run_v2) under one composite - // cursor: handle and offset each pack the two per-table values joined by - // "~". Both shapes are at "0_0" for the initial snapshot. 
expect(shapeId).toBeDefined(); - expect(shapeId).toContain("~"); - expect(chunkOffset).toBe("0_0~0_0"); + expect(chunkOffset).toBe("0_0"); } ); From 760c24c54666de18d1b45f0278c0c55541e7ec1e Mon Sep 17 00:00:00 2001 From: Dan Sutton Date: Mon, 22 Jun 2026 16:03:07 +0100 Subject: [PATCH 61/83] fix(webapp): gate runTableV2 on the native realtime backend Completes the Electric-merge removal: a run only routes to task_run_v2 when the deployment has native realtime enabled and the org's realtimeBackend flag is native. Electric shapes are single-table and can't observe a v2 run, so without this gate a v2 run would be realtime-invisible. shouldUseV2RunTable takes the native-realtime master switch as a parameter (kept env-free for unit tests); the trigger mint site and the idempotency pre-gate claim both pass it. --- .../concerns/idempotencyKeys.server.ts | 4 +- .../runEngine/services/triggerTask.server.ts | 4 +- apps/webapp/app/v3/runTableV2.server.ts | 41 +++++++++++++++-- apps/webapp/test/runTableV2.test.ts | 46 ++++++++++++++----- 4 files changed, 78 insertions(+), 17 deletions(-) diff --git a/apps/webapp/app/runEngine/concerns/idempotencyKeys.server.ts b/apps/webapp/app/runEngine/concerns/idempotencyKeys.server.ts index e8dd61c5c87..be9f5f92a74 100644 --- a/apps/webapp/app/runEngine/concerns/idempotencyKeys.server.ts +++ b/apps/webapp/app/runEngine/concerns/idempotencyKeys.server.ts @@ -341,7 +341,9 @@ export class IdempotencyKeyConcern { // and don't apply to the cross-table concern. shouldUseV2RunTable is // checked first so a v2 org skips the mollifier-flag resolve entirely. 
const claimEligible = - shouldUseV2RunTable(orgFeatureFlags) || + shouldUseV2RunTable(orgFeatureFlags, { + nativeRealtimeEnabled: env.REALTIME_BACKEND_NATIVE_ENABLED === "1", + }) || (!request.body.options?.resumeParentOnCompletion && !request.body.options?.debounce && !request.options?.oneTimeUseToken && diff --git a/apps/webapp/app/runEngine/services/triggerTask.server.ts b/apps/webapp/app/runEngine/services/triggerTask.server.ts index 8c61b7d7fcd..e74be26c366 100644 --- a/apps/webapp/app/runEngine/services/triggerTask.server.ts +++ b/apps/webapp/app/runEngine/services/triggerTask.server.ts @@ -159,7 +159,9 @@ export class RunEngineTriggerTaskService { // trigger hot path. Downstream routing is by id format only. const runFriendlyId = options?.runFriendlyId ?? - (shouldUseV2RunTable(environment.organization.featureFlags) + (shouldUseV2RunTable(environment.organization.featureFlags, { + nativeRealtimeEnabled: env.REALTIME_BACKEND_NATIVE_ENABLED === "1", + }) ? RunId.generateKsuid() : RunId.generate() ).friendlyId; diff --git a/apps/webapp/app/v3/runTableV2.server.ts b/apps/webapp/app/v3/runTableV2.server.ts index 51b55aefbe3..5fa089fbd6e 100644 --- a/apps/webapp/app/v3/runTableV2.server.ts +++ b/apps/webapp/app/v3/runTableV2.server.ts @@ -1,5 +1,15 @@ import { FEATURE_FLAG, FeatureFlagCatalog } from "~/v3/featureFlags"; +export type ShouldUseV2RunTableOptions = { + /** + * Whether the native realtime backend is enabled for this deployment + * (`env.REALTIME_BACKEND_NATIVE_ENABLED === "1"`). Passed in rather than read + * from env here so this stays a pure, env-free function the caller can + * unit-test directly. + */ + nativeRealtimeEnabled: boolean; +}; + /** * Per-org cutover switch for the parallel `task_run_v2` run table. * @@ -9,20 +19,45 @@ import { FEATURE_FLAG, FeatureFlagCatalog } from "~/v3/featureFlags"; * routes the run to `task_run_v2`; off (the default) → mint a legacy id, which * routes to `TaskRun`. * + * GATED ON NATIVE REALTIME. 
The Electric realtime backend serves shapes bound + * to a single table (`TaskRun`) and is being retired; only the native backend + * is table-agnostic and can observe a `task_run_v2` run in realtime + * (subscribeToRun / useRealtimeRun / poll). Routing a run to v2 while the org is + * still served by Electric would make that run silently invisible in realtime, + * so v2 requires BOTH the deployment master switch (`nativeRealtimeEnabled`) and + * the org's `realtimeBackend` flag set to "native". This is a temporary + * coupling: once Electric is removed and native is the only/default backend, + * drop the native check. + * * RunStore never reads this flag: it routes purely by id format. The flag only * decides which id scheme is minted upstream. Disabling it sends only NEW runs * back to legacy; runs already created on v2 stay readable there (routed by id). */ -export function shouldUseV2RunTable(orgFeatureFlags: unknown): boolean { +export function shouldUseV2RunTable( + orgFeatureFlags: unknown, + options: ShouldUseV2RunTableOptions +): boolean { if (orgFeatureFlags === null || typeof orgFeatureFlags !== "object") { return false; } + const flags = orgFeatureFlags as Record; - const override = (orgFeatureFlags as Record)[FEATURE_FLAG.runTableV2]; - if (override === undefined) { + // Native realtime is a hard prerequisite (see doc comment): a v2 run is only + // observable in realtime on the native backend. + if (!options.nativeRealtimeEnabled) { + return false; + } + const backend = FeatureFlagCatalog[FEATURE_FLAG.realtimeBackend].safeParse( + flags[FEATURE_FLAG.realtimeBackend] + ); + if (!(backend.success && backend.data === "native")) { return false; } + const override = flags[FEATURE_FLAG.runTableV2]; + if (override === undefined) { + return false; + } const parsed = FeatureFlagCatalog[FEATURE_FLAG.runTableV2].safeParse(override); return parsed.success ? 
parsed.data : false; } diff --git a/apps/webapp/test/runTableV2.test.ts b/apps/webapp/test/runTableV2.test.ts index 9abae4cb7bb..7aa528b34a9 100644 --- a/apps/webapp/test/runTableV2.test.ts +++ b/apps/webapp/test/runTableV2.test.ts @@ -1,28 +1,50 @@ import { describe, expect, it } from "vitest"; import { shouldUseV2RunTable } from "~/v3/runTableV2.server"; +// v2 is gated on the org being served realtime by the NATIVE backend (Electric +// can't observe task_run_v2). That requires the deployment master switch +// (nativeRealtimeEnabled) AND the per-org `realtimeBackend` flag set to "native". +const NATIVE_ON = { nativeRealtimeEnabled: true }; +const NATIVE_OFF = { nativeRealtimeEnabled: false }; +const onNative = (extra: Record = {}) => ({ realtimeBackend: "native", ...extra }); + describe("shouldUseV2RunTable", () => { it("defaults to false when the org has no flags", () => { - expect(shouldUseV2RunTable(null)).toBe(false); - expect(shouldUseV2RunTable(undefined)).toBe(false); - expect(shouldUseV2RunTable({})).toBe(false); + expect(shouldUseV2RunTable(null, NATIVE_ON)).toBe(false); + expect(shouldUseV2RunTable(undefined, NATIVE_ON)).toBe(false); + expect(shouldUseV2RunTable({}, NATIVE_ON)).toBe(false); + }); + + it("returns true only when runTableV2 is boolean true AND the org is on native realtime", () => { + expect(shouldUseV2RunTable(onNative({ runTableV2: true }), NATIVE_ON)).toBe(true); + expect(shouldUseV2RunTable(onNative({ runTableV2: false }), NATIVE_ON)).toBe(false); }); - it("returns true only when the flag is the boolean true", () => { - expect(shouldUseV2RunTable({ runTableV2: true })).toBe(true); - expect(shouldUseV2RunTable({ runTableV2: false })).toBe(false); + it("requires the native realtime backend (Electric can't observe v2 runs)", () => { + // runTableV2 on, but the org is not on native realtime → no v2 (it would be + // realtime-invisible). 
+ expect(shouldUseV2RunTable({ runTableV2: true }, NATIVE_ON)).toBe(false); + expect(shouldUseV2RunTable({ runTableV2: true, realtimeBackend: "electric" }, NATIVE_ON)).toBe( + false + ); + expect(shouldUseV2RunTable({ runTableV2: true, realtimeBackend: "shadow" }, NATIVE_ON)).toBe( + false + ); + // On native per-org, but the deployment master switch is off → effectively + // still Electric → no v2. + expect(shouldUseV2RunTable(onNative({ runTableV2: true }), NATIVE_OFF)).toBe(false); }); it("rejects a stringified flag value (strict boolean, no coercion)", () => { // A stringified "false" must not coerce to true and cut the org over. - expect(shouldUseV2RunTable({ runTableV2: "true" })).toBe(false); - expect(shouldUseV2RunTable({ runTableV2: "false" })).toBe(false); - expect(shouldUseV2RunTable({ runTableV2: 1 })).toBe(false); + expect(shouldUseV2RunTable(onNative({ runTableV2: "true" }), NATIVE_ON)).toBe(false); + expect(shouldUseV2RunTable(onNative({ runTableV2: "false" }), NATIVE_ON)).toBe(false); + expect(shouldUseV2RunTable(onNative({ runTableV2: 1 }), NATIVE_ON)).toBe(false); }); it("ignores unrelated flags and non-object inputs", () => { - expect(shouldUseV2RunTable({ mollifierEnabled: true })).toBe(false); - expect(shouldUseV2RunTable("runTableV2")).toBe(false); - expect(shouldUseV2RunTable(42)).toBe(false); + expect(shouldUseV2RunTable(onNative({ mollifierEnabled: true }), NATIVE_ON)).toBe(false); + expect(shouldUseV2RunTable("runTableV2", NATIVE_ON)).toBe(false); + expect(shouldUseV2RunTable(42, NATIVE_ON)).toBe(false); }); }); From c4d8c4bdd4649813060a797e278071bcf340160b Mon Sep 17 00:00:00 2001 From: Dan Sutton Date: Mon, 22 Jun 2026 16:53:03 +0100 Subject: [PATCH 62/83] fix(webapp): serve task_run_v2 runs over Electric realtime Restore the both-table Electric shape merge so tag-list and batch realtime feeds observe runs in TaskRun and task_run_v2 together, and gate the v2 run table on the runTableV2 flag alone (drop the native-realtime coupling). 
New runs route to task_run_v2 whenever an org has the flag on and stay visible in realtime on the existing Electric backend. Single-run feeds route to one table by id format; only tag and batch feeds fan out to both shapes under one composite continuation. --- .../concerns/idempotencyKeys.server.ts | 4 +- .../runEngine/services/triggerTask.server.ts | 4 +- .../realtime/electricShapeMerge.server.ts | 167 ++++++++++ .../app/services/realtimeClient.server.ts | 288 +++++++++++++++++- apps/webapp/app/v3/runTableV2.server.ts | 41 +-- apps/webapp/test/electricShapeMerge.test.ts | 201 ++++++++++++ apps/webapp/test/realtimeClient.test.ts | 7 +- apps/webapp/test/runTableV2.test.ts | 46 +-- 8 files changed, 675 insertions(+), 83 deletions(-) create mode 100644 apps/webapp/app/services/realtime/electricShapeMerge.server.ts create mode 100644 apps/webapp/test/electricShapeMerge.test.ts diff --git a/apps/webapp/app/runEngine/concerns/idempotencyKeys.server.ts b/apps/webapp/app/runEngine/concerns/idempotencyKeys.server.ts index be9f5f92a74..e8dd61c5c87 100644 --- a/apps/webapp/app/runEngine/concerns/idempotencyKeys.server.ts +++ b/apps/webapp/app/runEngine/concerns/idempotencyKeys.server.ts @@ -341,9 +341,7 @@ export class IdempotencyKeyConcern { // and don't apply to the cross-table concern. shouldUseV2RunTable is // checked first so a v2 org skips the mollifier-flag resolve entirely. 
const claimEligible = - shouldUseV2RunTable(orgFeatureFlags, { - nativeRealtimeEnabled: env.REALTIME_BACKEND_NATIVE_ENABLED === "1", - }) || + shouldUseV2RunTable(orgFeatureFlags) || (!request.body.options?.resumeParentOnCompletion && !request.body.options?.debounce && !request.options?.oneTimeUseToken && diff --git a/apps/webapp/app/runEngine/services/triggerTask.server.ts b/apps/webapp/app/runEngine/services/triggerTask.server.ts index e74be26c366..8c61b7d7fcd 100644 --- a/apps/webapp/app/runEngine/services/triggerTask.server.ts +++ b/apps/webapp/app/runEngine/services/triggerTask.server.ts @@ -159,9 +159,7 @@ export class RunEngineTriggerTaskService { // trigger hot path. Downstream routing is by id format only. const runFriendlyId = options?.runFriendlyId ?? - (shouldUseV2RunTable(environment.organization.featureFlags, { - nativeRealtimeEnabled: env.REALTIME_BACKEND_NATIVE_ENABLED === "1", - }) + (shouldUseV2RunTable(environment.organization.featureFlags) ? RunId.generateKsuid() : RunId.generate() ).friendlyId; diff --git a/apps/webapp/app/services/realtime/electricShapeMerge.server.ts b/apps/webapp/app/services/realtime/electricShapeMerge.server.ts new file mode 100644 index 00000000000..cfe494e4a47 --- /dev/null +++ b/apps/webapp/app/services/realtime/electricShapeMerge.server.ts @@ -0,0 +1,167 @@ +/** + * Pure helpers for merging TWO upstream Electric shapes (one per physical run + * table — `TaskRun` and `task_run_v2`) into a single shape the realtime client + * consumes. A tag-list or batch feed matches runs in both tables during/after a + * `runTableV2` cutover, but an Electric shape is bound to one table, so the + * proxy fans out to two shapes and presents one composite continuation + * (`handle` / `offset` / `cursor`) that the client round-trips opaquely. + * + * Kept dependency-free (no DB/Redis/fetch) so the merge logic is unit-testable. + */ + +// Separator packing the two per-table continuation values into one opaque +// token. 
Electric's handle/offset/cursor values are alphanumeric plus `_`/`-` +// (UUID-ish handles, `_` offsets, numeric cursors) and never contain +// `~`, so it is collision-free for this charset. +export const COMPOSITE_SEP = "~"; + +export const UP_TO_DATE_MESSAGE = { headers: { control: "up-to-date" } } as const; +export const MUST_REFETCH_MESSAGE = { headers: { control: "must-refetch" } } as const; + +/** A parsed per-table shape response: continuation headers + the change rows. */ +export type ParsedShape = { + status: number; + handle?: string; + offset?: string; + cursor?: string; + schema?: string; + /** Change messages only (control messages stripped). */ + changes: unknown[]; + upToDate: boolean; + mustRefetch: boolean; +}; + +/** The prior per-table continuation the client sent (used when a shape is left + * un-polled because the other returned first). */ +export type PriorContinuation = { + handleA?: string; + offsetA: string; + cursorA?: string; + handleB?: string; + offsetB: string; + cursorB?: string; +}; + +export type MergedShape = + | { mustRefetch: true } + | { + mustRefetch: false; + changes: unknown[]; + handle: string; + offset: string; + cursor?: string; + schema?: string; + }; + +/** + * Split a composite "~" value back into its per-table parts. A value with + * no separator (or null/empty) means the client hasn't been handed a composite + * yet (the initial request before any shape exists) -> both undefined. 
+ */ +export function decodeCompositePart(value: string | null | undefined): { + a: string | undefined; + b: string | undefined; +} { + if (!value) return { a: undefined, b: undefined }; + const idx = value.indexOf(COMPOSITE_SEP); + if (idx === -1) return { a: undefined, b: undefined }; + return { + a: value.slice(0, idx) || undefined, + b: value.slice(idx + COMPOSITE_SEP.length) || undefined, + }; +} + +/** + * The offset is never absent — Electric uses "-1" for the initial request — so + * a bare value applies to BOTH shapes (initial), and a composite splits. + */ +export function decodeCompositeOffset(offset: string): { a: string; b: string } { + const idx = offset.indexOf(COMPOSITE_SEP); + if (idx === -1) return { a: offset, b: offset }; + return { a: offset.slice(0, idx), b: offset.slice(idx + COMPOSITE_SEP.length) }; +} + +export function encodeComposite(a: string, b: string): string { + return `${a}${COMPOSITE_SEP}${b}`; +} + +/** Parse the raw body + headers of one upstream shape response. */ +export function parseShapeMessages( + status: number, + headers: { + handle?: string; + offset?: string; + cursor?: string; + schema?: string; + }, + bodyText: string +): ParsedShape { + const base = { status, ...headers }; + if (status >= 400) { + return { ...base, changes: [], upToDate: false, mustRefetch: status === 409 }; + } + let parsed: unknown; + try { + parsed = bodyText.trim() ? JSON.parse(bodyText) : []; + } catch { + // Unparseable body — safest is to make the client refetch the shape. 
+ return { ...base, changes: [], upToDate: false, mustRefetch: true }; + } + if (!Array.isArray(parsed)) { + return { ...base, changes: [], upToDate: false, mustRefetch: true }; + } + const messages = parsed as Array<{ headers?: { control?: string } }>; + const changes = messages.filter((m) => !m?.headers?.control); + const mustRefetch = messages.some((m) => m?.headers?.control === "must-refetch"); + const upToDate = messages.some((m) => m?.headers?.control === "up-to-date"); + return { ...base, changes, upToDate, mustRefetch }; +} + +/** + * Merge two parsed per-table shapes into one composite payload. If either shape + * needs a refetch (409 / must-refetch / unparseable), the whole composite is + * reset. Otherwise the change rows are concatenated (the client merges by key, + * so order across tables doesn't matter) and the continuation values are packed + * per table, falling back to the client's prior value for a shape that wasn't + * re-polled this round. + */ +export function mergeParsedShapes( + a: ParsedShape, + b: ParsedShape, + prior: PriorContinuation +): MergedShape { + if (a.mustRefetch || b.mustRefetch || a.status >= 400 || b.status >= 400) { + return { mustRefetch: true }; + } + const cursorA = a.cursor ?? prior.cursorA; + const cursorB = b.cursor ?? prior.cursorB; + const cursor = + cursorA !== undefined || cursorB !== undefined + ? encodeComposite(cursorA ?? "", cursorB ?? "") + : undefined; + return { + mustRefetch: false, + changes: [...a.changes, ...b.changes], + handle: encodeComposite(a.handle ?? prior.handleA ?? "", b.handle ?? prior.handleB ?? ""), + offset: encodeComposite(a.offset ?? prior.offsetA, b.offset ?? prior.offsetB), + cursor, + schema: a.schema ?? b.schema, + }; +} + +/** A synthetic "no change this round" result for a shape left un-polled because + * the other returned changes first; carries its prior continuation forward. 
*/ +export function unpolledShape( + which: "a" | "b", + prior: PriorContinuation +): ParsedShape { + return { + status: 200, + handle: which === "a" ? prior.handleA : prior.handleB, + offset: which === "a" ? prior.offsetA : prior.offsetB, + cursor: which === "a" ? prior.cursorA : prior.cursorB, + changes: [], + upToDate: true, + mustRefetch: false, + }; +} diff --git a/apps/webapp/app/services/realtimeClient.server.ts b/apps/webapp/app/services/realtimeClient.server.ts index 12b93f1996d..d2e68c64e4e 100644 --- a/apps/webapp/app/services/realtimeClient.server.ts +++ b/apps/webapp/app/services/realtimeClient.server.ts @@ -1,6 +1,18 @@ import { json } from "@remix-run/server-runtime"; import { tryCatch } from "@trigger.dev/core/utils"; -import { safeParseNaturalLanguageDurationAgo } from "@trigger.dev/core/v3/isomorphic"; +import { isKsuidId, safeParseNaturalLanguageDurationAgo } from "@trigger.dev/core/v3/isomorphic"; +import { + decodeCompositeOffset, + decodeCompositePart, + mergeParsedShapes, + MUST_REFETCH_MESSAGE, + parseShapeMessages, + unpolledShape, + UP_TO_DATE_MESSAGE, + type MergedShape, + type ParsedShape, + type PriorContinuation, +} from "./realtime/electricShapeMerge.server"; import { Callback, Result } from "ioredis"; import { randomUUID } from "node:crypto"; import { createRedisClient, RedisClient, RedisWithClusterOptions } from "~/redis.server"; @@ -49,6 +61,11 @@ const DEFAULT_ELECTRIC_COLUMNS = [ const RESERVED_COLUMNS = ["id", "taskIdentifier", "friendlyId", "status", "createdAt"]; const RESERVED_SEARCH_PARAMS = ["createdAt", "tags", "skipColumns"]; +// The two physical run tables a realtime shape can target. A run lives in +// exactly one, keyed by id format (ksuid -> task_run_v2, cuid -> TaskRun). 
+const TASK_RUN_TABLE = 'public."TaskRun"'; +const TASK_RUN_V2_TABLE = 'public."task_run_v2"'; + export type RealtimeClientOptions = { electricOrigin: string | string[]; redis: RedisWithClusterOptions; @@ -118,10 +135,15 @@ export class RealtimeClient { clientVersion?: string, signal?: AbortSignal ) { + // Route the shape to the physical table the run lives in: a v2 run's id is + // a KSUID (task_run_v2), a legacy run's a cuid (TaskRun). The run was + // already resolved by the route, so this id is authoritative. + const table = isKsuidId(runId) ? TASK_RUN_V2_TABLE : TASK_RUN_TABLE; return this.#streamRunsWhere( url, environment, `id='${runId}'`, + table, apiVersion, requestOptions, clientVersion, @@ -145,7 +167,7 @@ export class RealtimeClient { const whereClause = whereClauses.join(" AND "); - return this.#streamRunsWhere( + return this.#streamRunsAcrossTables( url, environment, whereClause, @@ -179,7 +201,7 @@ export class RealtimeClient { const whereClause = whereClauses.join(" AND "); - const response = await this.#streamRunsWhere( + const response = await this.#streamRunsAcrossTables( url, environment, whereClause, @@ -278,6 +300,7 @@ export class RealtimeClient { url: URL | string, environment: RealtimeEnvironment, whereClause: string, + table: string, apiVersion: API_VERSIONS, requestOptions?: RealtimeRequestOptions, clientVersion?: string, @@ -287,6 +310,7 @@ export class RealtimeClient { url, environment, whereClause, + table, requestOptions, clientVersion ); @@ -300,10 +324,266 @@ export class RealtimeClient { ); } + // Stream a feed that spans BOTH physical run tables (the tag-list and batch + // feeds) by running two upstream Electric shapes — public."TaskRun" and + // public."task_run_v2" — under a single composite continuation the client + // round-trips opaquely. A run lives in exactly one table, so the union of the + // two shapes is the full feed; the client merges by row key and never learns + // there are two shapes. 
See electricShapeMerge.server.ts for the pure logic. + // + // Cost: this opens TWO upstream Electric long-polls per tag/batch + // subscription (vs one for a single-table feed), so these feeds use ~2x + // Electric connections while an org has runs across both tables. Single-run + // subscriptions are unaffected — one shape, routed to the run's table by id + // format. + async #streamRunsAcrossTables( + url: URL | string, + environment: RealtimeEnvironment, + whereClause: string, + apiVersion: API_VERSIONS, + requestOptions?: RealtimeRequestOptions, + clientVersion?: string, + signal?: AbortSignal + ): Promise { + const $url = new URL(url.toString()); + const isLive = isLiveRequestUrl($url); + const incomingHandle = extractShapeId($url); + const incomingOffset = $url.searchParams.get("offset") ?? "-1"; + const incomingCursor = $url.searchParams.get("cursor"); + + const handles = decodeCompositePart(incomingHandle); + const offsets = decodeCompositeOffset(incomingOffset); + const cursors = decodeCompositePart(incomingCursor); + + const prior: PriorContinuation = { + handleA: handles.a, + offsetA: offsets.a, + cursorA: cursors.a, + handleB: handles.b, + offsetB: offsets.b, + cursorB: cursors.b, + }; + + const urlA = this.#constructMergeShapeUrl( + $url, + environment, + whereClause, + TASK_RUN_TABLE, + { handle: handles.a, offset: offsets.a, cursor: cursors.a }, + requestOptions, + clientVersion + ); + const urlB = this.#constructMergeShapeUrl( + $url, + environment, + whereClause, + TASK_RUN_V2_TABLE, + { handle: handles.b, offset: offsets.b, cursor: cursors.b }, + requestOptions, + clientVersion + ); + + // One concurrency slot for the composite live request: it maps to a single + // client request even though we fan out to two upstream long-polls. 
+ let requestId: string | undefined; + if (isLive && incomingHandle) { + const concurrencyLimit = await this.cachedLimitProvider.getCachedLimit( + environment.organizationId, + 100_000 + ); + if (!concurrencyLimit) { + logger.error("Failed to get concurrency limit", { + organizationId: environment.organizationId, + }); + return json({ error: "Failed to get concurrency limit" }, { status: 500 }); + } + requestId = randomUUID(); + if (!(await this.#incrementAndCheck(environment.id, requestId, concurrencyLimit))) { + return json({ error: "Too many concurrent requests" }, { status: 429 }); + } + } + + try { + const merged = await this.#raceAndMergeShapes(urlA, urlB, isLive, prior, signal); + return this.#buildMergeResponse(merged, isLive, apiVersion, clientVersion); + } finally { + if (requestId) { + await this.#decrementConcurrency(environment.id, requestId); + } + } + } + + // Build the per-table Electric URL, replacing the composite continuation the + // client sent with this table's decoded part. + #constructMergeShapeUrl( + baseUrl: URL, + environment: RealtimeEnvironment, + whereClause: string, + table: string, + perTable: { handle?: string; offset: string; cursor?: string }, + requestOptions?: RealtimeRequestOptions, + clientVersion?: string + ): URL { + const electricUrl = this.#constructRunsElectricUrl( + baseUrl, + environment, + whereClause, + table, + requestOptions, + clientVersion + ); + // Upstream always speaks current Electric (handle, not shape_id). + electricUrl.searchParams.delete("shape_id"); + if (perTable.handle !== undefined) { + electricUrl.searchParams.set("handle", perTable.handle); + } else { + electricUrl.searchParams.delete("handle"); + } + electricUrl.searchParams.set("offset", perTable.offset); + if (perTable.cursor !== undefined) { + electricUrl.searchParams.set("cursor", perTable.cursor); + } else { + electricUrl.searchParams.delete("cursor"); + } + return electricUrl; + } + + // Fetch both shapes. 
For a live request, return as soon as ONE yields changes + // (or needs a refetch) and carry the other's prior continuation forward — so a + // change on either table isn't delayed by the other's idle long-poll. If the + // first to settle had nothing, wait for the other before responding. + async #raceAndMergeShapes( + urlA: URL, + urlB: URL, + isLive: boolean, + prior: PriorContinuation, + signal?: AbortSignal + ): Promise { + const ctlA = new AbortController(); + const ctlB = new AbortController(); + const link = (ctl: AbortController) => + signal ? AbortSignal.any([signal, ctl.signal]) : ctl.signal; + + let aRes: ParsedShape | undefined; + let bRes: ParsedShape | undefined; + const pA = this.#fetchShape(urlA, link(ctlA)).then((r) => { + aRes = r; + return "a" as const; + }); + const pB = this.#fetchShape(urlB, link(ctlB)).then((r) => { + bRes = r; + return "b" as const; + }); + // A shape we don't end up awaiting (the race loser we abort, or the sibling + // left pending when the catch below rethrows) must not surface as an + // unhandled rejection. Attach detached no-op catches up front; the + // race/await paths still observe the original rejections through their own + // reactions, so this only swallows an otherwise-orphaned rejection. + void pA.catch(() => {}); + void pB.catch(() => {}); + + try { + if (!isLive) { + await Promise.all([pA, pB]); + return mergeParsedShapes(aRes!, bRes!, prior); + } + + const actionable = (r: ParsedShape) => + r.mustRefetch || r.status >= 400 || r.changes.length > 0; + + const first = await Promise.race([pA, pB]); + const firstRes = first === "a" ? aRes! : bRes!; + if (actionable(firstRes)) { + // Got changes/refetch from one shape; abort the other and return + // immediately. Its rejection is already swallowed by the catch attached + // above, so the abort can't surface as an unhandled rejection. + (first === "a" ? ctlB : ctlA).abort(); + return first === "a" + ? 
mergeParsedShapes(aRes!, unpolledShape("b", prior), prior) + : mergeParsedShapes(unpolledShape("a", prior), bRes!, prior); + } + + // First settled empty (idle timeout) — wait for the other. + await (first === "a" ? pB : pA); + return mergeParsedShapes(aRes!, bRes!, prior); + } catch (error) { + ctlA.abort(); + ctlB.abort(); + throw error; + } + } + + async #fetchShape(electricUrl: URL, signal?: AbortSignal): Promise { + const resp = await longPollingFetch(electricUrl.toString(), { signal }); + const headers = { + handle: + resp.headers.get("electric-handle") ?? resp.headers.get("electric-shape-id") ?? undefined, + offset: + resp.headers.get("electric-offset") ?? + resp.headers.get("electric-chunk-last-offset") ?? + undefined, + cursor: resp.headers.get("electric-cursor") ?? undefined, + schema: resp.headers.get("electric-schema") ?? undefined, + }; + if (resp.status >= 400) { + try { + await resp.body?.cancel(); + } catch {} + return parseShapeMessages(resp.status, headers, ""); + } + const bodyText = await resp.text(); + return parseShapeMessages(resp.status, headers, bodyText); + } + + #buildMergeResponse( + merged: MergedShape, + isLive: boolean, + apiVersion: API_VERSIONS, + clientVersion?: string + ): Response { + const responseHeaders = new Headers(); + responseHeaders.set("content-type", "application/json"); + responseHeaders.set("cache-control", "no-store"); + // Match the native client: expose electric-* headers cross-origin or the + // deployed react-hooks fail with MissingHeadersError. + responseHeaders.set("access-control-allow-origin", "*"); + responseHeaders.set("access-control-expose-headers", "*"); + + if (merged.mustRefetch) { + // Reset the client's shape state; it refetches both tables from scratch. 
+ return new Response(JSON.stringify([MUST_REFETCH_MESSAGE, UP_TO_DATE_MESSAGE]), { + status: 409, + headers: responseHeaders, + }); + } + + if (clientVersion) { + responseHeaders.set("electric-handle", merged.handle); + responseHeaders.set("electric-offset", merged.offset); + } else { + responseHeaders.set("electric-shape-id", merged.handle); + responseHeaders.set("electric-chunk-last-offset", merged.offset); + } + if (isLive) { + // The client requires electric-cursor on every live response (its live + // cache-buster). Fall back to the offset if neither shape provided one. + responseHeaders.set("electric-cursor", merged.cursor ?? merged.offset); + } else if (merged.schema !== undefined) { + // Non-live responses require electric-schema. + responseHeaders.set("electric-schema", merged.schema); + } + + const body = JSON.stringify([...merged.changes, UP_TO_DATE_MESSAGE]); + const finalBody = + apiVersion === CURRENT_API_VERSION ? body : this.#rewriteResponseBodyForNoneApiVersion(body); + return new Response(finalBody, { status: 200, headers: responseHeaders }); + } + #constructRunsElectricUrl( url: URL | string, environment: RealtimeEnvironment, whereClause: string, + table: string, requestOptions?: RealtimeRequestOptions, clientVersion?: string ): URL { @@ -322,7 +602,7 @@ export class RealtimeClient { }); electricUrl.searchParams.set("where", whereClause); - electricUrl.searchParams.set("table", 'public."TaskRun"'); + electricUrl.searchParams.set("table", table); if (!clientVersion) { // If the client version is not provided, that means we're using an older client diff --git a/apps/webapp/app/v3/runTableV2.server.ts b/apps/webapp/app/v3/runTableV2.server.ts index 5fa089fbd6e..51b55aefbe3 100644 --- a/apps/webapp/app/v3/runTableV2.server.ts +++ b/apps/webapp/app/v3/runTableV2.server.ts @@ -1,15 +1,5 @@ import { FEATURE_FLAG, FeatureFlagCatalog } from "~/v3/featureFlags"; -export type ShouldUseV2RunTableOptions = { - /** - * Whether the native realtime backend is 
enabled for this deployment - * (`env.REALTIME_BACKEND_NATIVE_ENABLED === "1"`). Passed in rather than read - * from env here so this stays a pure, env-free function the caller can - * unit-test directly. - */ - nativeRealtimeEnabled: boolean; -}; - /** * Per-org cutover switch for the parallel `task_run_v2` run table. * @@ -19,45 +9,20 @@ export type ShouldUseV2RunTableOptions = { * routes the run to `task_run_v2`; off (the default) → mint a legacy id, which * routes to `TaskRun`. * - * GATED ON NATIVE REALTIME. The Electric realtime backend serves shapes bound - * to a single table (`TaskRun`) and is being retired; only the native backend - * is table-agnostic and can observe a `task_run_v2` run in realtime - * (subscribeToRun / useRealtimeRun / poll). Routing a run to v2 while the org is - * still served by Electric would make that run silently invisible in realtime, - * so v2 requires BOTH the deployment master switch (`nativeRealtimeEnabled`) and - * the org's `realtimeBackend` flag set to "native". This is a temporary - * coupling: once Electric is removed and native is the only/default backend, - * drop the native check. - * * RunStore never reads this flag: it routes purely by id format. The flag only * decides which id scheme is minted upstream. Disabling it sends only NEW runs * back to legacy; runs already created on v2 stay readable there (routed by id). */ -export function shouldUseV2RunTable( - orgFeatureFlags: unknown, - options: ShouldUseV2RunTableOptions -): boolean { +export function shouldUseV2RunTable(orgFeatureFlags: unknown): boolean { if (orgFeatureFlags === null || typeof orgFeatureFlags !== "object") { return false; } - const flags = orgFeatureFlags as Record; - - // Native realtime is a hard prerequisite (see doc comment): a v2 run is only - // observable in realtime on the native backend. 
- if (!options.nativeRealtimeEnabled) { - return false; - } - const backend = FeatureFlagCatalog[FEATURE_FLAG.realtimeBackend].safeParse( - flags[FEATURE_FLAG.realtimeBackend] - ); - if (!(backend.success && backend.data === "native")) { - return false; - } - const override = flags[FEATURE_FLAG.runTableV2]; + const override = (orgFeatureFlags as Record)[FEATURE_FLAG.runTableV2]; if (override === undefined) { return false; } + const parsed = FeatureFlagCatalog[FEATURE_FLAG.runTableV2].safeParse(override); return parsed.success ? parsed.data : false; } diff --git a/apps/webapp/test/electricShapeMerge.test.ts b/apps/webapp/test/electricShapeMerge.test.ts new file mode 100644 index 00000000000..7f0bf9e0b5b --- /dev/null +++ b/apps/webapp/test/electricShapeMerge.test.ts @@ -0,0 +1,201 @@ +import { describe, expect, it } from "vitest"; +import { + decodeCompositeOffset, + decodeCompositePart, + encodeComposite, + mergeParsedShapes, + parseShapeMessages, + unpolledShape, + type ParsedShape, + type PriorContinuation, +} from "~/services/realtime/electricShapeMerge.server"; + +const INSERT = { + key: '"public"."TaskRun"/"r1"', + value: { id: "r1" }, + headers: { operation: "insert" }, +}; +const UPDATE = { + key: '"public"."task_run_v2"/"r2"', + value: { id: "r2" }, + headers: { operation: "update" }, +}; + +function shape(overrides: Partial = {}): ParsedShape { + return { + status: 200, + handle: "h", + offset: "o", + cursor: "c", + schema: '{"id":{"type":"text"}}', + changes: [], + upToDate: true, + mustRefetch: false, + ...overrides, + }; +} + +const PRIOR: PriorContinuation = { + handleA: "HA", + offsetA: "OA", + cursorA: "CA", + handleB: "HB", + offsetB: "OB", + cursorB: "CB", +}; + +describe("decodeCompositePart", () => { + it("returns both undefined for null / no separator", () => { + expect(decodeCompositePart(null)).toEqual({ a: undefined, b: undefined }); + expect(decodeCompositePart(undefined)).toEqual({ a: undefined, b: undefined }); + 
expect(decodeCompositePart("")).toEqual({ a: undefined, b: undefined }); + // A bare value with no separator means "not a composite yet" -> initial. + expect(decodeCompositePart("solo")).toEqual({ a: undefined, b: undefined }); + }); + + it("splits a composite into its two parts", () => { + expect(decodeCompositePart("hA~hB")).toEqual({ a: "hA", b: "hB" }); + }); + + it("treats an empty side as undefined", () => { + expect(decodeCompositePart("hA~")).toEqual({ a: "hA", b: undefined }); + expect(decodeCompositePart("~hB")).toEqual({ a: undefined, b: "hB" }); + }); +}); + +describe("decodeCompositeOffset", () => { + it("applies a bare offset (e.g. the initial -1) to both shapes", () => { + expect(decodeCompositeOffset("-1")).toEqual({ a: "-1", b: "-1" }); + }); + + it("splits a composite offset", () => { + expect(decodeCompositeOffset("26800552_0~26800999_2")).toEqual({ + a: "26800552_0", + b: "26800999_2", + }); + }); + + it("round-trips through encodeComposite", () => { + expect(decodeCompositeOffset(encodeComposite("x_1", "y_2"))).toEqual({ a: "x_1", b: "y_2" }); + }); +}); + +describe("parseShapeMessages", () => { + const headers = { handle: "h", offset: "o", cursor: "c", schema: "s" }; + + it("extracts change rows and the up-to-date flag", () => { + const body = JSON.stringify([INSERT, { headers: { control: "up-to-date" } }]); + const parsed = parseShapeMessages(200, headers, body); + expect(parsed.changes).toEqual([INSERT]); + expect(parsed.upToDate).toBe(true); + expect(parsed.mustRefetch).toBe(false); + }); + + it("treats a bare up-to-date as no changes", () => { + const parsed = parseShapeMessages( + 200, + headers, + JSON.stringify([{ headers: { control: "up-to-date" } }]) + ); + expect(parsed.changes).toEqual([]); + expect(parsed.upToDate).toBe(true); + }); + + it("flags must-refetch from a 409 status", () => { + const parsed = parseShapeMessages(409, headers, ""); + expect(parsed.mustRefetch).toBe(true); + expect(parsed.changes).toEqual([]); + }); + + 
it("flags must-refetch from a control message", () => { + const body = JSON.stringify([ + { headers: { control: "must-refetch" } }, + { headers: { control: "up-to-date" } }, + ]); + expect(parseShapeMessages(200, headers, body).mustRefetch).toBe(true); + }); + + it("flags must-refetch for an unparseable / non-array body", () => { + expect(parseShapeMessages(200, headers, "not json").mustRefetch).toBe(true); + expect(parseShapeMessages(200, headers, "{}").mustRefetch).toBe(true); + }); + + it("treats an empty body as no changes (not up-to-date)", () => { + const parsed = parseShapeMessages(200, headers, ""); + expect(parsed.changes).toEqual([]); + expect(parsed.upToDate).toBe(false); + expect(parsed.mustRefetch).toBe(false); + }); +}); + +describe("mergeParsedShapes", () => { + it("concatenates change rows from both tables", () => { + const merged = mergeParsedShapes( + shape({ changes: [INSERT], handle: "hA", offset: "oA", cursor: "cA" }), + shape({ changes: [UPDATE], handle: "hB", offset: "oB", cursor: "cB" }), + PRIOR + ); + expect(merged.mustRefetch).toBe(false); + if (merged.mustRefetch) return; + expect(merged.changes).toEqual([INSERT, UPDATE]); + expect(merged.handle).toBe(encodeComposite("hA", "hB")); + expect(merged.offset).toBe(encodeComposite("oA", "oB")); + expect(merged.cursor).toBe(encodeComposite("cA", "cB")); + }); + + it("resets when either shape needs a refetch", () => { + expect(mergeParsedShapes(shape({ mustRefetch: true }), shape(), PRIOR)).toEqual({ + mustRefetch: true, + }); + expect(mergeParsedShapes(shape(), shape({ status: 409 }), PRIOR)).toEqual({ + mustRefetch: true, + }); + }); + + it("falls back to the prior continuation for a shape that returned nothing", () => { + // B was left un-polled (the other table returned changes first). 
+ const merged = mergeParsedShapes( + shape({ changes: [INSERT], handle: "hA2", offset: "oA2", cursor: "cA2" }), + unpolledShape("b", PRIOR), + PRIOR + ); + expect(merged.mustRefetch).toBe(false); + if (merged.mustRefetch) return; + expect(merged.changes).toEqual([INSERT]); + expect(merged.handle).toBe(encodeComposite("hA2", "HB")); + expect(merged.offset).toBe(encodeComposite("oA2", "OB")); + expect(merged.cursor).toBe(encodeComposite("cA2", "CB")); + }); + + it("uses the prior cursor when a returned shape omits it", () => { + const merged = mergeParsedShapes( + shape({ cursor: undefined, handle: "hA", offset: "oA" }), + shape({ cursor: "cB", handle: "hB", offset: "oB" }), + PRIOR + ); + if (merged.mustRefetch) throw new Error("unexpected refetch"); + // a omitted cursor -> prior.cursorA ("CA"); b returned "cB". + expect(merged.cursor).toBe(encodeComposite("CA", "cB")); + }); + + it("omits the cursor entirely when neither shape nor prior has one (initial snapshot)", () => { + const initialPrior: PriorContinuation = { offsetA: "-1", offsetB: "-1" }; + const merged = mergeParsedShapes( + shape({ cursor: undefined, handle: "hA", offset: "oA" }), + shape({ cursor: undefined, handle: "hB", offset: "oB" }), + initialPrior + ); + if (merged.mustRefetch) throw new Error("unexpected refetch"); + expect(merged.cursor).toBeUndefined(); + }); + + it("carries schema from whichever shape supplied it", () => { + const merged = mergeParsedShapes( + shape({ schema: undefined }), + shape({ schema: '{"id":{"type":"text"}}' }), + PRIOR + ); + if (merged.mustRefetch) throw new Error("unexpected refetch"); + expect(merged.schema).toBe('{"id":{"type":"text"}}'); + }); +}); diff --git a/apps/webapp/test/realtimeClient.test.ts b/apps/webapp/test/realtimeClient.test.ts index d98213e5b17..cdff50e3d18 100644 --- a/apps/webapp/test/realtimeClient.test.ts +++ b/apps/webapp/test/realtimeClient.test.ts @@ -237,8 +237,13 @@ describe.skipIf(process.env.GITHUB_ACTIONS)("RealtimeClient", () => { 
const chunkOffset = headers["electric-offset"]; expect(response.status).toBe(200); + // The tag/list feed spans both physical run tables, so streamRuns merges + // two upstream Electric shapes (TaskRun + task_run_v2) under one composite + // cursor: handle and offset each pack the two per-table values joined by + // "~". Both shapes are at "0_0" for the initial snapshot. expect(shapeId).toBeDefined(); - expect(chunkOffset).toBe("0_0"); + expect(shapeId).toContain("~"); + expect(chunkOffset).toBe("0_0~0_0"); } ); diff --git a/apps/webapp/test/runTableV2.test.ts b/apps/webapp/test/runTableV2.test.ts index 7aa528b34a9..9abae4cb7bb 100644 --- a/apps/webapp/test/runTableV2.test.ts +++ b/apps/webapp/test/runTableV2.test.ts @@ -1,50 +1,28 @@ import { describe, expect, it } from "vitest"; import { shouldUseV2RunTable } from "~/v3/runTableV2.server"; -// v2 is gated on the org being served realtime by the NATIVE backend (Electric -// can't observe task_run_v2). That requires the deployment master switch -// (nativeRealtimeEnabled) AND the per-org `realtimeBackend` flag set to "native". 
-const NATIVE_ON = { nativeRealtimeEnabled: true }; -const NATIVE_OFF = { nativeRealtimeEnabled: false }; -const onNative = (extra: Record = {}) => ({ realtimeBackend: "native", ...extra }); - describe("shouldUseV2RunTable", () => { it("defaults to false when the org has no flags", () => { - expect(shouldUseV2RunTable(null, NATIVE_ON)).toBe(false); - expect(shouldUseV2RunTable(undefined, NATIVE_ON)).toBe(false); - expect(shouldUseV2RunTable({}, NATIVE_ON)).toBe(false); - }); - - it("returns true only when runTableV2 is boolean true AND the org is on native realtime", () => { - expect(shouldUseV2RunTable(onNative({ runTableV2: true }), NATIVE_ON)).toBe(true); - expect(shouldUseV2RunTable(onNative({ runTableV2: false }), NATIVE_ON)).toBe(false); + expect(shouldUseV2RunTable(null)).toBe(false); + expect(shouldUseV2RunTable(undefined)).toBe(false); + expect(shouldUseV2RunTable({})).toBe(false); }); - it("requires the native realtime backend (Electric can't observe v2 runs)", () => { - // runTableV2 on, but the org is not on native realtime → no v2 (it would be - // realtime-invisible). - expect(shouldUseV2RunTable({ runTableV2: true }, NATIVE_ON)).toBe(false); - expect(shouldUseV2RunTable({ runTableV2: true, realtimeBackend: "electric" }, NATIVE_ON)).toBe( - false - ); - expect(shouldUseV2RunTable({ runTableV2: true, realtimeBackend: "shadow" }, NATIVE_ON)).toBe( - false - ); - // On native per-org, but the deployment master switch is off → effectively - // still Electric → no v2. - expect(shouldUseV2RunTable(onNative({ runTableV2: true }), NATIVE_OFF)).toBe(false); + it("returns true only when the flag is the boolean true", () => { + expect(shouldUseV2RunTable({ runTableV2: true })).toBe(true); + expect(shouldUseV2RunTable({ runTableV2: false })).toBe(false); }); it("rejects a stringified flag value (strict boolean, no coercion)", () => { // A stringified "false" must not coerce to true and cut the org over. 
- expect(shouldUseV2RunTable(onNative({ runTableV2: "true" }), NATIVE_ON)).toBe(false); - expect(shouldUseV2RunTable(onNative({ runTableV2: "false" }), NATIVE_ON)).toBe(false); - expect(shouldUseV2RunTable(onNative({ runTableV2: 1 }), NATIVE_ON)).toBe(false); + expect(shouldUseV2RunTable({ runTableV2: "true" })).toBe(false); + expect(shouldUseV2RunTable({ runTableV2: "false" })).toBe(false); + expect(shouldUseV2RunTable({ runTableV2: 1 })).toBe(false); }); it("ignores unrelated flags and non-object inputs", () => { - expect(shouldUseV2RunTable(onNative({ mollifierEnabled: true }), NATIVE_ON)).toBe(false); - expect(shouldUseV2RunTable("runTableV2", NATIVE_ON)).toBe(false); - expect(shouldUseV2RunTable(42, NATIVE_ON)).toBe(false); + expect(shouldUseV2RunTable({ mollifierEnabled: true })).toBe(false); + expect(shouldUseV2RunTable("runTableV2")).toBe(false); + expect(shouldUseV2RunTable(42)).toBe(false); }); }); From ef54cb979f571862c4e0beea220f1172162d37aa Mon Sep 17 00:00:00 2001 From: Dan Sutton Date: Mon, 22 Jun 2026 17:42:30 +0100 Subject: [PATCH 63/83] fix(webapp,run-engine): close cross-table gaps in the task_run_v2 mixed window Routes that walk the run hierarchy through a Prisma relation only see one physical table, so during a runTableV2 flag flip (a parent and child on opposite tables) they silently miss the cross-table run. This closes the reachable cases: - cancelRun resolves child runs across both tables, so cancelling a parent cascades to a child in the other table instead of leaving it executing and holding concurrency. - updateMetadata routes metadata.parent/root operations to the scalar parent/root id, so they reach a parent in the other table instead of falling back to the child run. - a one-time-use token with no idempotency key now takes a cross-table claim for v2 orgs, so two presentations straddling a flip cannot each mint a run in a different table. 
- the Electric shape merge reports up-to-date only when both tables are caught up, so a multi-chunk initial snapshot no longer drops the rows that arrive after the first chunk. --- .../concerns/idempotencyKeys.server.ts | 90 +++++++++-- .../metadata/updateMetadata.server.ts | 24 ++- .../realtime/electricShapeMerge.server.ts | 16 ++ .../app/services/realtimeClient.server.ts | 8 +- apps/webapp/test/electricShapeMerge.test.ts | 49 ++++++ apps/webapp/test/oneTimeUseTokenClaim.test.ts | 153 ++++++++++++++++++ apps/webapp/test/updateMetadata.test.ts | 109 +++++++++++++ .../src/engine/systems/runAttemptSystem.ts | 24 ++- 8 files changed, 441 insertions(+), 32 deletions(-) create mode 100644 apps/webapp/test/oneTimeUseTokenClaim.test.ts diff --git a/apps/webapp/app/runEngine/concerns/idempotencyKeys.server.ts b/apps/webapp/app/runEngine/concerns/idempotencyKeys.server.ts index e8dd61c5c87..383ce4399cc 100644 --- a/apps/webapp/app/runEngine/concerns/idempotencyKeys.server.ts +++ b/apps/webapp/app/runEngine/concerns/idempotencyKeys.server.ts @@ -215,6 +215,73 @@ export class IdempotencyKeyConcern { new Date(Date.now() + 24 * 60 * 60 * 1000 * 30); // 30 days if (!idempotencyKey) { + // A one-time-use token with NO idempotency key would otherwise skip the + // claim path below entirely. During a `runTableV2` flag flip, two + // concurrent presentations of the same token can mint into DIFFERENT + // physical tables (cuid -> TaskRun, ksuid -> task_run_v2); the per-table + // unique constraint on `oneTimeUseToken` can't see across the two tables, + // so neither INSERT raises P2002 and one token spawns two runs. For + // v2-cutover orgs, serialise on the token via a Redis claim so the first + // presentation wins and the rest resolve to it. 
Excludes + // resumeParentOnCompletion (triggerAndWait) to match the buffer + // fallback's handling — a one-time PUBLIC_JWT token is a fire-and-forget + // public trigger, not a parent/child wait, so that case is left to the + // per-table constraint. + const oneTimeUseToken = request.options?.oneTimeUseToken; + if (oneTimeUseToken && !request.body.options?.resumeParentOnCompletion) { + const orgFeatureFlags = + (request.environment.organization?.featureFlags as + | Record + | null + | undefined) ?? null; + if (shouldUseV2RunTable(orgFeatureFlags)) { + // Namespace the claim key so a token can never collide with a real + // idempotency key in the same (envId, taskIdentifier) slot. The TTL is + // a fixed pipeline-dwell bound, NOT the customer idempotencyKeyTTL: + // there is no idempotency key in this path, so a client-supplied TTL + // has no meaning here, and a tiny value would expire the claim + // mid-flight and reopen the cross-table dup window. + const claimKey = `otu:${oneTimeUseToken}`; + const outcome = await claimOrAwait({ + envId: request.environment.id, + taskIdentifier: request.taskId, + idempotencyKey: claimKey, + ttlSeconds: env.TRIGGER_MOLLIFIER_CLAIM_TTL_SECONDS, + safetyNetMs: env.TRIGGER_MOLLIFIER_CLAIM_WAIT_MS, + pollStepMs: env.TRIGGER_MOLLIFIER_CLAIM_POLL_MS, + }); + if (outcome.kind === "resolved") { + // A concurrent presentation of the same one-time token already won + // and committed a run. Reject this one exactly as the within-table + // path does (the per-table oneTimeUseToken unique constraint raises + // P2002 -> RunOneTimeUseTokenError -> this same 4xx), preserving the + // "token already used" contract while closing the cross-table gap. 
+ throw new ServiceValidationError( + `Cannot trigger ${request.taskId} with a one-time use token as it has already been used.` + ); + } else if (outcome.kind === "timed_out") { + throw new ServiceValidationError( + "One-time-use token claim resolution timed out", + 503 + ); + } else if (outcome.kind === "claimed") { + // We own the claim. The trigger pipeline MUST publish (on success) + // or release (on error) it — wired through the returned `claim`, + // exactly like the idempotency-keyed path. + return { + isCached: false, + idempotencyKey, + idempotencyKeyExpiresAt, + claim: { + envId: request.environment.id, + taskIdentifier: request.taskId, + idempotencyKey: claimKey, + token: outcome.token, + }, + }; + } + } + } return { isCached: false, idempotencyKey, idempotencyKeyExpiresAt }; } @@ -329,17 +396,22 @@ export class IdempotencyKeyConcern { | Record | null | undefined) ?? null; - // v2-cutover orgs: ANY idempotency-keyed trigger can straddle a - // `runTableV2` flag flip into different physical tables (cuid -> TaskRun, - // ksuid -> task_run_v2), so the claim must serialise all of them — - // including triggerAndWait (resumeParentOnCompletion), debounce, and - // oneTimeUseToken, whose per-table unique constraints (idempotencyKey, - // oneTimeUseToken) can't see across the two tables. The + // v2-cutover orgs: an idempotency-keyed trigger can straddle a `runTableV2` + // flag flip into different physical tables (cuid -> TaskRun, ksuid -> + // task_run_v2), and the per-table idempotency-key unique constraints can't + // see across the two tables, so this claim (keyed on the idempotency key) + // is the only backstop that serialises same-key triggers across the flip, + // including triggerAndWait (resumeParentOnCompletion) and debounce. 
The // resumeParentOnCompletion/debounce/oneTimeUseToken exclusions below are // mollifier-gate alignment optimisations (those requests always return - // pass_through from the gate, so there's no buffer to serialise against) - // and don't apply to the cross-table concern. shouldUseV2RunTable is - // checked first so a v2 org skips the mollifier-flag resolve entirely. + // pass_through from the gate, so there's no buffer to serialise against); + // they don't apply to v2 orgs, which short-circuit to claimEligible via + // shouldUseV2RunTable regardless. oneTimeUseToken triggers with NO + // idempotency key are serialised separately by the token claim in the + // early-return block above; the residual same-token-with-two-different-keys + // case is not covered here (each key claims its own slot) and would require + // a pathological client. shouldUseV2RunTable is checked first so a v2 org + // skips the mollifier-flag resolve entirely. const claimEligible = shouldUseV2RunTable(orgFeatureFlags) || (!request.body.options?.resumeParentOnCompletion && diff --git a/apps/webapp/app/services/metadata/updateMetadata.server.ts b/apps/webapp/app/services/metadata/updateMetadata.server.ts index 2af44d747bd..d1beba9c42d 100644 --- a/apps/webapp/app/services/metadata/updateMetadata.server.ts +++ b/apps/webapp/app/services/metadata/updateMetadata.server.ts @@ -354,18 +354,14 @@ export class UpdateMetadataService { metadata: true, metadataType: true, metadataVersion: true, - parentTaskRun: { - select: { - id: true, - status: true, - }, - }, - rootTaskRun: { - select: { - id: true, - status: true, - }, - }, + // Scalar parent/root pointers, NOT the parentTaskRun/rootTaskRun + // relations: a relation select is bound to one physical run table and + // resolves to null when the parent/root lives in the other table (a + // v2 child of a legacy parent in the mixed window). 
The scalar id is + // table-agnostic, and #ingestRunOperations only needs the id — the + // flusher routes by id format across both tables. + parentTaskRunId: true, + rootTaskRunId: true, }, }, this._prisma @@ -380,11 +376,11 @@ export class UpdateMetadataService { } if (body.parentOperations && body.parentOperations.length > 0) { - this.#ingestRunOperations(taskRun.parentTaskRun?.id ?? taskRun.id, body.parentOperations); + this.#ingestRunOperations(taskRun.parentTaskRunId ?? taskRun.id, body.parentOperations); } if (body.rootOperations && body.rootOperations.length > 0) { - this.#ingestRunOperations(taskRun.rootTaskRun?.id ?? taskRun.id, body.rootOperations); + this.#ingestRunOperations(taskRun.rootTaskRunId ?? taskRun.id, body.rootOperations); } const result = await this.#updateRunMetadata({ diff --git a/apps/webapp/app/services/realtime/electricShapeMerge.server.ts b/apps/webapp/app/services/realtime/electricShapeMerge.server.ts index cfe494e4a47..751bcd22dc6 100644 --- a/apps/webapp/app/services/realtime/electricShapeMerge.server.ts +++ b/apps/webapp/app/services/realtime/electricShapeMerge.server.ts @@ -51,6 +51,16 @@ export type MergedShape = offset: string; cursor?: string; schema?: string; + /** + * The composite is up-to-date only when BOTH shapes are. An Electric + * snapshot can span multiple chunks: every chunk but the last omits the + * `up-to-date` control message. If one table's snapshot is still mid-fetch + * (chunk 1 of N) while the other has completed, the merged response must + * NOT terminate with `up-to-date` — otherwise the client believes the + * whole snapshot is done, flips to live, and never fetches the remaining + * chunks (silently dropping that table's overflow rows). + */ + upToDate: boolean; }; /** @@ -146,6 +156,12 @@ export function mergeParsedShapes( offset: encodeComposite(a.offset ?? prior.offsetA, b.offset ?? prior.offsetB), cursor, schema: a.schema ?? 
b.schema, + // Only terminate the composite when BOTH shapes have caught up; an + // un-up-to-date shape (a snapshot chunk that isn't the last) keeps the + // client requesting the remainder. unpolledShape() reports upToDate:true, + // so a live round that returns changes from one shape and carries the + // other forward still terminates iff the polled shape is itself up-to-date. + upToDate: a.upToDate && b.upToDate, }; } diff --git a/apps/webapp/app/services/realtimeClient.server.ts b/apps/webapp/app/services/realtimeClient.server.ts index d2e68c64e4e..cd95dbaaec4 100644 --- a/apps/webapp/app/services/realtimeClient.server.ts +++ b/apps/webapp/app/services/realtimeClient.server.ts @@ -573,7 +573,13 @@ export class RealtimeClient { responseHeaders.set("electric-schema", merged.schema); } - const body = JSON.stringify([...merged.changes, UP_TO_DATE_MESSAGE]); + // Only append the up-to-date terminator when BOTH upstream shapes are + // caught up. If one table's snapshot is still spanning chunks, omitting the + // terminator keeps the client in snapshot mode fetching the rest instead of + // prematurely flipping to live and dropping that table's remaining rows. + const body = JSON.stringify( + merged.upToDate ? [...merged.changes, UP_TO_DATE_MESSAGE] : [...merged.changes] + ); const finalBody = apiVersion === CURRENT_API_VERSION ? 
body : this.#rewriteResponseBodyForNoneApiVersion(body); return new Response(finalBody, { status: 200, headers: responseHeaders }); diff --git a/apps/webapp/test/electricShapeMerge.test.ts b/apps/webapp/test/electricShapeMerge.test.ts index 7f0bf9e0b5b..fb67d2ba6b4 100644 --- a/apps/webapp/test/electricShapeMerge.test.ts +++ b/apps/webapp/test/electricShapeMerge.test.ts @@ -198,4 +198,53 @@ describe("mergeParsedShapes", () => { if (merged.mustRefetch) throw new Error("unexpected refetch"); expect(merged.schema).toBe('{"id":{"type":"text"}}'); }); + + it("is up-to-date only when BOTH shapes are caught up (multi-chunk snapshot guard)", () => { + // Both caught up -> the composite terminates with up-to-date. + const both = mergeParsedShapes(shape({ upToDate: true }), shape({ upToDate: true }), PRIOR); + if (both.mustRefetch) throw new Error("unexpected refetch"); + expect(both.upToDate).toBe(true); + + // Table A is mid-snapshot (chunk 1 of N: rows but no up-to-date control + // message); B has completed. The composite must NOT be up-to-date — else + // the client flips to live after chunk 1 and silently drops A's remaining + // rows. The rows seen so far still flow through. + const aMidSnapshot = mergeParsedShapes( + shape({ changes: [INSERT], upToDate: false, handle: "hA", offset: "oA" }), + shape({ upToDate: true, handle: "hB", offset: "oB" }), + PRIOR + ); + if (aMidSnapshot.mustRefetch) throw new Error("unexpected refetch"); + expect(aMidSnapshot.upToDate).toBe(false); + expect(aMidSnapshot.changes).toEqual([INSERT]); + + // Symmetric: B mid-snapshot. 
+ const bMidSnapshot = mergeParsedShapes( + shape({ upToDate: true }), + shape({ changes: [UPDATE], upToDate: false }), + PRIOR + ); + if (bMidSnapshot.mustRefetch) throw new Error("unexpected refetch"); + expect(bMidSnapshot.upToDate).toBe(false); + }); + + it("a live round carrying the un-polled sibling terminates only when the polled shape is caught up", () => { + // unpolledShape reports upToDate:true, so the composite terminates iff the + // polled shape is itself caught up. + const caughtUp = mergeParsedShapes( + shape({ changes: [INSERT], upToDate: true }), + unpolledShape("b", PRIOR), + PRIOR + ); + if (caughtUp.mustRefetch) throw new Error("unexpected refetch"); + expect(caughtUp.upToDate).toBe(true); + + const moreComing = mergeParsedShapes( + shape({ changes: [INSERT], upToDate: false }), + unpolledShape("b", PRIOR), + PRIOR + ); + if (moreComing.mustRefetch) throw new Error("unexpected refetch"); + expect(moreComing.upToDate).toBe(false); + }); }); diff --git a/apps/webapp/test/oneTimeUseTokenClaim.test.ts b/apps/webapp/test/oneTimeUseTokenClaim.test.ts new file mode 100644 index 00000000000..b03d2d6de23 --- /dev/null +++ b/apps/webapp/test/oneTimeUseTokenClaim.test.ts @@ -0,0 +1,153 @@ +import { describe, expect, it, vi } from "vitest"; + +// Stub `~/db.server` before importing the concern — the real module eagerly +// calls `prisma.$connect()` at singleton construction. The concern under test +// receives its prisma via the constructor, and the one-time-token path below +// reaches the claim before any DB read, so the stub is never exercised. +vi.mock("~/db.server", () => ({ prisma: {}, $replica: {} })); + +// claimOrAwait resolves its backend through getIdempotencyClaimBuffer; script +// it via a hoisted handle so each test controls the claim outcome. 
+const h = vi.hoisted(() => ({ buffer: null as unknown })); +vi.mock("~/v3/mollifier/mollifierBuffer.server", () => ({ + getMollifierBuffer: () => h.buffer, + getIdempotencyClaimBuffer: () => h.buffer, +})); +// The one-time-token claim runs BEFORE the mollifier-flag resolve, but the +// concern still imports the gate module; stub it so loading doesn't pull in +// extra feature-flag wiring. +vi.mock("~/v3/mollifier/mollifierGate.server", () => ({ + makeResolveMollifierFlag: () => async () => false, +})); + +import type { MollifierBuffer } from "@trigger.dev/redis-worker"; +import { IdempotencyKeyConcern } from "~/runEngine/concerns/idempotencyKeys.server"; +import type { TriggerTaskRequest } from "~/runEngine/types"; + +function makeConcern() { + return new IdempotencyKeyConcern( + { + taskRun: { findFirst: async () => null }, + taskRunV2: { findFirst: async () => null }, + } as never, + {} as never, // engine — unused on this path + {} as never // traceEventConcern — unused on this path + ); +} + +function makeOtuRequest( + overrides: { + featureFlags?: Record; + oneTimeUseToken?: string | undefined; + resumeParentOnCompletion?: boolean; + } = {} +): TriggerTaskRequest { + return { + taskId: "my-task", + environment: { + id: "env_a", + organizationId: "org_1", + organization: { featureFlags: overrides.featureFlags ?? { runTableV2: true } }, + }, + // No idempotencyKey on purpose — this is the path the per-table + // oneTimeUseToken unique constraint cannot cover across two tables. + options: { oneTimeUseToken: "oneTimeUseToken" in overrides ? overrides.oneTimeUseToken : "tok-1" }, + body: { + options: overrides.resumeParentOnCompletion ? 
{ resumeParentOnCompletion: true } : {}, + }, + } as unknown as TriggerTaskRequest; +} + +describe("IdempotencyKeyConcern · one-time-use token cross-table claim", () => { + it("v2 org: a one-time token with no idempotency key takes a claim keyed on the token", async () => { + const claimIdempotency = vi.fn(async () => ({ kind: "claimed" as const })); + h.buffer = { + claimIdempotency, + readClaim: vi.fn(async () => null), + } as unknown as MollifierBuffer; + + const result = await makeConcern().handleTriggerRequest(makeOtuRequest(), undefined); + + expect(result.isCached).toBe(false); + if (result.isCached === false) { + // The trigger pipeline must publish/release this claim — keyed on the + // namespaced token so it can never collide with a real idempotency key. + expect(result.claim?.idempotencyKey).toBe("otu:tok-1"); + expect(result.claim?.envId).toBe("env_a"); + expect(result.claim?.taskIdentifier).toBe("my-task"); + } + expect(claimIdempotency).toHaveBeenCalledTimes(1); + expect(claimIdempotency.mock.calls[0][0]).toMatchObject({ idempotencyKey: "otu:tok-1" }); + }); + + it("v2 org: a concurrent winner (claim resolved) rejects the second presentation as already-used", async () => { + // The winner committed a run under the token; the loser must be rejected + // exactly like the within-table P2002 path, NOT allowed to mint a duplicate + // into the other table. 
+ h.buffer = { + claimIdempotency: vi.fn(async () => ({ kind: "resolved", runId: "run_winner" })), + readClaim: vi.fn(async () => null), + } as unknown as MollifierBuffer; + + await expect( + makeConcern().handleTriggerRequest(makeOtuRequest(), undefined) + ).rejects.toThrow(/already been used/i); + }); + + it("non-v2 org: skips the token claim entirely (no Redis round-trip)", async () => { + const claimIdempotency = vi.fn(async () => ({ kind: "claimed" as const })); + h.buffer = { + claimIdempotency, + readClaim: vi.fn(async () => null), + } as unknown as MollifierBuffer; + + const result = await makeConcern().handleTriggerRequest( + makeOtuRequest({ featureFlags: { mollifierEnabled: true } }), + undefined + ); + + expect(result.isCached).toBe(false); + if (result.isCached === false) { + expect(result.claim).toBeUndefined(); + } + expect(claimIdempotency).not.toHaveBeenCalled(); + }); + + it("triggerAndWait one-time token: left to the per-table constraint (not claimed here)", async () => { + const claimIdempotency = vi.fn(async () => ({ kind: "claimed" as const })); + h.buffer = { + claimIdempotency, + readClaim: vi.fn(async () => null), + } as unknown as MollifierBuffer; + + const result = await makeConcern().handleTriggerRequest( + makeOtuRequest({ resumeParentOnCompletion: true }), + undefined + ); + + expect(result.isCached).toBe(false); + if (result.isCached === false) { + expect(result.claim).toBeUndefined(); + } + expect(claimIdempotency).not.toHaveBeenCalled(); + }); + + it("no one-time token: ordinary no-idempotency-key trigger is unaffected", async () => { + const claimIdempotency = vi.fn(async () => ({ kind: "claimed" as const })); + h.buffer = { + claimIdempotency, + readClaim: vi.fn(async () => null), + } as unknown as MollifierBuffer; + + const result = await makeConcern().handleTriggerRequest( + makeOtuRequest({ oneTimeUseToken: undefined }), + undefined + ); + + expect(result.isCached).toBe(false); + if (result.isCached === false) { + 
expect(result.claim).toBeUndefined(); + } + expect(claimIdempotency).not.toHaveBeenCalled(); + }); +}); diff --git a/apps/webapp/test/updateMetadata.test.ts b/apps/webapp/test/updateMetadata.test.ts index b78a1a50a9f..11b6ba41c79 100644 --- a/apps/webapp/test/updateMetadata.test.ts +++ b/apps/webapp/test/updateMetadata.test.ts @@ -1,5 +1,6 @@ import { containerTest } from "@internal/testcontainers"; import { parsePacket } from "@trigger.dev/core/v3"; +import { isKsuidId, RunId } from "@trigger.dev/core/v3/isomorphic"; import { setTimeout } from "timers/promises"; import { describe } from "vitest"; import { PostgresRunStore } from "@internal/run-store"; @@ -1291,4 +1292,112 @@ describe("UpdateMetadataService.call", () => { service.stopFlushing(); } ); + + containerTest( + "routes parent metadata operations to a parent in the OTHER run table (cross-table hierarchy)", + async ({ prisma }) => { + const service = new UpdateMetadataService({ + prisma, + runStore: new PostgresRunStore({ prisma, readOnlyPrisma: prisma }), + flushIntervalMs: 100, + flushEnabled: true, + flushLoggingEnabled: true, + maximumSize: 1024 * 1024 * 1, + logLevel: "debug", + }); + + const organization = await prisma.organization.create({ + data: { title: "test", slug: "test" }, + }); + const project = await prisma.project.create({ + data: { name: "test", slug: "test", organizationId: organization.id, externalRef: "test" }, + }); + const runtimeEnvironment = await prisma.runtimeEnvironment.create({ + data: { + slug: "test", + type: "DEVELOPMENT", + projectId: project.id, + organizationId: organization.id, + apiKey: "test", + pkApiKey: "test", + shortcode: "test", + }, + }); + + // Legacy parent (cuid id) lives in TaskRun. This is the mixed-window + // hierarchy: an org flips runTableV2 on while a pre-flip parent is live, + // and its post-flip child mints a ksuid into task_run_v2. 
+ const parentId = RunId.generate(); + expect(isKsuidId(parentId.id)).toBe(false); + const parentTaskRun = await prisma.taskRun.create({ + data: { + id: parentId.id, + friendlyId: parentId.friendlyId, + taskIdentifier: "my-task", + payload: "{}", + payloadType: "application/json", + traceId: "t", + spanId: "s", + queue: "test", + runtimeEnvironmentId: runtimeEnvironment.id, + projectId: project.id, + organizationId: organization.id, + environmentType: "DEVELOPMENT", + engine: "V2", + }, + }); + + // v2 child (ksuid id) lives in task_run_v2 and points at the legacy + // parent by the scalar parentTaskRunId (no cross-table FK). + const childId = RunId.generateKsuid(); + expect(isKsuidId(childId.id)).toBe(true); + await prisma.taskRunV2.create({ + data: { + id: childId.id, + friendlyId: childId.friendlyId, + taskIdentifier: "my-child-task", + payload: "{}", + payloadType: "application/json", + traceId: "t", + spanId: "s", + queue: "test", + runtimeEnvironmentId: runtimeEnvironment.id, + projectId: project.id, + organizationId: organization.id, + environmentType: "DEVELOPMENT", + engine: "V2", + parentTaskRunId: parentTaskRun.id, + }, + }); + + // The child applies metadata.parent operations. Pre-fix, the table-bound + // parentTaskRun relation resolved null (parent is in the OTHER table), so + // the ops fell back to the child's own id — corrupting the child and + // never touching the parent. + await service.call(childId.id, { + parentOperations: [ + { type: "set", key: "foo", value: "bar" }, + { type: "append", key: "bar", value: "baz" }, + ], + }); + + // Wait for the buffered operations to flush. + await setTimeout(1000); + + // The PARENT (in TaskRun) must have received the operations. + const updatedParent = await prisma.taskRun.findFirst({ where: { id: parentTaskRun.id } }); + expect( + await parsePacket({ + data: updatedParent?.metadata ?? undefined, + dataType: updatedParent?.metadataType ?? 
"application/json", + }) + ).toEqual({ foo: "bar", bar: ["baz"] }); + + // The CHILD (in task_run_v2) must NOT have been polluted with parent ops. + const updatedChild = await prisma.taskRunV2.findFirst({ where: { id: childId.id } }); + expect(updatedChild?.metadata ?? null).toBeNull(); + + service.stopFlushing(); + } + ); }); diff --git a/internal-packages/run-engine/src/engine/systems/runAttemptSystem.ts b/internal-packages/run-engine/src/engine/systems/runAttemptSystem.ts index 977c94a8e83..dd311f70b9d 100644 --- a/internal-packages/run-engine/src/engine/systems/runAttemptSystem.ts +++ b/internal-packages/run-engine/src/engine/systems/runAttemptSystem.ts @@ -1427,6 +1427,7 @@ export class RunAttemptSystem { completedAt: true, taskEventStore: true, parentTaskRunId: true, + runtimeEnvironmentId: true, delayUntil: true, updatedAt: true, runtimeEnvironment: { @@ -1439,11 +1440,6 @@ export class RunAttemptSystem { id: true, }, }, - childRuns: { - select: { - id: true, - }, - }, }, }, prisma @@ -1548,9 +1544,21 @@ export class RunAttemptSystem { //schedule the cancellation of all the child runs //it will call this function for each child, - //which will recursively cancel all children if they need to be - if (run.childRuns.length > 0) { - for (const childRun of run.childRuns) { + //which will recursively cancel all children if they need to be. + //Resolve children across BOTH run tables: a v2 parent can have a legacy + //cuid child (or vice versa) in the runTableV2 mixed window, and a + //childRuns relation select is bound to the parent's own table, so it + //would silently skip the cross-table children and leave them executing + //and holding concurrency after the parent is cancelled. 
+ const childRuns = await this.$.runStore.findRuns( + { + where: { parentTaskRunId: runId, runtimeEnvironmentId: run.runtimeEnvironmentId }, + select: { id: true }, + }, + prisma + ); + if (childRuns.length > 0) { + for (const childRun of childRuns) { await this.$.worker.enqueue({ id: `cancelRun:${childRun.id}`, job: "cancelRun", From 3218843aada9328116c2ce3b38f9a3852035af4e Mon Sep 17 00:00:00 2001 From: Dan Sutton Date: Mon, 22 Jun 2026 17:44:53 +0100 Subject: [PATCH 64/83] test(run-engine): cover cross-table cancel cascade in the task_run_v2 mixed window A cuid parent (TaskRun) with a ksuid child (task_run_v2): cancelling the parent must cascade to the child in the other table. Fails against the old table-bound childRuns relation, passes with the cross-table findRuns lookup. --- .../src/engine/tests/cancelling.test.ts | 114 ++++++++++++++++++ 1 file changed, 114 insertions(+) diff --git a/internal-packages/run-engine/src/engine/tests/cancelling.test.ts b/internal-packages/run-engine/src/engine/tests/cancelling.test.ts index aecae7a2632..eff083658bc 100644 --- a/internal-packages/run-engine/src/engine/tests/cancelling.test.ts +++ b/internal-packages/run-engine/src/engine/tests/cancelling.test.ts @@ -1,5 +1,6 @@ import { containerTest, assertNonNullable } from "@internal/testcontainers"; import { trace } from "@internal/tracing"; +import { isKsuidId, RunId } from "@trigger.dev/core/v3/isomorphic"; import { expect } from "vitest"; import { RunEngine } from "../index.js"; import { setTimeout } from "timers/promises"; @@ -227,6 +228,119 @@ describe("RunEngine cancelling", () => { } ); + containerTest( + "Cancelling a parent cascades to a child in the OTHER run table (cross-table mixed window)", + async ({ prisma, redisOptions }) => { + const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + + const engine = new RunEngine({ + prisma, + worker: { + redis: redisOptions, + workers: 1, + tasksPerWorker: 10, + pollIntervalMs: 100, + }, 
+ queue: { + redis: redisOptions, + masterQueueConsumersDisabled: true, + processWorkerQueueDebounceMs: 50, + }, + runLock: { + redis: redisOptions, + }, + machines: { + defaultMachine: "small-1x", + machines: { + "small-1x": { + name: "small-1x" as const, + cpu: 0.5, + memory: 0.5, + centsPerMs: 0.0001, + }, + }, + baseCostInCents: 0.0001, + }, + tracer: trace.getTracer("test", "0.0.0"), + }); + + try { + const parentTask = "parent-task"; + const childTask = "child-task"; + await setupBackgroundWorker(engine, authenticatedEnvironment, [parentTask, childTask]); + + // Parent gets a cuid id (-> TaskRun); child gets a ksuid id + // (-> task_run_v2). This is exactly the hierarchy a runTableV2 flip + // creates while a pre-flip parent is still live. + const parentId = RunId.generate(); + const childId = RunId.generateKsuid(); + + const parentRun = await engine.trigger( + { + number: 1, + friendlyId: parentId.friendlyId, + environment: authenticatedEnvironment, + taskIdentifier: parentTask, + payload: "{}", + payloadType: "application/json", + context: {}, + traceContext: {}, + traceId: "tp", + spanId: "sp", + workerQueue: "main", + queue: `task/${parentTask}`, + isTest: false, + tags: [], + }, + prisma + ); + + const childRun = await engine.trigger( + { + number: 1, + friendlyId: childId.friendlyId, + environment: authenticatedEnvironment, + taskIdentifier: childTask, + payload: "{}", + payloadType: "application/json", + context: {}, + traceContext: {}, + traceId: "tc", + spanId: "sc", + workerQueue: "main", + queue: `task/${childTask}`, + isTest: false, + tags: [], + parentTaskRunId: parentRun.id, + }, + prisma + ); + + // The hierarchy genuinely straddles the two physical run tables. + expect(isKsuidId(parentRun.id)).toBe(false); + expect(isKsuidId(childRun.id)).toBe(true); + + // Cancel the (queued) parent. 
Pre-fix, cancelRun read children through + // the table-bound childRuns relation, which cannot see the v2 child, so + // the cascade skipped it and it kept its place in the queue. Post-fix, + // the cross-table findRuns finds the child and cancels it too. + await engine.cancelRun({ + runId: parentRun.id, + completedAt: new Date(), + reason: "Cancelled by the user", + }); + + // The child cancellation is enqueued as a job; give the worker a moment. + await setTimeout(1000); + + const childData = await engine.getRunExecutionData({ runId: childRun.id }); + expect(childData?.run.status).toBe("CANCELED"); + } finally { + await engine.quit(); + } + } + ); + containerTest("Cancelling a run (not executing)", async ({ prisma, redisOptions }) => { //create environment const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); From e64e950e4b582b54c7a9db5dc076be81d54229d9 Mon Sep 17 00:00:00 2001 From: Dan Sutton Date: Mon, 22 Jun 2026 17:55:59 +0100 Subject: [PATCH 65/83] fix(run-store): reject findRuns take without orderBy across both run tables An unordered take capped each run table independently and concatenated the two results, so a both-table read could silently drop one table rows once the other filled the cap. Reject it like the existing skip and cursor guards; callers that need a bounded cross-table read pass an orderBy for the keyset merge. 
--- .../run-store/src/PostgresRunStore.test.ts | 28 +++++++++++++++++++ .../run-store/src/PostgresRunStore.ts | 28 ++++++++++++------- 2 files changed, 46 insertions(+), 10 deletions(-) diff --git a/internal-packages/run-store/src/PostgresRunStore.test.ts b/internal-packages/run-store/src/PostgresRunStore.test.ts index 821fb53cc7d..3458d698e20 100644 --- a/internal-packages/run-store/src/PostgresRunStore.test.ts +++ b/internal-packages/run-store/src/PostgresRunStore.test.ts @@ -2400,6 +2400,34 @@ describe("PostgresRunStore — table routing by id format", () => { } ); + postgresTest( + "findRuns rejects `take` without `orderBy` across both tables (non-deterministic cap)", + async ({ prisma }) => { + const { environment } = await seedEnvironment(prisma); + const store = new PostgresRunStore({ prisma, readOnlyPrisma: prisma }); + + // A both-table predicate (no id-list) with `take` but no `orderBy` would + // cap each table independently and silently drop one table's overflow. + await expect( + store.findRuns({ + where: { runtimeEnvironmentId: environment.id }, + select: { id: true }, + take: 5, + }) + ).rejects.toThrow(/take.*orderBy/i); + + // The same read WITH an `orderBy` is a valid bounded cross-table merge. 
+ await expect( + store.findRuns({ + where: { runtimeEnvironmentId: environment.id }, + select: { id: true, createdAt: true }, + orderBy: { createdAt: "desc" }, + take: 5, + }) + ).resolves.toBeDefined(); + } + ); + postgresTest( "findRuns with an id-list partitions by id format and skips the table with no candidate ids", async ({ prisma }) => { diff --git a/internal-packages/run-store/src/PostgresRunStore.ts b/internal-packages/run-store/src/PostgresRunStore.ts index 242daf19fb6..ff78e246f94 100644 --- a/internal-packages/run-store/src/PostgresRunStore.ts +++ b/internal-packages/run-store/src/PostgresRunStore.ts @@ -936,6 +936,20 @@ export class PostgresRunStore implements RunStore { "RunStore.findRuns: a negative `take` (Prisma 'last N') is not supported across both run tables." ); } + // `take` without `orderBy` across BOTH tables is non-deterministic: each + // table is capped at `take` independently, then the two capped sets are + // concatenated, so once one table fills `take` the other table's rows are + // silently dropped. Reject it (like `skip`/`cursor` above) rather than + // return a result that may omit one table. Add an `orderBy` for a bounded + // cross-table merge, or scope the predicate to a single table. + if (args.take !== undefined && ordered.length === 0) { + throw new Error( + "RunStore.findRuns: `take` without `orderBy` is not supported across both run tables " + + "(each table is capped independently, so the cap is non-deterministic and may omit one " + + "table's rows). Add an `orderBy` for a bounded cross-table merge, or scope the predicate " + + "to a single table." + ); + } // ORDERED + LIMITED → bounded 2-way merge. if (ordered.length > 0 && args.take !== undefined) { @@ -960,9 +974,10 @@ export class PostgresRunStore implements RunStore { return this.#stripAddedKeys(merged, addedKeys); } - // UNORDERED / NO-LIMIT (or `take` without `orderBy`) → run the SAME args - // against both tables and concatenate. 
A run is in exactly one table, so - // concatenation is complete and has no duplicates. + // UNORDERED / NO-LIMIT → run the SAME args against both tables and + // concatenate. A run is in exactly one table, so concatenation is complete + // and has no duplicates. (`take` without `orderBy` was rejected above; + // `orderBy` + `take` took the bounded-merge branch above.) // // `orderBy` without `take` still needs the order keys projected so the // whole-set re-sort below can read them. @@ -985,13 +1000,6 @@ export class PostgresRunStore implements RunStore { combined = combined.sort(comparator); } - // `take` without `orderBy`: an unordered cap. Each table was capped at - // `take`, so the concatenation is at most `2*take`; trim to `take`. Order - // among unordered rows is unspecified either way. - if (args.take !== undefined) { - combined = combined.slice(0, args.take); - } - return this.#stripAddedKeys(combined, addedKeys); } From 8ee83c5a15781759b14af075119701a0a3e8a4da Mon Sep 17 00:00:00 2001 From: Dan Sutton Date: Mon, 22 Jun 2026 18:11:58 +0100 Subject: [PATCH 66/83] test(run-store): drop obsolete findRuns take-without-orderBy cap test The guard added in the previous commit makes that call throw rather than return a non-deterministic cap; this test asserted the removed cap behavior. The throw is covered by the guard test alongside the skip/cursor guards. 
--- .../run-store/src/PostgresRunStore.test.ts | 40 ++----------------- 1 file changed, 3 insertions(+), 37 deletions(-) diff --git a/internal-packages/run-store/src/PostgresRunStore.test.ts b/internal-packages/run-store/src/PostgresRunStore.test.ts index 3458d698e20..7efd3fce048 100644 --- a/internal-packages/run-store/src/PostgresRunStore.test.ts +++ b/internal-packages/run-store/src/PostgresRunStore.test.ts @@ -2292,43 +2292,9 @@ describe("PostgresRunStore — table routing by id format", () => { } ); - postgresTest( - "findRuns (take, no orderBy) caps the combined result across both tables", - async ({ prisma }) => { - const { organization, project, environment } = await seedEnvironment(prisma); - const store = new PostgresRunStore({ prisma, readOnlyPrisma: prisma }); - - // 2 legacy + 2 v2; an unordered `take: 3` must return exactly 3, all - // belonging to the scoped env. - const runs = [ - RunId.generate(), - RunId.generate(), - RunId.generateKsuid(), - RunId.generateKsuid(), - ]; - for (const run of runs) { - await seedRoutedRun(prisma, { - id: run.id, - friendlyId: run.friendlyId, - organizationId: organization.id, - projectId: project.id, - runtimeEnvironmentId: environment.id, - }); - } - - const found = await store.findRuns({ - where: { runtimeEnvironmentId: environment.id }, - select: { id: true }, - take: 3, - }); - - expect(found).toHaveLength(3); - const allIds = new Set(runs.map((r) => r.id)); - for (const run of found) { - expect(allIds.has(run.id)).toBe(true); - } - } - ); + // NOTE: `findRuns(take, no orderBy)` across both tables used to cap the + // concatenation to `take` (non-deterministic — could drop one table's rows). + // It now throws (see the guard test below, next to the skip/cursor guards). 
postgresTest( "findRuns (ordered+limited) by id alone is rejected: id is not a total cross-table order", From 0143ade91047c9388fa35cd01796e4cabf8cb83e Mon Sep 17 00:00:00 2001 From: Dan Sutton Date: Mon, 22 Jun 2026 18:54:46 +0100 Subject: [PATCH 67/83] fix(webapp): close pass-2 cross-table gaps (span-detail 500, one-time-token claim) - The strengthened findRuns guard threw on GET /api/v1/runs/:runId/spans/:spanId, which pages child runs with take and no orderBy across both tables. Add a createdAt order so it takes the bounded cross-table merge (and the 50-row cap is now deterministic, most recent first) instead of throwing for every org. - Key the one-time-use-token cross-table claim on the token alone (a reserved task slot), matching the task-independent oneTimeUseToken unique constraint, so a multi-task token cannot mint twice across the flip. Stop excluding triggerAndWait from the token claim. Always resolve a held claim on the success path (publish, else release) so it cannot leak until its TTL. 
--- .../api.v1.runs.$runId.spans.$spanId.ts | 5 +++ .../concerns/idempotencyKeys.server.ts | 41 ++++++++++++------- .../runEngine/services/triggerTask.server.ts | 29 ++++++++----- apps/webapp/test/oneTimeUseTokenClaim.test.ts | 21 ++++++---- 4 files changed, 64 insertions(+), 32 deletions(-) diff --git a/apps/webapp/app/routes/api.v1.runs.$runId.spans.$spanId.ts b/apps/webapp/app/routes/api.v1.runs.$runId.spans.$spanId.ts index 061199f33e9..4e8d85b7cf8 100644 --- a/apps/webapp/app/routes/api.v1.runs.$runId.spans.$spanId.ts +++ b/apps/webapp/app/routes/api.v1.runs.$runId.spans.$spanId.ts @@ -126,6 +126,11 @@ export const loader = createLoaderApiRoute( const triggeredRuns = await runStore.findRuns( { take: 50, + // A parentSpanId predicate spans both run tables (it carries no id), so + // the cross-table store requires a total-order key to bound the merge; + // createdAt also makes the 50-row cap deterministic (most recent first) + // rather than an arbitrary single-table slice. + orderBy: { createdAt: "desc" }, select: { friendlyId: true, taskIdentifier: true, diff --git a/apps/webapp/app/runEngine/concerns/idempotencyKeys.server.ts b/apps/webapp/app/runEngine/concerns/idempotencyKeys.server.ts index 383ce4399cc..3d910727844 100644 --- a/apps/webapp/app/runEngine/concerns/idempotencyKeys.server.ts +++ b/apps/webapp/app/runEngine/concerns/idempotencyKeys.server.ts @@ -21,6 +21,18 @@ import type { TraceEventConcern, TriggerTaskRequest } from "../types"; // handleTriggerRequest. const resolveOrgMollifierFlag = makeResolveMollifierFlag(); +// Reserved task slot for the cross-table one-time-use-token claim. The DB +// constraint `@@unique([oneTimeUseToken])` is TASK-INDEPENDENT, so the claim +// must be keyed on the token alone, not (task, token): a single token can +// authorise more than one task, and two presentations for different tasks +// straddling a `runTableV2` flip would otherwise build different claim keys and +// both proceed. 
Folding the token into one constant task slot makes the claim +// key (envId, token)-scoped, matching the DB constraint's scope. Paired with +// the `otu:` idempotencyKey prefix, collision with a real task's idempotency +// claim would require a task literally named this AND an idempotency key of the +// form `otu:<token>`. +const ONE_TIME_USE_TOKEN_CLAIM_TASK = "__one_time_use_token__"; + // Claim ownership context returned to the caller when the // IdempotencyKeyConcern won a pre-gate claim. Caller MUST publish the // winning runId on pipeline success (`publishClaim`) or release the @@ -222,29 +234,30 @@ export class IdempotencyKeyConcern { // unique constraint on `oneTimeUseToken` can't see across the two tables, // so neither INSERT raises P2002 and one token spawns two runs. For // v2-cutover orgs, serialise on the token via a Redis claim so the first - // presentation wins and the rest resolve to it. Excludes - // resumeParentOnCompletion (triggerAndWait) to match the buffer - // fallback's handling — a one-time PUBLIC_JWT token is a fire-and-forget - // public trigger, not a parent/child wait, so that case is left to the - // per-table constraint. + // presentation wins and the rest are rejected as already-used. Not + // excluded for resumeParentOnCompletion: for v2 orgs the idempotency-keyed + // claim covers triggerAndWait too (claimEligible short-circuits on + // shouldUseV2RunTable), so the token claim is consistent in doing the same; + // the loser is rejected (not returned a cached run), so there is no + // waitpoint-blocking subtlety to avoid. const oneTimeUseToken = request.options?.oneTimeUseToken; - if (oneTimeUseToken && !request.body.options?.resumeParentOnCompletion) { + if (oneTimeUseToken) { const orgFeatureFlags = (request.environment.organization?.featureFlags as | Record | null | undefined) ?? 
null; if (shouldUseV2RunTable(orgFeatureFlags)) { - // Namespace the claim key so a token can never collide with a real - // idempotency key in the same (envId, taskIdentifier) slot. The TTL is - // a fixed pipeline-dwell bound, NOT the customer idempotencyKeyTTL: - // there is no idempotency key in this path, so a client-supplied TTL - // has no meaning here, and a tiny value would expire the claim - // mid-flight and reopen the cross-table dup window. + // Key the claim on (envId, token), task-independent, to match the DB's + // task-independent oneTimeUseToken constraint (see the constant's + // comment). The TTL is a fixed pipeline-dwell bound, NOT the customer + // idempotencyKeyTTL: there is no idempotency key in this path, so a + // client-supplied TTL has no meaning here, and a tiny value would + // expire the claim mid-flight and reopen the cross-table dup window. const claimKey = `otu:${oneTimeUseToken}`; const outcome = await claimOrAwait({ envId: request.environment.id, - taskIdentifier: request.taskId, + taskIdentifier: ONE_TIME_USE_TOKEN_CLAIM_TASK, idempotencyKey: claimKey, ttlSeconds: env.TRIGGER_MOLLIFIER_CLAIM_TTL_SECONDS, safetyNetMs: env.TRIGGER_MOLLIFIER_CLAIM_WAIT_MS, @@ -274,7 +287,7 @@ export class IdempotencyKeyConcern { idempotencyKeyExpiresAt, claim: { envId: request.environment.id, - taskIdentifier: request.taskId, + taskIdentifier: ONE_TIME_USE_TOKEN_CLAIM_TASK, idempotencyKey: claimKey, token: outcome.token, }, diff --git a/apps/webapp/app/runEngine/services/triggerTask.server.ts b/apps/webapp/app/runEngine/services/triggerTask.server.ts index 8c61b7d7fcd..956433c28b0 100644 --- a/apps/webapp/app/runEngine/services/triggerTask.server.ts +++ b/apps/webapp/app/runEngine/services/triggerTask.server.ts @@ -716,17 +716,24 @@ export class RunEngineTriggerTaskService { } }, ); - // Pipeline returned successfully — publish the claim if we held - // one. Waiters polling for our key resolve to this runId. 
- if (idempotencyClaim && result?.run?.friendlyId) { - await publishMollifierClaim({ - envId: idempotencyClaim.envId, - taskIdentifier: idempotencyClaim.taskIdentifier, - idempotencyKey: idempotencyClaim.idempotencyKey, - token: idempotencyClaim.token, - runId: result.run.friendlyId, - ttlSeconds: env.TRIGGER_MOLLIFIER_CLAIM_TTL_SECONDS, - }); + // Pipeline returned — resolve the claim if we held one. On success (a run + // with a friendlyId) publish it so waiters resolve to this runId; + // otherwise release it. Never leave a held claim unresolved on the success + // path: an orphaned claim would block concurrent waiters for the full + // safety-net window even though this request did not produce a run. + if (idempotencyClaim) { + if (result?.run?.friendlyId) { + await publishMollifierClaim({ + envId: idempotencyClaim.envId, + taskIdentifier: idempotencyClaim.taskIdentifier, + idempotencyKey: idempotencyClaim.idempotencyKey, + token: idempotencyClaim.token, + runId: result.run.friendlyId, + ttlSeconds: env.TRIGGER_MOLLIFIER_CLAIM_TTL_SECONDS, + }); + } else { + await releaseMollifierClaim(idempotencyClaim); + } } return result; } catch (err) { diff --git a/apps/webapp/test/oneTimeUseTokenClaim.test.ts b/apps/webapp/test/oneTimeUseTokenClaim.test.ts index b03d2d6de23..3b539516826 100644 --- a/apps/webapp/test/oneTimeUseTokenClaim.test.ts +++ b/apps/webapp/test/oneTimeUseTokenClaim.test.ts @@ -70,14 +70,18 @@ describe("IdempotencyKeyConcern · one-time-use token cross-table claim", () => expect(result.isCached).toBe(false); if (result.isCached === false) { - // The trigger pipeline must publish/release this claim — keyed on the - // namespaced token so it can never collide with a real idempotency key. + // The trigger pipeline must publish/release this claim. It is keyed on + // the namespaced token AND a reserved, task-independent slot — matching + // the task-independent oneTimeUseToken DB constraint, NOT request.taskId. 
expect(result.claim?.idempotencyKey).toBe("otu:tok-1"); expect(result.claim?.envId).toBe("env_a"); - expect(result.claim?.taskIdentifier).toBe("my-task"); + expect(result.claim?.taskIdentifier).toBe("__one_time_use_token__"); } expect(claimIdempotency).toHaveBeenCalledTimes(1); - expect(claimIdempotency.mock.calls[0][0]).toMatchObject({ idempotencyKey: "otu:tok-1" }); + expect(claimIdempotency.mock.calls[0][0]).toMatchObject({ + idempotencyKey: "otu:tok-1", + taskIdentifier: "__one_time_use_token__", + }); }); it("v2 org: a concurrent winner (claim resolved) rejects the second presentation as already-used", async () => { @@ -113,7 +117,7 @@ describe("IdempotencyKeyConcern · one-time-use token cross-table claim", () => expect(claimIdempotency).not.toHaveBeenCalled(); }); - it("triggerAndWait one-time token: left to the per-table constraint (not claimed here)", async () => { + it("triggerAndWait one-time token IS claimed (v2 orgs serialise it like the keyed claim)", async () => { const claimIdempotency = vi.fn(async () => ({ kind: "claimed" as const })); h.buffer = { claimIdempotency, @@ -127,9 +131,12 @@ describe("IdempotencyKeyConcern · one-time-use token cross-table claim", () => expect(result.isCached).toBe(false); if (result.isCached === false) { - expect(result.claim).toBeUndefined(); + // resumeParentOnCompletion is NOT excluded from the token claim: for a v2 + // org the cross-table dup hole is identical, and the loser is rejected + // (no cached-run waitpoint subtlety to avoid). 
+ expect(result.claim?.idempotencyKey).toBe("otu:tok-1"); } - expect(claimIdempotency).not.toHaveBeenCalled(); + expect(claimIdempotency).toHaveBeenCalledTimes(1); }); it("no one-time token: ordinary no-idempotency-key trigger is unaffected", async () => { From 5f14bf32537cc434041427b960980616e5939feb Mon Sep 17 00:00:00 2001 From: Dan Sutton Date: Mon, 22 Jun 2026 19:29:31 +0100 Subject: [PATCH 68/83] fix(webapp): gate runTableV2 on native realtime and drop the Electric shape merge The Electric dual-shape merge was a bridge to let the Electric backend observe v2 runs during the cutover, but Electric is short-lived and the merge taxed every tag/batch realtime feed with a second long-poll the moment it deployed. Gate the v2 run table on the native realtime backend instead (the native client is table-agnostic and observes v2 runs directly), so a run only routes to task_run_v2 once its org is on native. Remove the merge module and restore the single-table Electric proxy. The cross-table correctness work stays: a v2 run can still have a cross-table parent or child once an org flips, so the cancelRun cascade, metadata parent/root routing, the one-time-token claim, and the findRuns guard all still apply regardless of realtime backend. 
--- .../concerns/idempotencyKeys.server.ts | 10 +- .../runEngine/services/triggerTask.server.ts | 4 +- .../realtime/electricShapeMerge.server.ts | 183 ----------- .../app/services/realtimeClient.server.ts | 294 +----------------- apps/webapp/app/v3/runTableV2.server.ts | 41 ++- apps/webapp/test/electricShapeMerge.test.ts | 250 --------------- apps/webapp/test/oneTimeUseTokenClaim.test.ts | 30 +- apps/webapp/test/realtimeClient.test.ts | 7 +- apps/webapp/test/runTableV2.test.ts | 46 ++- 9 files changed, 107 insertions(+), 758 deletions(-) delete mode 100644 apps/webapp/app/services/realtime/electricShapeMerge.server.ts delete mode 100644 apps/webapp/test/electricShapeMerge.test.ts diff --git a/apps/webapp/app/runEngine/concerns/idempotencyKeys.server.ts b/apps/webapp/app/runEngine/concerns/idempotencyKeys.server.ts index 3d910727844..65b4e3cd0cc 100644 --- a/apps/webapp/app/runEngine/concerns/idempotencyKeys.server.ts +++ b/apps/webapp/app/runEngine/concerns/idempotencyKeys.server.ts @@ -247,7 +247,11 @@ export class IdempotencyKeyConcern { | Record | null | undefined) ?? null; - if (shouldUseV2RunTable(orgFeatureFlags)) { + if ( + shouldUseV2RunTable(orgFeatureFlags, { + nativeRealtimeEnabled: env.REALTIME_BACKEND_NATIVE_ENABLED === "1", + }) + ) { // Key the claim on (envId, token), task-independent, to match the DB's // task-independent oneTimeUseToken constraint (see the constant's // comment). The TTL is a fixed pipeline-dwell bound, NOT the customer @@ -426,7 +430,9 @@ export class IdempotencyKeyConcern { // a pathological client. shouldUseV2RunTable is checked first so a v2 org // skips the mollifier-flag resolve entirely. 
const claimEligible = - shouldUseV2RunTable(orgFeatureFlags) || + shouldUseV2RunTable(orgFeatureFlags, { + nativeRealtimeEnabled: env.REALTIME_BACKEND_NATIVE_ENABLED === "1", + }) || (!request.body.options?.resumeParentOnCompletion && !request.body.options?.debounce && !request.options?.oneTimeUseToken && diff --git a/apps/webapp/app/runEngine/services/triggerTask.server.ts b/apps/webapp/app/runEngine/services/triggerTask.server.ts index 956433c28b0..bdf656e3afc 100644 --- a/apps/webapp/app/runEngine/services/triggerTask.server.ts +++ b/apps/webapp/app/runEngine/services/triggerTask.server.ts @@ -159,7 +159,9 @@ export class RunEngineTriggerTaskService { // trigger hot path. Downstream routing is by id format only. const runFriendlyId = options?.runFriendlyId ?? - (shouldUseV2RunTable(environment.organization.featureFlags) + (shouldUseV2RunTable(environment.organization.featureFlags, { + nativeRealtimeEnabled: env.REALTIME_BACKEND_NATIVE_ENABLED === "1", + }) ? RunId.generateKsuid() : RunId.generate() ).friendlyId; diff --git a/apps/webapp/app/services/realtime/electricShapeMerge.server.ts b/apps/webapp/app/services/realtime/electricShapeMerge.server.ts deleted file mode 100644 index 751bcd22dc6..00000000000 --- a/apps/webapp/app/services/realtime/electricShapeMerge.server.ts +++ /dev/null @@ -1,183 +0,0 @@ -/** - * Pure helpers for merging TWO upstream Electric shapes (one per physical run - * table — `TaskRun` and `task_run_v2`) into a single shape the realtime client - * consumes. A tag-list or batch feed matches runs in both tables during/after a - * `runTableV2` cutover, but an Electric shape is bound to one table, so the - * proxy fans out to two shapes and presents one composite continuation - * (`handle` / `offset` / `cursor`) that the client round-trips opaquely. - * - * Kept dependency-free (no DB/Redis/fetch) so the merge logic is unit-testable. - */ - -// Separator packing the two per-table continuation values into one opaque -// token. 
Electric's handle/offset/cursor values are alphanumeric plus `_`/`-` -// (UUID-ish handles, `_` offsets, numeric cursors) and never contain -// `~`, so it is collision-free for this charset. -export const COMPOSITE_SEP = "~"; - -export const UP_TO_DATE_MESSAGE = { headers: { control: "up-to-date" } } as const; -export const MUST_REFETCH_MESSAGE = { headers: { control: "must-refetch" } } as const; - -/** A parsed per-table shape response: continuation headers + the change rows. */ -export type ParsedShape = { - status: number; - handle?: string; - offset?: string; - cursor?: string; - schema?: string; - /** Change messages only (control messages stripped). */ - changes: unknown[]; - upToDate: boolean; - mustRefetch: boolean; -}; - -/** The prior per-table continuation the client sent (used when a shape is left - * un-polled because the other returned first). */ -export type PriorContinuation = { - handleA?: string; - offsetA: string; - cursorA?: string; - handleB?: string; - offsetB: string; - cursorB?: string; -}; - -export type MergedShape = - | { mustRefetch: true } - | { - mustRefetch: false; - changes: unknown[]; - handle: string; - offset: string; - cursor?: string; - schema?: string; - /** - * The composite is up-to-date only when BOTH shapes are. An Electric - * snapshot can span multiple chunks: every chunk but the last omits the - * `up-to-date` control message. If one table's snapshot is still mid-fetch - * (chunk 1 of N) while the other has completed, the merged response must - * NOT terminate with `up-to-date` — otherwise the client believes the - * whole snapshot is done, flips to live, and never fetches the remaining - * chunks (silently dropping that table's overflow rows). - */ - upToDate: boolean; - }; - -/** - * Split a composite "~" value back into its per-table parts. A value with - * no separator (or null/empty) means the client hasn't been handed a composite - * yet (the initial request before any shape exists) -> both undefined. 
- */ -export function decodeCompositePart(value: string | null | undefined): { - a: string | undefined; - b: string | undefined; -} { - if (!value) return { a: undefined, b: undefined }; - const idx = value.indexOf(COMPOSITE_SEP); - if (idx === -1) return { a: undefined, b: undefined }; - return { - a: value.slice(0, idx) || undefined, - b: value.slice(idx + COMPOSITE_SEP.length) || undefined, - }; -} - -/** - * The offset is never absent — Electric uses "-1" for the initial request — so - * a bare value applies to BOTH shapes (initial), and a composite splits. - */ -export function decodeCompositeOffset(offset: string): { a: string; b: string } { - const idx = offset.indexOf(COMPOSITE_SEP); - if (idx === -1) return { a: offset, b: offset }; - return { a: offset.slice(0, idx), b: offset.slice(idx + COMPOSITE_SEP.length) }; -} - -export function encodeComposite(a: string, b: string): string { - return `${a}${COMPOSITE_SEP}${b}`; -} - -/** Parse the raw body + headers of one upstream shape response. */ -export function parseShapeMessages( - status: number, - headers: { - handle?: string; - offset?: string; - cursor?: string; - schema?: string; - }, - bodyText: string -): ParsedShape { - const base = { status, ...headers }; - if (status >= 400) { - return { ...base, changes: [], upToDate: false, mustRefetch: status === 409 }; - } - let parsed: unknown; - try { - parsed = bodyText.trim() ? JSON.parse(bodyText) : []; - } catch { - // Unparseable body — safest is to make the client refetch the shape. 
- return { ...base, changes: [], upToDate: false, mustRefetch: true }; - } - if (!Array.isArray(parsed)) { - return { ...base, changes: [], upToDate: false, mustRefetch: true }; - } - const messages = parsed as Array<{ headers?: { control?: string } }>; - const changes = messages.filter((m) => !m?.headers?.control); - const mustRefetch = messages.some((m) => m?.headers?.control === "must-refetch"); - const upToDate = messages.some((m) => m?.headers?.control === "up-to-date"); - return { ...base, changes, upToDate, mustRefetch }; -} - -/** - * Merge two parsed per-table shapes into one composite payload. If either shape - * needs a refetch (409 / must-refetch / unparseable), the whole composite is - * reset. Otherwise the change rows are concatenated (the client merges by key, - * so order across tables doesn't matter) and the continuation values are packed - * per table, falling back to the client's prior value for a shape that wasn't - * re-polled this round. - */ -export function mergeParsedShapes( - a: ParsedShape, - b: ParsedShape, - prior: PriorContinuation -): MergedShape { - if (a.mustRefetch || b.mustRefetch || a.status >= 400 || b.status >= 400) { - return { mustRefetch: true }; - } - const cursorA = a.cursor ?? prior.cursorA; - const cursorB = b.cursor ?? prior.cursorB; - const cursor = - cursorA !== undefined || cursorB !== undefined - ? encodeComposite(cursorA ?? "", cursorB ?? "") - : undefined; - return { - mustRefetch: false, - changes: [...a.changes, ...b.changes], - handle: encodeComposite(a.handle ?? prior.handleA ?? "", b.handle ?? prior.handleB ?? ""), - offset: encodeComposite(a.offset ?? prior.offsetA, b.offset ?? prior.offsetB), - cursor, - schema: a.schema ?? b.schema, - // Only terminate the composite when BOTH shapes have caught up; an - // un-up-to-date shape (a snapshot chunk that isn't the last) keeps the - // client requesting the remainder. 
unpolledShape() reports upToDate:true, - // so a live round that returns changes from one shape and carries the - // other forward still terminates iff the polled shape is itself up-to-date. - upToDate: a.upToDate && b.upToDate, - }; -} - -/** A synthetic "no change this round" result for a shape left un-polled because - * the other returned changes first; carries its prior continuation forward. */ -export function unpolledShape( - which: "a" | "b", - prior: PriorContinuation -): ParsedShape { - return { - status: 200, - handle: which === "a" ? prior.handleA : prior.handleB, - offset: which === "a" ? prior.offsetA : prior.offsetB, - cursor: which === "a" ? prior.cursorA : prior.cursorB, - changes: [], - upToDate: true, - mustRefetch: false, - }; -} diff --git a/apps/webapp/app/services/realtimeClient.server.ts b/apps/webapp/app/services/realtimeClient.server.ts index cd95dbaaec4..12b93f1996d 100644 --- a/apps/webapp/app/services/realtimeClient.server.ts +++ b/apps/webapp/app/services/realtimeClient.server.ts @@ -1,18 +1,6 @@ import { json } from "@remix-run/server-runtime"; import { tryCatch } from "@trigger.dev/core/utils"; -import { isKsuidId, safeParseNaturalLanguageDurationAgo } from "@trigger.dev/core/v3/isomorphic"; -import { - decodeCompositeOffset, - decodeCompositePart, - mergeParsedShapes, - MUST_REFETCH_MESSAGE, - parseShapeMessages, - unpolledShape, - UP_TO_DATE_MESSAGE, - type MergedShape, - type ParsedShape, - type PriorContinuation, -} from "./realtime/electricShapeMerge.server"; +import { safeParseNaturalLanguageDurationAgo } from "@trigger.dev/core/v3/isomorphic"; import { Callback, Result } from "ioredis"; import { randomUUID } from "node:crypto"; import { createRedisClient, RedisClient, RedisWithClusterOptions } from "~/redis.server"; @@ -61,11 +49,6 @@ const DEFAULT_ELECTRIC_COLUMNS = [ const RESERVED_COLUMNS = ["id", "taskIdentifier", "friendlyId", "status", "createdAt"]; const RESERVED_SEARCH_PARAMS = ["createdAt", "tags", "skipColumns"]; -// 
The two physical run tables a realtime shape can target. A run lives in -// exactly one, keyed by id format (ksuid -> task_run_v2, cuid -> TaskRun). -const TASK_RUN_TABLE = 'public."TaskRun"'; -const TASK_RUN_V2_TABLE = 'public."task_run_v2"'; - export type RealtimeClientOptions = { electricOrigin: string | string[]; redis: RedisWithClusterOptions; @@ -135,15 +118,10 @@ export class RealtimeClient { clientVersion?: string, signal?: AbortSignal ) { - // Route the shape to the physical table the run lives in: a v2 run's id is - // a KSUID (task_run_v2), a legacy run's a cuid (TaskRun). The run was - // already resolved by the route, so this id is authoritative. - const table = isKsuidId(runId) ? TASK_RUN_V2_TABLE : TASK_RUN_TABLE; return this.#streamRunsWhere( url, environment, `id='${runId}'`, - table, apiVersion, requestOptions, clientVersion, @@ -167,7 +145,7 @@ export class RealtimeClient { const whereClause = whereClauses.join(" AND "); - return this.#streamRunsAcrossTables( + return this.#streamRunsWhere( url, environment, whereClause, @@ -201,7 +179,7 @@ export class RealtimeClient { const whereClause = whereClauses.join(" AND "); - const response = await this.#streamRunsAcrossTables( + const response = await this.#streamRunsWhere( url, environment, whereClause, @@ -300,7 +278,6 @@ export class RealtimeClient { url: URL | string, environment: RealtimeEnvironment, whereClause: string, - table: string, apiVersion: API_VERSIONS, requestOptions?: RealtimeRequestOptions, clientVersion?: string, @@ -310,7 +287,6 @@ export class RealtimeClient { url, environment, whereClause, - table, requestOptions, clientVersion ); @@ -324,272 +300,10 @@ export class RealtimeClient { ); } - // Stream a feed that spans BOTH physical run tables (the tag-list and batch - // feeds) by running two upstream Electric shapes — public."TaskRun" and - // public."task_run_v2" — under a single composite continuation the client - // round-trips opaquely. 
A run lives in exactly one table, so the union of the - // two shapes is the full feed; the client merges by row key and never learns - // there are two shapes. See electricShapeMerge.server.ts for the pure logic. - // - // Cost: this opens TWO upstream Electric long-polls per tag/batch - // subscription (vs one for a single-table feed), so these feeds use ~2x - // Electric connections while an org has runs across both tables. Single-run - // subscriptions are unaffected — one shape, routed to the run's table by id - // format. - async #streamRunsAcrossTables( - url: URL | string, - environment: RealtimeEnvironment, - whereClause: string, - apiVersion: API_VERSIONS, - requestOptions?: RealtimeRequestOptions, - clientVersion?: string, - signal?: AbortSignal - ): Promise { - const $url = new URL(url.toString()); - const isLive = isLiveRequestUrl($url); - const incomingHandle = extractShapeId($url); - const incomingOffset = $url.searchParams.get("offset") ?? "-1"; - const incomingCursor = $url.searchParams.get("cursor"); - - const handles = decodeCompositePart(incomingHandle); - const offsets = decodeCompositeOffset(incomingOffset); - const cursors = decodeCompositePart(incomingCursor); - - const prior: PriorContinuation = { - handleA: handles.a, - offsetA: offsets.a, - cursorA: cursors.a, - handleB: handles.b, - offsetB: offsets.b, - cursorB: cursors.b, - }; - - const urlA = this.#constructMergeShapeUrl( - $url, - environment, - whereClause, - TASK_RUN_TABLE, - { handle: handles.a, offset: offsets.a, cursor: cursors.a }, - requestOptions, - clientVersion - ); - const urlB = this.#constructMergeShapeUrl( - $url, - environment, - whereClause, - TASK_RUN_V2_TABLE, - { handle: handles.b, offset: offsets.b, cursor: cursors.b }, - requestOptions, - clientVersion - ); - - // One concurrency slot for the composite live request: it maps to a single - // client request even though we fan out to two upstream long-polls. 
- let requestId: string | undefined; - if (isLive && incomingHandle) { - const concurrencyLimit = await this.cachedLimitProvider.getCachedLimit( - environment.organizationId, - 100_000 - ); - if (!concurrencyLimit) { - logger.error("Failed to get concurrency limit", { - organizationId: environment.organizationId, - }); - return json({ error: "Failed to get concurrency limit" }, { status: 500 }); - } - requestId = randomUUID(); - if (!(await this.#incrementAndCheck(environment.id, requestId, concurrencyLimit))) { - return json({ error: "Too many concurrent requests" }, { status: 429 }); - } - } - - try { - const merged = await this.#raceAndMergeShapes(urlA, urlB, isLive, prior, signal); - return this.#buildMergeResponse(merged, isLive, apiVersion, clientVersion); - } finally { - if (requestId) { - await this.#decrementConcurrency(environment.id, requestId); - } - } - } - - // Build the per-table Electric URL, replacing the composite continuation the - // client sent with this table's decoded part. - #constructMergeShapeUrl( - baseUrl: URL, - environment: RealtimeEnvironment, - whereClause: string, - table: string, - perTable: { handle?: string; offset: string; cursor?: string }, - requestOptions?: RealtimeRequestOptions, - clientVersion?: string - ): URL { - const electricUrl = this.#constructRunsElectricUrl( - baseUrl, - environment, - whereClause, - table, - requestOptions, - clientVersion - ); - // Upstream always speaks current Electric (handle, not shape_id). - electricUrl.searchParams.delete("shape_id"); - if (perTable.handle !== undefined) { - electricUrl.searchParams.set("handle", perTable.handle); - } else { - electricUrl.searchParams.delete("handle"); - } - electricUrl.searchParams.set("offset", perTable.offset); - if (perTable.cursor !== undefined) { - electricUrl.searchParams.set("cursor", perTable.cursor); - } else { - electricUrl.searchParams.delete("cursor"); - } - return electricUrl; - } - - // Fetch both shapes. 
For a live request, return as soon as ONE yields changes - // (or needs a refetch) and carry the other's prior continuation forward — so a - // change on either table isn't delayed by the other's idle long-poll. If the - // first to settle had nothing, wait for the other before responding. - async #raceAndMergeShapes( - urlA: URL, - urlB: URL, - isLive: boolean, - prior: PriorContinuation, - signal?: AbortSignal - ): Promise { - const ctlA = new AbortController(); - const ctlB = new AbortController(); - const link = (ctl: AbortController) => - signal ? AbortSignal.any([signal, ctl.signal]) : ctl.signal; - - let aRes: ParsedShape | undefined; - let bRes: ParsedShape | undefined; - const pA = this.#fetchShape(urlA, link(ctlA)).then((r) => { - aRes = r; - return "a" as const; - }); - const pB = this.#fetchShape(urlB, link(ctlB)).then((r) => { - bRes = r; - return "b" as const; - }); - // A shape we don't end up awaiting (the race loser we abort, or the sibling - // left pending when the catch below rethrows) must not surface as an - // unhandled rejection. Attach detached no-op catches up front; the - // race/await paths still observe the original rejections through their own - // reactions, so this only swallows an otherwise-orphaned rejection. - void pA.catch(() => {}); - void pB.catch(() => {}); - - try { - if (!isLive) { - await Promise.all([pA, pB]); - return mergeParsedShapes(aRes!, bRes!, prior); - } - - const actionable = (r: ParsedShape) => - r.mustRefetch || r.status >= 400 || r.changes.length > 0; - - const first = await Promise.race([pA, pB]); - const firstRes = first === "a" ? aRes! : bRes!; - if (actionable(firstRes)) { - // Got changes/refetch from one shape; abort the other and return - // immediately. Its rejection is already swallowed by the catch attached - // above, so the abort can't surface as an unhandled rejection. - (first === "a" ? ctlB : ctlA).abort(); - return first === "a" - ? 
mergeParsedShapes(aRes!, unpolledShape("b", prior), prior) - : mergeParsedShapes(unpolledShape("a", prior), bRes!, prior); - } - - // First settled empty (idle timeout) — wait for the other. - await (first === "a" ? pB : pA); - return mergeParsedShapes(aRes!, bRes!, prior); - } catch (error) { - ctlA.abort(); - ctlB.abort(); - throw error; - } - } - - async #fetchShape(electricUrl: URL, signal?: AbortSignal): Promise { - const resp = await longPollingFetch(electricUrl.toString(), { signal }); - const headers = { - handle: - resp.headers.get("electric-handle") ?? resp.headers.get("electric-shape-id") ?? undefined, - offset: - resp.headers.get("electric-offset") ?? - resp.headers.get("electric-chunk-last-offset") ?? - undefined, - cursor: resp.headers.get("electric-cursor") ?? undefined, - schema: resp.headers.get("electric-schema") ?? undefined, - }; - if (resp.status >= 400) { - try { - await resp.body?.cancel(); - } catch {} - return parseShapeMessages(resp.status, headers, ""); - } - const bodyText = await resp.text(); - return parseShapeMessages(resp.status, headers, bodyText); - } - - #buildMergeResponse( - merged: MergedShape, - isLive: boolean, - apiVersion: API_VERSIONS, - clientVersion?: string - ): Response { - const responseHeaders = new Headers(); - responseHeaders.set("content-type", "application/json"); - responseHeaders.set("cache-control", "no-store"); - // Match the native client: expose electric-* headers cross-origin or the - // deployed react-hooks fail with MissingHeadersError. - responseHeaders.set("access-control-allow-origin", "*"); - responseHeaders.set("access-control-expose-headers", "*"); - - if (merged.mustRefetch) { - // Reset the client's shape state; it refetches both tables from scratch. 
- return new Response(JSON.stringify([MUST_REFETCH_MESSAGE, UP_TO_DATE_MESSAGE]), { - status: 409, - headers: responseHeaders, - }); - } - - if (clientVersion) { - responseHeaders.set("electric-handle", merged.handle); - responseHeaders.set("electric-offset", merged.offset); - } else { - responseHeaders.set("electric-shape-id", merged.handle); - responseHeaders.set("electric-chunk-last-offset", merged.offset); - } - if (isLive) { - // The client requires electric-cursor on every live response (its live - // cache-buster). Fall back to the offset if neither shape provided one. - responseHeaders.set("electric-cursor", merged.cursor ?? merged.offset); - } else if (merged.schema !== undefined) { - // Non-live responses require electric-schema. - responseHeaders.set("electric-schema", merged.schema); - } - - // Only append the up-to-date terminator when BOTH upstream shapes are - // caught up. If one table's snapshot is still spanning chunks, omitting the - // terminator keeps the client in snapshot mode fetching the rest instead of - // prematurely flipping to live and dropping that table's remaining rows. - const body = JSON.stringify( - merged.upToDate ? [...merged.changes, UP_TO_DATE_MESSAGE] : [...merged.changes] - ); - const finalBody = - apiVersion === CURRENT_API_VERSION ? 
body : this.#rewriteResponseBodyForNoneApiVersion(body); - return new Response(finalBody, { status: 200, headers: responseHeaders }); - } - #constructRunsElectricUrl( url: URL | string, environment: RealtimeEnvironment, whereClause: string, - table: string, requestOptions?: RealtimeRequestOptions, clientVersion?: string ): URL { @@ -608,7 +322,7 @@ export class RealtimeClient { }); electricUrl.searchParams.set("where", whereClause); - electricUrl.searchParams.set("table", table); + electricUrl.searchParams.set("table", 'public."TaskRun"'); if (!clientVersion) { // If the client version is not provided, that means we're using an older client diff --git a/apps/webapp/app/v3/runTableV2.server.ts b/apps/webapp/app/v3/runTableV2.server.ts index 51b55aefbe3..5fa089fbd6e 100644 --- a/apps/webapp/app/v3/runTableV2.server.ts +++ b/apps/webapp/app/v3/runTableV2.server.ts @@ -1,5 +1,15 @@ import { FEATURE_FLAG, FeatureFlagCatalog } from "~/v3/featureFlags"; +export type ShouldUseV2RunTableOptions = { + /** + * Whether the native realtime backend is enabled for this deployment + * (`env.REALTIME_BACKEND_NATIVE_ENABLED === "1"`). Passed in rather than read + * from env here so this stays a pure, env-free function the caller can + * unit-test directly. + */ + nativeRealtimeEnabled: boolean; +}; + /** * Per-org cutover switch for the parallel `task_run_v2` run table. * @@ -9,20 +19,45 @@ import { FEATURE_FLAG, FeatureFlagCatalog } from "~/v3/featureFlags"; * routes the run to `task_run_v2`; off (the default) → mint a legacy id, which * routes to `TaskRun`. * + * GATED ON NATIVE REALTIME. The Electric realtime backend serves shapes bound + * to a single table (`TaskRun`) and is being retired; only the native backend + * is table-agnostic and can observe a `task_run_v2` run in realtime + * (subscribeToRun / useRealtimeRun / poll). 
Routing a run to v2 while the org is + * still served by Electric would make that run silently invisible in realtime, + * so v2 requires BOTH the deployment master switch (`nativeRealtimeEnabled`) and + * the org's `realtimeBackend` flag set to "native". This is a temporary + * coupling: once Electric is removed and native is the only/default backend, + * drop the native check. + * * RunStore never reads this flag: it routes purely by id format. The flag only * decides which id scheme is minted upstream. Disabling it sends only NEW runs * back to legacy; runs already created on v2 stay readable there (routed by id). */ -export function shouldUseV2RunTable(orgFeatureFlags: unknown): boolean { +export function shouldUseV2RunTable( + orgFeatureFlags: unknown, + options: ShouldUseV2RunTableOptions +): boolean { if (orgFeatureFlags === null || typeof orgFeatureFlags !== "object") { return false; } + const flags = orgFeatureFlags as Record; - const override = (orgFeatureFlags as Record)[FEATURE_FLAG.runTableV2]; - if (override === undefined) { + // Native realtime is a hard prerequisite (see doc comment): a v2 run is only + // observable in realtime on the native backend. + if (!options.nativeRealtimeEnabled) { + return false; + } + const backend = FeatureFlagCatalog[FEATURE_FLAG.realtimeBackend].safeParse( + flags[FEATURE_FLAG.realtimeBackend] + ); + if (!(backend.success && backend.data === "native")) { return false; } + const override = flags[FEATURE_FLAG.runTableV2]; + if (override === undefined) { + return false; + } const parsed = FeatureFlagCatalog[FEATURE_FLAG.runTableV2].safeParse(override); return parsed.success ? 
parsed.data : false; } diff --git a/apps/webapp/test/electricShapeMerge.test.ts b/apps/webapp/test/electricShapeMerge.test.ts deleted file mode 100644 index fb67d2ba6b4..00000000000 --- a/apps/webapp/test/electricShapeMerge.test.ts +++ /dev/null @@ -1,250 +0,0 @@ -import { describe, expect, it } from "vitest"; -import { - decodeCompositeOffset, - decodeCompositePart, - encodeComposite, - mergeParsedShapes, - parseShapeMessages, - unpolledShape, - type ParsedShape, - type PriorContinuation, -} from "~/services/realtime/electricShapeMerge.server"; - -const INSERT = { - key: '"public"."TaskRun"/"r1"', - value: { id: "r1" }, - headers: { operation: "insert" }, -}; -const UPDATE = { - key: '"public"."task_run_v2"/"r2"', - value: { id: "r2" }, - headers: { operation: "update" }, -}; - -function shape(overrides: Partial = {}): ParsedShape { - return { - status: 200, - handle: "h", - offset: "o", - cursor: "c", - schema: '{"id":{"type":"text"}}', - changes: [], - upToDate: true, - mustRefetch: false, - ...overrides, - }; -} - -const PRIOR: PriorContinuation = { - handleA: "HA", - offsetA: "OA", - cursorA: "CA", - handleB: "HB", - offsetB: "OB", - cursorB: "CB", -}; - -describe("decodeCompositePart", () => { - it("returns both undefined for null / no separator", () => { - expect(decodeCompositePart(null)).toEqual({ a: undefined, b: undefined }); - expect(decodeCompositePart(undefined)).toEqual({ a: undefined, b: undefined }); - expect(decodeCompositePart("")).toEqual({ a: undefined, b: undefined }); - // A bare value with no separator means "not a composite yet" -> initial. 
- expect(decodeCompositePart("solo")).toEqual({ a: undefined, b: undefined }); - }); - - it("splits a composite into its two parts", () => { - expect(decodeCompositePart("hA~hB")).toEqual({ a: "hA", b: "hB" }); - }); - - it("treats an empty side as undefined", () => { - expect(decodeCompositePart("hA~")).toEqual({ a: "hA", b: undefined }); - expect(decodeCompositePart("~hB")).toEqual({ a: undefined, b: "hB" }); - }); -}); - -describe("decodeCompositeOffset", () => { - it("applies a bare offset (e.g. the initial -1) to both shapes", () => { - expect(decodeCompositeOffset("-1")).toEqual({ a: "-1", b: "-1" }); - }); - - it("splits a composite offset", () => { - expect(decodeCompositeOffset("26800552_0~26800999_2")).toEqual({ - a: "26800552_0", - b: "26800999_2", - }); - }); - - it("round-trips through encodeComposite", () => { - expect(decodeCompositeOffset(encodeComposite("x_1", "y_2"))).toEqual({ a: "x_1", b: "y_2" }); - }); -}); - -describe("parseShapeMessages", () => { - const headers = { handle: "h", offset: "o", cursor: "c", schema: "s" }; - - it("extracts change rows and the up-to-date flag", () => { - const body = JSON.stringify([INSERT, { headers: { control: "up-to-date" } }]); - const parsed = parseShapeMessages(200, headers, body); - expect(parsed.changes).toEqual([INSERT]); - expect(parsed.upToDate).toBe(true); - expect(parsed.mustRefetch).toBe(false); - }); - - it("treats a bare up-to-date as no changes", () => { - const parsed = parseShapeMessages( - 200, - headers, - JSON.stringify([{ headers: { control: "up-to-date" } }]) - ); - expect(parsed.changes).toEqual([]); - expect(parsed.upToDate).toBe(true); - }); - - it("flags must-refetch from a 409 status", () => { - const parsed = parseShapeMessages(409, headers, ""); - expect(parsed.mustRefetch).toBe(true); - expect(parsed.changes).toEqual([]); - }); - - it("flags must-refetch from a control message", () => { - const body = JSON.stringify([ - { headers: { control: "must-refetch" } }, - { headers: { 
control: "up-to-date" } }, - ]); - expect(parseShapeMessages(200, headers, body).mustRefetch).toBe(true); - }); - - it("flags must-refetch for an unparseable / non-array body", () => { - expect(parseShapeMessages(200, headers, "not json").mustRefetch).toBe(true); - expect(parseShapeMessages(200, headers, "{}").mustRefetch).toBe(true); - }); - - it("treats an empty body as no changes (not up-to-date)", () => { - const parsed = parseShapeMessages(200, headers, ""); - expect(parsed.changes).toEqual([]); - expect(parsed.upToDate).toBe(false); - expect(parsed.mustRefetch).toBe(false); - }); -}); - -describe("mergeParsedShapes", () => { - it("concatenates change rows from both tables", () => { - const merged = mergeParsedShapes( - shape({ changes: [INSERT], handle: "hA", offset: "oA", cursor: "cA" }), - shape({ changes: [UPDATE], handle: "hB", offset: "oB", cursor: "cB" }), - PRIOR - ); - expect(merged.mustRefetch).toBe(false); - if (merged.mustRefetch) return; - expect(merged.changes).toEqual([INSERT, UPDATE]); - expect(merged.handle).toBe(encodeComposite("hA", "hB")); - expect(merged.offset).toBe(encodeComposite("oA", "oB")); - expect(merged.cursor).toBe(encodeComposite("cA", "cB")); - }); - - it("resets when either shape needs a refetch", () => { - expect(mergeParsedShapes(shape({ mustRefetch: true }), shape(), PRIOR)).toEqual({ - mustRefetch: true, - }); - expect(mergeParsedShapes(shape(), shape({ status: 409 }), PRIOR)).toEqual({ - mustRefetch: true, - }); - }); - - it("falls back to the prior continuation for a shape that returned nothing", () => { - // B was left un-polled (the other table returned changes first). 
- const merged = mergeParsedShapes( - shape({ changes: [INSERT], handle: "hA2", offset: "oA2", cursor: "cA2" }), - unpolledShape("b", PRIOR), - PRIOR - ); - expect(merged.mustRefetch).toBe(false); - if (merged.mustRefetch) return; - expect(merged.changes).toEqual([INSERT]); - expect(merged.handle).toBe(encodeComposite("hA2", "HB")); - expect(merged.offset).toBe(encodeComposite("oA2", "OB")); - expect(merged.cursor).toBe(encodeComposite("cA2", "CB")); - }); - - it("uses the prior cursor when a returned shape omits it", () => { - const merged = mergeParsedShapes( - shape({ cursor: undefined, handle: "hA", offset: "oA" }), - shape({ cursor: "cB", handle: "hB", offset: "oB" }), - PRIOR - ); - if (merged.mustRefetch) throw new Error("unexpected refetch"); - // a omitted cursor -> prior.cursorA ("CA"); b returned "cB". - expect(merged.cursor).toBe(encodeComposite("CA", "cB")); - }); - - it("omits the cursor entirely when neither shape nor prior has one (initial snapshot)", () => { - const initialPrior: PriorContinuation = { offsetA: "-1", offsetB: "-1" }; - const merged = mergeParsedShapes( - shape({ cursor: undefined, handle: "hA", offset: "oA" }), - shape({ cursor: undefined, handle: "hB", offset: "oB" }), - initialPrior - ); - if (merged.mustRefetch) throw new Error("unexpected refetch"); - expect(merged.cursor).toBeUndefined(); - }); - - it("carries schema from whichever shape supplied it", () => { - const merged = mergeParsedShapes( - shape({ schema: undefined }), - shape({ schema: '{"id":{"type":"text"}}' }), - PRIOR - ); - if (merged.mustRefetch) throw new Error("unexpected refetch"); - expect(merged.schema).toBe('{"id":{"type":"text"}}'); - }); - - it("is up-to-date only when BOTH shapes are caught up (multi-chunk snapshot guard)", () => { - // Both caught up -> the composite terminates with up-to-date. 
- const both = mergeParsedShapes(shape({ upToDate: true }), shape({ upToDate: true }), PRIOR); - if (both.mustRefetch) throw new Error("unexpected refetch"); - expect(both.upToDate).toBe(true); - - // Table A is mid-snapshot (chunk 1 of N: rows but no up-to-date control - // message); B has completed. The composite must NOT be up-to-date — else - // the client flips to live after chunk 1 and silently drops A's remaining - // rows. The rows seen so far still flow through. - const aMidSnapshot = mergeParsedShapes( - shape({ changes: [INSERT], upToDate: false, handle: "hA", offset: "oA" }), - shape({ upToDate: true, handle: "hB", offset: "oB" }), - PRIOR - ); - if (aMidSnapshot.mustRefetch) throw new Error("unexpected refetch"); - expect(aMidSnapshot.upToDate).toBe(false); - expect(aMidSnapshot.changes).toEqual([INSERT]); - - // Symmetric: B mid-snapshot. - const bMidSnapshot = mergeParsedShapes( - shape({ upToDate: true }), - shape({ changes: [UPDATE], upToDate: false }), - PRIOR - ); - if (bMidSnapshot.mustRefetch) throw new Error("unexpected refetch"); - expect(bMidSnapshot.upToDate).toBe(false); - }); - - it("a live round carrying the un-polled sibling terminates only when the polled shape is caught up", () => { - // unpolledShape reports upToDate:true, so the composite terminates iff the - // polled shape is itself caught up. 
- const caughtUp = mergeParsedShapes( - shape({ changes: [INSERT], upToDate: true }), - unpolledShape("b", PRIOR), - PRIOR - ); - if (caughtUp.mustRefetch) throw new Error("unexpected refetch"); - expect(caughtUp.upToDate).toBe(true); - - const moreComing = mergeParsedShapes( - shape({ changes: [INSERT], upToDate: false }), - unpolledShape("b", PRIOR), - PRIOR - ); - if (moreComing.mustRefetch) throw new Error("unexpected refetch"); - expect(moreComing.upToDate).toBe(false); - }); -}); diff --git a/apps/webapp/test/oneTimeUseTokenClaim.test.ts b/apps/webapp/test/oneTimeUseTokenClaim.test.ts index 3b539516826..9b8e78cd21a 100644 --- a/apps/webapp/test/oneTimeUseTokenClaim.test.ts +++ b/apps/webapp/test/oneTimeUseTokenClaim.test.ts @@ -8,11 +8,18 @@ vi.mock("~/db.server", () => ({ prisma: {}, $replica: {} })); // claimOrAwait resolves its backend through getIdempotencyClaimBuffer; script // it via a hoisted handle so each test controls the claim outcome. -const h = vi.hoisted(() => ({ buffer: null as unknown })); +const h = vi.hoisted(() => ({ buffer: null as unknown, v2: true })); vi.mock("~/v3/mollifier/mollifierBuffer.server", () => ({ getMollifierBuffer: () => h.buffer, getIdempotencyClaimBuffer: () => h.buffer, })); +// v2 routing is gated on native realtime (deployment env switch + per-org +// `realtimeBackend` flag); that gate is covered by runTableV2.test.ts. Here we +// mock it so each test controls whether the org is cut over to v2, isolating +// the one-time-token claim logic from the gating mechanism. +vi.mock("~/v3/runTableV2.server", () => ({ + shouldUseV2RunTable: () => h.v2, +})); // The one-time-token claim runs BEFORE the mollifier-flag resolve, but the // concern still imports the gate module; stub it so loading doesn't pull in // extra feature-flag wiring. 
@@ -98,23 +105,24 @@ describe("IdempotencyKeyConcern · one-time-use token cross-table claim", () => ).rejects.toThrow(/already been used/i); }); - it("non-v2 org: skips the token claim entirely (no Redis round-trip)", async () => { + it("org not cut over to v2: skips the token claim entirely (no Redis round-trip)", async () => { + h.v2 = false; const claimIdempotency = vi.fn(async () => ({ kind: "claimed" as const })); h.buffer = { claimIdempotency, readClaim: vi.fn(async () => null), } as unknown as MollifierBuffer; - const result = await makeConcern().handleTriggerRequest( - makeOtuRequest({ featureFlags: { mollifierEnabled: true } }), - undefined - ); - - expect(result.isCached).toBe(false); - if (result.isCached === false) { - expect(result.claim).toBeUndefined(); + try { + const result = await makeConcern().handleTriggerRequest(makeOtuRequest(), undefined); + expect(result.isCached).toBe(false); + if (result.isCached === false) { + expect(result.claim).toBeUndefined(); + } + expect(claimIdempotency).not.toHaveBeenCalled(); + } finally { + h.v2 = true; // restore for the other tests in this file } - expect(claimIdempotency).not.toHaveBeenCalled(); }); it("triggerAndWait one-time token IS claimed (v2 orgs serialise it like the keyed claim)", async () => { diff --git a/apps/webapp/test/realtimeClient.test.ts b/apps/webapp/test/realtimeClient.test.ts index cdff50e3d18..d98213e5b17 100644 --- a/apps/webapp/test/realtimeClient.test.ts +++ b/apps/webapp/test/realtimeClient.test.ts @@ -237,13 +237,8 @@ describe.skipIf(process.env.GITHUB_ACTIONS)("RealtimeClient", () => { const chunkOffset = headers["electric-offset"]; expect(response.status).toBe(200); - // The tag/list feed spans both physical run tables, so streamRuns merges - // two upstream Electric shapes (TaskRun + task_run_v2) under one composite - // cursor: handle and offset each pack the two per-table values joined by - // "~". Both shapes are at "0_0" for the initial snapshot. 
expect(shapeId).toBeDefined(); - expect(shapeId).toContain("~"); - expect(chunkOffset).toBe("0_0~0_0"); + expect(chunkOffset).toBe("0_0"); } ); diff --git a/apps/webapp/test/runTableV2.test.ts b/apps/webapp/test/runTableV2.test.ts index 9abae4cb7bb..7aa528b34a9 100644 --- a/apps/webapp/test/runTableV2.test.ts +++ b/apps/webapp/test/runTableV2.test.ts @@ -1,28 +1,50 @@ import { describe, expect, it } from "vitest"; import { shouldUseV2RunTable } from "~/v3/runTableV2.server"; +// v2 is gated on the org being served realtime by the NATIVE backend (Electric +// can't observe task_run_v2). That requires the deployment master switch +// (nativeRealtimeEnabled) AND the per-org `realtimeBackend` flag set to "native". +const NATIVE_ON = { nativeRealtimeEnabled: true }; +const NATIVE_OFF = { nativeRealtimeEnabled: false }; +const onNative = (extra: Record = {}) => ({ realtimeBackend: "native", ...extra }); + describe("shouldUseV2RunTable", () => { it("defaults to false when the org has no flags", () => { - expect(shouldUseV2RunTable(null)).toBe(false); - expect(shouldUseV2RunTable(undefined)).toBe(false); - expect(shouldUseV2RunTable({})).toBe(false); + expect(shouldUseV2RunTable(null, NATIVE_ON)).toBe(false); + expect(shouldUseV2RunTable(undefined, NATIVE_ON)).toBe(false); + expect(shouldUseV2RunTable({}, NATIVE_ON)).toBe(false); + }); + + it("returns true only when runTableV2 is boolean true AND the org is on native realtime", () => { + expect(shouldUseV2RunTable(onNative({ runTableV2: true }), NATIVE_ON)).toBe(true); + expect(shouldUseV2RunTable(onNative({ runTableV2: false }), NATIVE_ON)).toBe(false); }); - it("returns true only when the flag is the boolean true", () => { - expect(shouldUseV2RunTable({ runTableV2: true })).toBe(true); - expect(shouldUseV2RunTable({ runTableV2: false })).toBe(false); + it("requires the native realtime backend (Electric can't observe v2 runs)", () => { + // runTableV2 on, but the org is not on native realtime → no v2 (it would be + // 
realtime-invisible). + expect(shouldUseV2RunTable({ runTableV2: true }, NATIVE_ON)).toBe(false); + expect(shouldUseV2RunTable({ runTableV2: true, realtimeBackend: "electric" }, NATIVE_ON)).toBe( + false + ); + expect(shouldUseV2RunTable({ runTableV2: true, realtimeBackend: "shadow" }, NATIVE_ON)).toBe( + false + ); + // On native per-org, but the deployment master switch is off → effectively + // still Electric → no v2. + expect(shouldUseV2RunTable(onNative({ runTableV2: true }), NATIVE_OFF)).toBe(false); }); it("rejects a stringified flag value (strict boolean, no coercion)", () => { // A stringified "false" must not coerce to true and cut the org over. - expect(shouldUseV2RunTable({ runTableV2: "true" })).toBe(false); - expect(shouldUseV2RunTable({ runTableV2: "false" })).toBe(false); - expect(shouldUseV2RunTable({ runTableV2: 1 })).toBe(false); + expect(shouldUseV2RunTable(onNative({ runTableV2: "true" }), NATIVE_ON)).toBe(false); + expect(shouldUseV2RunTable(onNative({ runTableV2: "false" }), NATIVE_ON)).toBe(false); + expect(shouldUseV2RunTable(onNative({ runTableV2: 1 }), NATIVE_ON)).toBe(false); }); it("ignores unrelated flags and non-object inputs", () => { - expect(shouldUseV2RunTable({ mollifierEnabled: true })).toBe(false); - expect(shouldUseV2RunTable("runTableV2")).toBe(false); - expect(shouldUseV2RunTable(42)).toBe(false); + expect(shouldUseV2RunTable(onNative({ mollifierEnabled: true }), NATIVE_ON)).toBe(false); + expect(shouldUseV2RunTable("runTableV2", NATIVE_ON)).toBe(false); + expect(shouldUseV2RunTable(42, NATIVE_ON)).toBe(false); }); }); From 417fb39074ae932c450bf68b9469bd0bf5b8ab55 Mon Sep 17 00:00:00 2001 From: Dan Sutton Date: Mon, 22 Jun 2026 19:49:42 +0100 Subject: [PATCH 69/83] perf(run-store,webapp): scope idempotency dedup to one table for non-v2 orgs, add cross-table tests The idempotency-key dedup is a non-id predicate, so RunStore read BOTH run tables in parallel on every idempotency-keyed trigger, including orgs not cut over to v2 
(whose runs only live in TaskRun, so the task_run_v2 query is always empty; while native realtime is off that is every org). Add an optional `tables: "legacy" | "both"` scope to findRun and pass "legacy" from the idempotency concern when the org is not on v2, keeping the trigger hot path single-table. Backfills cross-table tests the audit flagged as missing: findRun legacy-scope skips task_run_v2, and clearIdempotencyKey fans out across both tables (byPredicate hits v2; a mixed byFriendlyIds array clears both). --- .../concerns/idempotencyKeys.server.ts | 24 ++-- .../run-store/src/PostgresRunStore.test.ts | 121 ++++++++++++++++++ .../run-store/src/PostgresRunStore.ts | 41 ++++-- internal-packages/run-store/src/types.ts | 16 ++- 4 files changed, 182 insertions(+), 20 deletions(-) diff --git a/apps/webapp/app/runEngine/concerns/idempotencyKeys.server.ts b/apps/webapp/app/runEngine/concerns/idempotencyKeys.server.ts index 65b4e3cd0cc..4c97084b343 100644 --- a/apps/webapp/app/runEngine/concerns/idempotencyKeys.server.ts +++ b/apps/webapp/app/runEngine/concerns/idempotencyKeys.server.ts @@ -302,6 +302,20 @@ export class IdempotencyKeyConcern { return { isCached: false, idempotencyKey, idempotencyKeyExpiresAt }; } + // Resolve the org's v2-cutover state ONCE: it gates both the cross-table + // idempotency lookup below and the pre-gate claim further down. While the + // org is not on v2 (the default, and every org while native realtime is + // off) the run can only be in the legacy table, so scope the dedup read to + // "legacy" and skip the empty task_run_v2 query on the trigger hot path. + const orgFeatureFlags = + (request.environment.organization?.featureFlags as + | Record + | null + | undefined) ?? null; + const orgUsesV2 = shouldUseV2RunTable(orgFeatureFlags, { + nativeRealtimeEnabled: env.REALTIME_BACKEND_NATIVE_ENABLED === "1", + }); + const existingRun = idempotencyKey ? 
await runStore.findRun( { @@ -313,6 +327,7 @@ export class IdempotencyKeyConcern { include: { associatedWaitpoint: true, }, + tables: orgUsesV2 ? "both" : "legacy", }, this.prisma ) @@ -408,11 +423,6 @@ export class IdempotencyKeyConcern { // TaskRun, ksuid -> task_run_v2); the per-table idempotency unique // constraints can't see each other, so neither INSERT raises P2002 and two // runs share one key. The Redis claim is the only backstop in that window. - const orgFeatureFlags = - (request.environment.organization?.featureFlags as - | Record - | null - | undefined) ?? null; // v2-cutover orgs: an idempotency-keyed trigger can straddle a `runTableV2` // flag flip into different physical tables (cuid -> TaskRun, ksuid -> // task_run_v2), and the per-table idempotency-key unique constraints can't @@ -430,9 +440,7 @@ export class IdempotencyKeyConcern { // a pathological client. shouldUseV2RunTable is checked first so a v2 org // skips the mollifier-flag resolve entirely. const claimEligible = - shouldUseV2RunTable(orgFeatureFlags, { - nativeRealtimeEnabled: env.REALTIME_BACKEND_NATIVE_ENABLED === "1", - }) || + orgUsesV2 || (!request.body.options?.resumeParentOnCompletion && !request.body.options?.debounce && !request.options?.oneTimeUseToken && diff --git a/internal-packages/run-store/src/PostgresRunStore.test.ts b/internal-packages/run-store/src/PostgresRunStore.test.ts index 7efd3fce048..238ab73726e 100644 --- a/internal-packages/run-store/src/PostgresRunStore.test.ts +++ b/internal-packages/run-store/src/PostgresRunStore.test.ts @@ -2048,6 +2048,127 @@ describe("PostgresRunStore — table routing by id format", () => { } ); + postgresTest( + "findRun tables:'legacy' skips the task_run_v2 query (idempotency hot-path scope)", + async ({ prisma }) => { + const { organization, project, environment } = await seedEnvironment(prisma); + const store = new PostgresRunStore({ prisma, readOnlyPrisma: prisma }); + + // A v2 (ksuid) run carrying an idempotency key — it lives 
only in + // task_run_v2. + const ksuid = RunId.generateKsuid(); + await seedRoutedRun(prisma, { + id: ksuid.id, + friendlyId: ksuid.friendlyId, + organizationId: organization.id, + projectId: project.id, + runtimeEnvironmentId: environment.id, + idempotencyKey: "idem-scope", + taskIdentifier: "my-task", + }); + + const where = { + runtimeEnvironmentId: environment.id, + idempotencyKey: "idem-scope", + taskIdentifier: "my-task", + }; + + // Default (both tables) and an explicit "both" find the v2 run. + expect((await store.findRun(where))?.id).toBe(ksuid.id); + expect( + (await store.findRun(where, { select: { id: true }, tables: "both" }))?.id + ).toBe(ksuid.id); + + // "legacy" scope skips task_run_v2 entirely, so the v2 run is NOT found. + // This is the hot-path optimisation for an org not cut over to v2: its + // runs only live in TaskRun, so the second (v2) query is always empty and + // can be skipped. (If a caller mis-scopes a genuinely-v2 org to legacy it + // would miss the run — hence it is gated on shouldUseV2RunTable upstream.) + expect(await store.findRun(where, { select: { id: true }, tables: "legacy" })).toBeNull(); + } + ); + + postgresTest( + "clearIdempotencyKey fans out across both tables (byPredicate hits v2; byFriendlyIds partitions a mixed array)", + async ({ prisma }) => { + const { organization, project, environment } = await seedEnvironment(prisma); + const store = new PostgresRunStore({ prisma, readOnlyPrisma: prisma }); + + // byPredicate carries no id, so it must reach task_run_v2 to clear a v2 run. 
+ const v2Pred = RunId.generateKsuid(); + await seedRoutedRun(prisma, { + id: v2Pred.id, + friendlyId: v2Pred.friendlyId, + organizationId: organization.id, + projectId: project.id, + runtimeEnvironmentId: environment.id, + idempotencyKey: "kp-v2", + taskIdentifier: "my-task", + }); + + const predResult = await store.clearIdempotencyKey({ + byPredicate: { + idempotencyKey: "kp-v2", + taskIdentifier: "my-task", + runtimeEnvironmentId: environment.id, + }, + }); + expect(predResult.count).toBe(1); + expect( + ( + await prisma.taskRunV2.findFirst({ + where: { id: v2Pred.id }, + select: { idempotencyKey: true }, + }) + )?.idempotencyKey + ).toBeNull(); + + // byFriendlyIds with a MIXED (ksuid + cuid) array must clear rows in BOTH + // physical tables — the partition + sum is the cross-table behaviour. + const v2F = RunId.generateKsuid(); + const legacyF = RunId.generate(); + await seedRoutedRun(prisma, { + id: v2F.id, + friendlyId: v2F.friendlyId, + organizationId: organization.id, + projectId: project.id, + runtimeEnvironmentId: environment.id, + idempotencyKey: "kf-v2", + taskIdentifier: "my-task", + }); + await seedRoutedRun(prisma, { + id: legacyF.id, + friendlyId: legacyF.friendlyId, + organizationId: organization.id, + projectId: project.id, + runtimeEnvironmentId: environment.id, + idempotencyKey: "kf-legacy", + taskIdentifier: "my-task", + }); + + const friendlyResult = await store.clearIdempotencyKey({ + byFriendlyIds: [v2F.friendlyId, legacyF.friendlyId], + }); + expect(friendlyResult.count).toBe(2); + expect( + ( + await prisma.taskRunV2.findFirst({ + where: { id: v2F.id }, + select: { idempotencyKey: true }, + }) + )?.idempotencyKey + ).toBeNull(); + expect( + ( + await prisma.taskRun.findFirst({ + where: { id: legacyF.id }, + select: { idempotencyKey: true }, + }) + )?.idempotencyKey + ).toBeNull(); + } + ); + postgresTest( "expireRunsBatch with a mixed array updates both tables and returns the combined count", async ({ prisma }) => { diff --git 
a/internal-packages/run-store/src/PostgresRunStore.ts b/internal-packages/run-store/src/PostgresRunStore.ts index ff78e246f94..d0d9895bd6a 100644 --- a/internal-packages/run-store/src/PostgresRunStore.ts +++ b/internal-packages/run-store/src/PostgresRunStore.ts @@ -13,6 +13,7 @@ import type { CreateFailedRunInput, CreateRunInput, ExpireSnapshotInput, + FindRunTableScope, LockRunData, ReadClient, RescheduleSnapshotInput, @@ -114,8 +115,16 @@ export class PostgresRunStore implements RunStore { async #findFirstAcrossTables( prisma: ReadClient, where: Prisma.TaskRunWhereInput, - args: { select?: Prisma.TaskRunSelect; include?: Prisma.TaskRunInclude } + args: { select?: Prisma.TaskRunSelect; include?: Prisma.TaskRunInclude }, + tables: FindRunTableScope = "both" ): Promise { + // Legacy-only scope: the caller knows the run can't be in task_run_v2 (e.g. + // idempotency dedup for an org not cut over to v2), so skip the second, + // empty v2 query and keep this a single-table read on the hot path. 
+ if (tables === "legacy") { + return prisma.taskRun.findFirst({ where, ...args }); + } + const v2Model = prisma.taskRunV2 as unknown as typeof prisma.taskRun; const [legacyRun, v2Run] = await Promise.all([ @@ -755,12 +764,12 @@ export class PostgresRunStore implements RunStore { findRun( where: Prisma.TaskRunWhereInput, - args: { select: S }, + args: { select: S; tables?: FindRunTableScope }, client?: ReadClient ): Promise | null>; findRun( where: Prisma.TaskRunWhereInput, - args: { include: I }, + args: { include: I; tables?: FindRunTableScope }, client?: ReadClient ): Promise | null>; findRun( @@ -769,10 +778,12 @@ export class PostgresRunStore implements RunStore { ): Promise; async findRun( where: Prisma.TaskRunWhereInput, - argsOrClient?: { select?: Prisma.TaskRunSelect; include?: Prisma.TaskRunInclude } | ReadClient, + argsOrClient?: + | { select?: Prisma.TaskRunSelect; include?: Prisma.TaskRunInclude; tables?: FindRunTableScope } + | ReadClient, client?: ReadClient ): Promise { - const { args, prisma } = this.#resolveReadArgs(argsOrClient, client); + const { args, prisma, tables } = this.#resolveReadArgs(argsOrClient, client); const routingKey = this.#routingKeyOf(where); if (routingKey !== undefined) { @@ -781,8 +792,8 @@ export class PostgresRunStore implements RunStore { } // Non-id predicate (e.g. idempotency-key dedup): the match can be in - // either table, so read both. - return this.#findFirstAcrossTables(prisma, where, args); + // either table, so read both (unless the caller scopes to legacy-only). 
+ return this.#findFirstAcrossTables(prisma, where, args, tables); } findRunOrThrow( @@ -1298,23 +1309,32 @@ export class PostgresRunStore implements RunStore { */ #resolveReadArgs( argsOrClient: - | { select?: Prisma.TaskRunSelect; include?: Prisma.TaskRunInclude } + | { select?: Prisma.TaskRunSelect; include?: Prisma.TaskRunInclude; tables?: FindRunTableScope } | ReadClient | undefined, client: ReadClient | undefined ): { args: { select?: Prisma.TaskRunSelect; include?: Prisma.TaskRunInclude }; prisma: ReadClient; + tables: FindRunTableScope; } { const isProjection = typeof argsOrClient === "object" && argsOrClient !== null && - ("select" in argsOrClient || "include" in argsOrClient); + ("select" in argsOrClient || "include" in argsOrClient || "tables" in argsOrClient); if (isProjection) { + // Split the table-scope hint out of the args that get spread into Prisma + // (which would reject an unknown `tables` field). + const { tables, ...prismaArgs } = argsOrClient as { + select?: Prisma.TaskRunSelect; + include?: Prisma.TaskRunInclude; + tables?: FindRunTableScope; + }; return { - args: argsOrClient as { select?: Prisma.TaskRunSelect; include?: Prisma.TaskRunInclude }, + args: prismaArgs, prisma: client ?? this.readOnlyPrisma, + tables: tables ?? "both", }; } @@ -1322,6 +1342,7 @@ export class PostgresRunStore implements RunStore { return { args: {}, prisma: (argsOrClient as ReadClient | undefined) ?? this.readOnlyPrisma, + tables: "both", }; } } diff --git a/internal-packages/run-store/src/types.ts b/internal-packages/run-store/src/types.ts index 319ef187814..4f2161b727e 100644 --- a/internal-packages/run-store/src/types.ts +++ b/internal-packages/run-store/src/types.ts @@ -232,6 +232,18 @@ export type ClearIdempotencyKeyInput = export type TaskRunWithWaitpoint = TaskRun & { associatedWaitpoint: Waitpoint | null }; +/** + * Which physical run tables a non-id `findRun` predicate should read. + * + * Defaults to `"both"` (the safe cross-table behaviour). 
A caller that KNOWS + * the run can only be in the legacy table — e.g. the idempotency-key dedup for + * an org that is not cut over to `task_run_v2` — can pass `"legacy"` to skip the + * second (empty) `task_run_v2` query and keep the trigger hot path single-table. + * Only meaningful for non-id predicates; id/friendlyId reads already route to + * exactly one table by id format. + */ +export type FindRunTableScope = "both" | "legacy"; + export interface RunStore { // Create createRun(params: CreateRunInput, tx?: PrismaClientOrTransaction): Promise; @@ -332,12 +344,12 @@ export interface RunStore { // Read findRun( where: Prisma.TaskRunWhereInput, - args: { select: S }, + args: { select: S; tables?: FindRunTableScope }, client?: ReadClient ): Promise | null>; findRun( where: Prisma.TaskRunWhereInput, - args: { include: I }, + args: { include: I; tables?: FindRunTableScope }, client?: ReadClient ): Promise | null>; findRun(where: Prisma.TaskRunWhereInput, client?: ReadClient): Promise; From 5e5577ab90dc69133fd921ab6e286f86f5a3e745 Mon Sep 17 00:00:00 2001 From: Dan Sutton Date: Mon, 22 Jun 2026 20:56:04 +0100 Subject: [PATCH 70/83] perf(webapp,run-store): scope cross-table run reads for non-v2 orgs; close remaining cross-table sites Make the v2 path both correct and performant for turning the flag on: - findRuns gains an optional tables: "legacy" | "both" scope (mirroring findRun), threaded through hydrateChildRuns/hydrateParentAndRoot. While an org is not on v2 its runs only live in TaskRun, so callers pass "legacy" to skip the empty task_run_v2 query. - ApiRetrieveRun resolves parent/root and children in parallel (one round-trip instead of two) and scopes the reads to legacy for non-v2 orgs, so the public run-retrieve no longer pays an extra both-table query on every call. - The run-inspector side panel resolves parent/root by id across both tables (was a table-bound relation select that returned null for a cross-table parent in the mixed window). 
- recover-stuck-runs joins TaskRunExecutionSnapshot against TaskRun UNION task_run_v2 so a stuck v2 run is found and re-enqueued. --- .../v3/ApiRetrieveRunPresenter.server.ts | 32 +++++++++------ .../app/routes/resources.runs.$runParam.ts | 29 ++++++++------ apps/webapp/app/v3/runHierarchy.server.ts | 13 ++++-- .../run-store/src/PostgresRunStore.test.ts | 40 +++++++++++++++++++ .../run-store/src/PostgresRunStore.ts | 17 +++++++- internal-packages/run-store/src/types.ts | 3 ++ scripts/recover-stuck-runs.ts | 12 ++++-- 7 files changed, 113 insertions(+), 33 deletions(-) diff --git a/apps/webapp/app/presenters/v3/ApiRetrieveRunPresenter.server.ts b/apps/webapp/app/presenters/v3/ApiRetrieveRunPresenter.server.ts index b2f7b46d554..b3ceb00372b 100644 --- a/apps/webapp/app/presenters/v3/ApiRetrieveRunPresenter.server.ts +++ b/apps/webapp/app/presenters/v3/ApiRetrieveRunPresenter.server.ts @@ -24,6 +24,8 @@ import { import { generatePresignedUrl } from "~/v3/objectStore.server"; import { runStore } from "~/v3/runStore.server"; import { hydrateParentAndRoot, hydrateChildRuns } from "~/v3/runHierarchy.server"; +import { shouldUseV2RunTable } from "~/v3/runTableV2.server"; +import { env as serverEnv } from "~/env.server"; import { tracer } from "~/v3/tracer.server"; import { startSpanWithEnv } from "~/v3/tracing.server"; @@ -147,18 +149,24 @@ export class ApiRetrieveRunPresenter { // legacy run's v2 children), which arise in the mixed window, would come // back null/empty. Resolve parent/root by id (RunStore routes by format) // and children by a both-table predicate. 
- const { parentTaskRun, rootTaskRun } = await hydrateParentAndRoot( - { parentTaskRunId: pgRow.parentTaskRunId, rootTaskRunId: pgRow.rootTaskRunId }, - { runtimeEnvironmentId: env.id }, - commonRunSelect, - $replica - ); - const childRuns = await hydrateChildRuns( - pgRow.id, - { runtimeEnvironmentId: env.id }, - commonRunSelect, - $replica - ); + // While the org isn't cut over to v2 its runs only live in TaskRun, so + // scope the cross-table reads to "legacy" and skip the empty task_run_v2 + // query; once it's on v2 a child can be cross-table, so read both. The + // parent/root and child reads run in parallel (one round-trip, not two). + const tables = shouldUseV2RunTable(env.organization.featureFlags, { + nativeRealtimeEnabled: serverEnv.REALTIME_BACKEND_NATIVE_ENABLED === "1", + }) + ? "both" + : "legacy"; + const [{ parentTaskRun, rootTaskRun }, childRuns] = await Promise.all([ + hydrateParentAndRoot( + { parentTaskRunId: pgRow.parentTaskRunId, rootTaskRunId: pgRow.rootTaskRunId }, + { runtimeEnvironmentId: env.id, tables }, + commonRunSelect, + $replica + ), + hydrateChildRuns(pgRow.id, { runtimeEnvironmentId: env.id, tables }, commonRunSelect, $replica), + ]); return { ...pgRow, parentTaskRun, rootTaskRun, childRuns, isBuffered: false }; } diff --git a/apps/webapp/app/routes/resources.runs.$runParam.ts b/apps/webapp/app/routes/resources.runs.$runParam.ts index 38e17531f6f..878f611e97c 100644 --- a/apps/webapp/app/routes/resources.runs.$runParam.ts +++ b/apps/webapp/app/routes/resources.runs.$runParam.ts @@ -7,6 +7,7 @@ import { requireUserId } from "~/services/session.server"; import { v3RunParamsSchema } from "~/utils/pathBuilder"; import { machinePresetFromName, machinePresetFromRun } from "~/v3/machinePresets.server"; import { runStore } from "~/v3/runStore.server"; +import { hydrateParentAndRoot } from "~/v3/runHierarchy.server"; import { FINAL_ATTEMPT_STATUSES, isFinalRunStatus } from "~/v3/taskStatus"; export type RunInspectorData = 
UseDataFunctionReturn; @@ -102,16 +103,11 @@ export const loader = async ({ request, params }: LoaderFunctionArgs) => { }, }, }, - parentTaskRun: { - select: { - friendlyId: true, - }, - }, - rootTaskRun: { - select: { - friendlyId: true, - }, - }, + // Scalar parent/root pointers, NOT the table-bound relations: a relation + // select resolves null for a cross-table parent/root (a v2 run's legacy + // parent or vice versa in the mixed window). Resolve by id below. + parentTaskRunId: true, + rootTaskRunId: true, }, }, $replica @@ -121,6 +117,15 @@ export const loader = async ({ request, params }: LoaderFunctionArgs) => { throw new Response("Not found", { status: 404 }); } + // Resolve parent/root across both run tables by id (RunStore routes by id + // format), scoped to this run's environment. + const { parentTaskRun, rootTaskRun } = await hydrateParentAndRoot( + { parentTaskRunId: run.parentTaskRunId, rootTaskRunId: run.rootTaskRunId }, + { runtimeEnvironmentId: run.runtimeEnvironment.id }, + { friendlyId: true }, + $replica + ); + const isFinished = isFinalRunStatus(run.status); const finishedAttempt = isFinished @@ -187,8 +192,8 @@ export const loader = async ({ request, params }: LoaderFunctionArgs) => { baseCostInCents: run.baseCostInCents, maxAttempts: run.maxAttempts ?? undefined, version: run.lockedToVersion?.version, - parentTaskRunId: run.parentTaskRun?.friendlyId ?? undefined, - rootTaskRunId: run.rootTaskRun?.friendlyId ?? undefined, + parentTaskRunId: parentTaskRun?.friendlyId ?? undefined, + rootTaskRunId: rootTaskRun?.friendlyId ?? 
undefined, }, queue: { name: run.queue, diff --git a/apps/webapp/app/v3/runHierarchy.server.ts b/apps/webapp/app/v3/runHierarchy.server.ts index 5d96eb01247..0312ba87927 100644 --- a/apps/webapp/app/v3/runHierarchy.server.ts +++ b/apps/webapp/app/v3/runHierarchy.server.ts @@ -1,4 +1,5 @@ import type { Prisma, PrismaClientOrTransaction, PrismaReplicaClient } from "@trigger.dev/database"; +import type { FindRunTableScope } from "@internal/run-store"; import { runStore } from "~/v3/runStore.server"; type ReadClient = PrismaClientOrTransaction | PrismaReplicaClient; @@ -22,7 +23,7 @@ type ReadClient = PrismaClientOrTransaction | PrismaReplicaClient; */ export async function hydrateParentAndRoot( ids: { parentTaskRunId: string | null; rootTaskRunId: string | null }, - scope: { runtimeEnvironmentId: string }, + scope: { runtimeEnvironmentId: string; tables?: FindRunTableScope }, select: S, client?: ReadClient ): Promise<{ @@ -33,14 +34,14 @@ export async function hydrateParentAndRoot( ids.parentTaskRunId ? runStore.findRun( { id: ids.parentTaskRunId, runtimeEnvironmentId: scope.runtimeEnvironmentId }, - { select }, + { select, tables: scope.tables }, client ) : Promise.resolve(null), ids.rootTaskRunId ? runStore.findRun( { id: ids.rootTaskRunId, runtimeEnvironmentId: scope.runtimeEnvironmentId }, - { select }, + { select, tables: scope.tables }, client ) : Promise.resolve(null), @@ -62,7 +63,7 @@ export async function hydrateParentAndRoot( */ export async function hydrateChildRuns( parentRunId: string, - scope: { runtimeEnvironmentId: string }, + scope: { runtimeEnvironmentId: string; tables?: FindRunTableScope }, select: S, client?: ReadClient ): Promise[]> { @@ -73,6 +74,10 @@ export async function hydrateChildRuns( runtimeEnvironmentId: scope.runtimeEnvironmentId, }, select, + // parentTaskRunId is a non-id predicate, so this reads BOTH tables by + // default. Callers that know the org isn't on v2 pass tables:"legacy" to + // skip the empty task_run_v2 query. 
+ tables: scope.tables, }, client ) as Promise[]>; diff --git a/internal-packages/run-store/src/PostgresRunStore.test.ts b/internal-packages/run-store/src/PostgresRunStore.test.ts index 238ab73726e..d8f92a70b97 100644 --- a/internal-packages/run-store/src/PostgresRunStore.test.ts +++ b/internal-packages/run-store/src/PostgresRunStore.test.ts @@ -2088,6 +2088,46 @@ describe("PostgresRunStore — table routing by id format", () => { } ); + postgresTest( + "findRuns tables:'legacy' skips task_run_v2 (cross-table children hot-path scope)", + async ({ prisma }) => { + const { organization, project, environment } = await seedEnvironment(prisma); + const store = new PostgresRunStore({ prisma, readOnlyPrisma: prisma }); + + // A legacy (cuid) child and a v2 (ksuid) child of the same parent — the + // cross-table mixed-window hierarchy. + const parentId = RunId.generate().id; + const legacyChild = RunId.generate(); + const v2Child = RunId.generateKsuid(); + for (const child of [legacyChild, v2Child]) { + await seedRoutedRun(prisma, { + id: child.id, + friendlyId: child.friendlyId, + organizationId: organization.id, + projectId: project.id, + runtimeEnvironmentId: environment.id, + parentTaskRunId: parentId, + taskIdentifier: "my-task", + }); + } + + const where = { parentTaskRunId: parentId, runtimeEnvironmentId: environment.id }; + + // Default (both tables) returns both children — required once an org is on v2. + const both = (await store.findRuns({ where, select: { id: true } })) as { id: string }[]; + expect(new Set(both.map((r) => r.id))).toEqual(new Set([legacyChild.id, v2Child.id])); + + // "legacy" scope skips task_run_v2 — the hot-path optimisation for an org + // not cut over to v2 (no v2 children exist), so only the legacy child. 
+ const legacy = (await store.findRuns({ + where, + select: { id: true }, + tables: "legacy", + })) as { id: string }[]; + expect(legacy.map((r) => r.id)).toEqual([legacyChild.id]); + } + ); + postgresTest( "clearIdempotencyKey fans out across both tables (byPredicate hits v2; byFriendlyIds partitions a mixed array)", async ({ prisma }) => { diff --git a/internal-packages/run-store/src/PostgresRunStore.ts b/internal-packages/run-store/src/PostgresRunStore.ts index d0d9895bd6a..e083af6b4ed 100644 --- a/internal-packages/run-store/src/PostgresRunStore.ts +++ b/internal-packages/run-store/src/PostgresRunStore.ts @@ -840,6 +840,7 @@ export class PostgresRunStore implements RunStore { take?: number; skip?: number; cursor?: Prisma.TaskRunWhereUniqueInput; + tables?: FindRunTableScope; }, client?: ReadClient ): Promise[]>; @@ -851,6 +852,7 @@ export class PostgresRunStore implements RunStore { take?: number; skip?: number; cursor?: Prisma.TaskRunWhereUniqueInput; + tables?: FindRunTableScope; }, client?: ReadClient ): Promise[]>; @@ -861,11 +863,12 @@ export class PostgresRunStore implements RunStore { take?: number; skip?: number; cursor?: Prisma.TaskRunWhereUniqueInput; + tables?: FindRunTableScope; }, client?: ReadClient ): Promise; async findRuns( - args: { + rawArgs: { where: Prisma.TaskRunWhereInput; select?: Prisma.TaskRunSelect; include?: Prisma.TaskRunInclude; @@ -873,11 +876,16 @@ export class PostgresRunStore implements RunStore { take?: number; skip?: number; cursor?: Prisma.TaskRunWhereUniqueInput; + tables?: FindRunTableScope; }, client?: ReadClient ): Promise { const prisma = client ?? this.readOnlyPrisma; + // Split the table-scope hint out of the args that get spread into Prisma + // (which would reject an unknown `tables` field) before anything reads them. + const { tables: tableScope = "both", ...args } = rawArgs; + // A run lives in exactly one physical table, chosen by its id format. An // `id: { in: [...] 
}` predicate of a single id format addresses ONE table; // any other predicate may span both `TaskRun` (legacy cuid) and @@ -887,7 +895,12 @@ export class PostgresRunStore implements RunStore { const legacyModel = prisma.taskRun; const v2Model = prisma.taskRunV2 as unknown as typeof prisma.taskRun; - const { queryLegacy, queryV2 } = this.#tablesForWhere(args.where); + const tablesForWhere = this.#tablesForWhere(args.where); + const queryLegacy = tablesForWhere.queryLegacy; + // A "legacy" scope hint (the caller knows the run can't be in task_run_v2 — + // e.g. an org not cut over to v2) skips the empty v2 query, the same + // hot-path optimisation findRun uses. + const queryV2 = tableScope === "legacy" ? false : tablesForWhere.queryV2; // No candidate table (e.g. an empty `id: { in: [] }`) → matches nothing. if (!queryLegacy && !queryV2) { diff --git a/internal-packages/run-store/src/types.ts b/internal-packages/run-store/src/types.ts index 4f2161b727e..a64476cce32 100644 --- a/internal-packages/run-store/src/types.ts +++ b/internal-packages/run-store/src/types.ts @@ -374,6 +374,7 @@ export interface RunStore { take?: number; skip?: number; cursor?: Prisma.TaskRunWhereUniqueInput; + tables?: FindRunTableScope; }, client?: ReadClient ): Promise[]>; @@ -385,6 +386,7 @@ export interface RunStore { take?: number; skip?: number; cursor?: Prisma.TaskRunWhereUniqueInput; + tables?: FindRunTableScope; }, client?: ReadClient ): Promise[]>; @@ -395,6 +397,7 @@ export interface RunStore { take?: number; skip?: number; cursor?: Prisma.TaskRunWhereUniqueInput; + tables?: FindRunTableScope; }, client?: ReadClient ): Promise; diff --git a/scripts/recover-stuck-runs.ts b/scripts/recover-stuck-runs.ts index 28bb4e85e46..0a45ee9e127 100755 --- a/scripts/recover-stuck-runs.ts +++ b/scripts/recover-stuck-runs.ts @@ -188,8 +188,10 @@ async function main() { console.log(`📊 Found ${runIds.length} runs in currentConcurrency set`); // Query database for latest snapshots and queue info of 
these runs. - // NOTE: raw join of TaskRunExecutionSnapshot to TaskRun, the one TaskRun read not behind - // RunStore (a join, not a by-id read, in an ops script). Revisit at table cutover. + // A snapshot's runId can reference a run in EITHER physical table during + // the runTableV2 cutover, so join against TaskRun UNION task_run_v2 by id; + // a stuck v2 (KSUID) run would otherwise be dropped from the join and never + // re-enqueued. (Raw join in an ops script, not a by-id RunStore read.) const runInfo = await prisma.$queryRaw< Array<{ runId: string; @@ -214,7 +216,11 @@ async function main() { r."queue", r."concurrencyKey" FROM "TaskRunExecutionSnapshot" s - INNER JOIN "TaskRun" r ON r.id = s."runId" + INNER JOIN ( + SELECT id, "organizationId", "projectId", "runtimeEnvironmentId", "taskIdentifier", "queue", "concurrencyKey" FROM "TaskRun" + UNION ALL + SELECT id, "organizationId", "projectId", "runtimeEnvironmentId", "taskIdentifier", "queue", "concurrencyKey" FROM task_run_v2 + ) r ON r.id = s."runId" WHERE s."runId" = ANY(${runIds}) AND s."isValid" = true ORDER BY s."runId", s."createdAt" DESC From 43dffdf8ed25cd000fba28628a2dfbdce06316f4 Mon Sep 17 00:00:00 2001 From: Dan Sutton Date: Mon, 22 Jun 2026 21:04:33 +0100 Subject: [PATCH 71/83] fix(webapp): resolve realtime stream parent/root target across both run tables The two realtime v1-streams routes (read/create + append) resolved a target:"parent"|"root" via a table-bound parentTaskRun/rootTaskRun relation select, which returns null for a cross-table parent/root in the runTableV2 mixed window and 404s a target that exists. Select the scalar parentTaskRunId/rootTaskRunId and resolve the target by id through RunStore (routes by id format), matching the presenter fixes. The "self" target is unchanged. 
--- ...streams.$runId.$target.$streamId.append.ts | 36 +++++----- ...ime.v1.streams.$runId.$target.$streamId.ts | 70 ++++++++++--------- 2 files changed, 58 insertions(+), 48 deletions(-) diff --git a/apps/webapp/app/routes/realtime.v1.streams.$runId.$target.$streamId.append.ts b/apps/webapp/app/routes/realtime.v1.streams.$runId.$target.$streamId.append.ts index 7cb813a6dec..ec9c11568cb 100644 --- a/apps/webapp/app/routes/realtime.v1.streams.$runId.$target.$streamId.append.ts +++ b/apps/webapp/app/routes/realtime.v1.streams.$runId.$target.$streamId.append.ts @@ -36,16 +36,8 @@ const { action } = createActionApiRoute( select: { id: true, friendlyId: true, - parentTaskRun: { - select: { - friendlyId: true, - }, - }, - rootTaskRun: { - select: { - friendlyId: true, - }, - }, + parentTaskRunId: true, + rootTaskRunId: true, }, }, $replica @@ -55,12 +47,24 @@ const { action } = createActionApiRoute( return new Response("Run not found", { status: 404 }); } - const targetId = - params.target === "self" - ? run.friendlyId - : params.target === "parent" - ? run.parentTaskRun?.friendlyId - : run.rootTaskRun?.friendlyId; + // parentTaskRunId/rootTaskRunId are scalar ids that may point at a run in + // the OTHER physical table (the runTableV2 mixed window), so resolve the + // target's friendlyId by id (RunStore routes by id format) rather than via a + // table-bound relation select, which would return null cross-table. + let targetId: string | undefined; + if (params.target === "self") { + targetId = run.friendlyId; + } else { + const targetScalarId = params.target === "parent" ? 
run.parentTaskRunId : run.rootTaskRunId; + if (targetScalarId) { + const target = await runStore.findRun( + { id: targetScalarId, runtimeEnvironmentId: authentication.environment.id }, + { select: { friendlyId: true } }, + $replica + ); + targetId = target?.friendlyId; + } + } if (!targetId) { return new Response("Target not found", { status: 404 }); diff --git a/apps/webapp/app/routes/realtime.v1.streams.$runId.$target.$streamId.ts b/apps/webapp/app/routes/realtime.v1.streams.$runId.$target.$streamId.ts index c71ad48d121..fa2cee9b110 100644 --- a/apps/webapp/app/routes/realtime.v1.streams.$runId.$target.$streamId.ts +++ b/apps/webapp/app/routes/realtime.v1.streams.$runId.$target.$streamId.ts @@ -14,6 +14,23 @@ const ParamsSchema = z.object({ streamId: z.string(), }); +// Resolve a parent/root stream target across BOTH run tables. The scalar +// parentTaskRunId/rootTaskRunId may reference a run in the other physical table +// during the runTableV2 mixed window; findRun routes by id format, so this +// resolves the target whichever table it lives in (a table-bound relation +// select would resolve null for a cross-table parent/root). 
+async function resolveStreamTargetById( + targetScalarId: string | null, + runtimeEnvironmentId: string +): Promise<{ friendlyId: string; streamBasinName: string | null } | null> { + if (!targetScalarId) return null; + return runStore.findRun( + { id: targetScalarId, runtimeEnvironmentId }, + { select: { friendlyId: true, streamBasinName: true } }, + $replica + ); +} + const { action } = createActionApiRoute( { params: ParamsSchema, @@ -29,18 +46,8 @@ const { action } = createActionApiRoute( id: true, friendlyId: true, streamBasinName: true, - parentTaskRun: { - select: { - friendlyId: true, - streamBasinName: true, - }, - }, - rootTaskRun: { - select: { - friendlyId: true, - streamBasinName: true, - }, - }, + parentTaskRunId: true, + rootTaskRunId: true, }, }, $replica @@ -50,12 +57,18 @@ const { action } = createActionApiRoute( return new Response("Run not found", { status: 404 }); } + // Resolve the target across BOTH run tables. parentTaskRunId/rootTaskRunId + // are scalar pointers that may reference a run in the OTHER physical table + // (the runTableV2 mixed window); a table-bound relation select would resolve + // null and 404 a target that exists. findRun routes by id format; "self" is + // the run itself. const targetRun = params.target === "self" - ? run - : params.target === "parent" - ? run.parentTaskRun - : run.rootTaskRun; + ? { friendlyId: run.friendlyId, streamBasinName: run.streamBasinName } + : await resolveStreamTargetById( + params.target === "parent" ? 
run.parentTaskRunId : run.rootTaskRunId, + authentication.environment.id + ); if (!targetRun?.friendlyId) { return new Response("Target not found", { status: 404 }); @@ -164,18 +177,8 @@ const loader = createLoaderApiRoute( id: true, friendlyId: true, streamBasinName: true, - parentTaskRun: { - select: { - friendlyId: true, - streamBasinName: true, - }, - }, - rootTaskRun: { - select: { - friendlyId: true, - streamBasinName: true, - }, - }, + parentTaskRunId: true, + rootTaskRunId: true, }, }, $replica @@ -187,12 +190,15 @@ const loader = createLoaderApiRoute( return new Response("Run not found", { status: 404 }); } + // Resolve the target across both run tables by id (the scalar parent/root + // pointer may be cross-table in the mixed window); "self" is the run itself. const targetRun = params.target === "self" - ? run - : params.target === "parent" - ? run.parentTaskRun - : run.rootTaskRun; + ? { friendlyId: run.friendlyId, streamBasinName: run.streamBasinName } + : await resolveStreamTargetById( + params.target === "parent" ? run.parentTaskRunId : run.rootTaskRunId, + authentication.environment.id + ); if (!targetRun?.friendlyId) { return new Response("Target not found", { status: 404 }); From e3393de3cdcdabf46e4fe8e400f55571bcca4e74 Mon Sep 17 00:00:00 2001 From: Dan Sutton Date: Mon, 22 Jun 2026 21:16:31 +0100 Subject: [PATCH 72/83] fix(webapp): scope cross-table run reads on the native switch, not the per-org flag A run's physical table is fixed by its id format, not an org's current runTableV2 flag. An org that was on v2 then flipped the flag off still holds v2 runs (they stay readable, routed by id), so scoping the dedup and hierarchy reads to "legacy" off the per-org flag would miss those v2 runs: it would silently drop a v2 run's children and parent on retrieve, and let a duplicate through idempotency dedup. Gate the scope on whether ANY v2 run can exist in the deployment (the native realtime master switch) instead. 
While native is off no v2 run exists anywhere, so "legacy" is safe and still skips the empty task_run_v2 query; once native is on, every read covers both tables. --- .../v3/ApiRetrieveRunPresenter.server.ts | 20 +++++++++---------- .../concerns/idempotencyKeys.server.ts | 19 ++++++++++++------ apps/webapp/app/v3/runHierarchy.server.ts | 6 ++++-- 3 files changed, 27 insertions(+), 18 deletions(-) diff --git a/apps/webapp/app/presenters/v3/ApiRetrieveRunPresenter.server.ts b/apps/webapp/app/presenters/v3/ApiRetrieveRunPresenter.server.ts index b3ceb00372b..2f391f6748c 100644 --- a/apps/webapp/app/presenters/v3/ApiRetrieveRunPresenter.server.ts +++ b/apps/webapp/app/presenters/v3/ApiRetrieveRunPresenter.server.ts @@ -24,7 +24,6 @@ import { import { generatePresignedUrl } from "~/v3/objectStore.server"; import { runStore } from "~/v3/runStore.server"; import { hydrateParentAndRoot, hydrateChildRuns } from "~/v3/runHierarchy.server"; -import { shouldUseV2RunTable } from "~/v3/runTableV2.server"; import { env as serverEnv } from "~/env.server"; import { tracer } from "~/v3/tracer.server"; import { startSpanWithEnv } from "~/v3/tracing.server"; @@ -149,15 +148,16 @@ export class ApiRetrieveRunPresenter { // legacy run's v2 children), which arise in the mixed window, would come // back null/empty. Resolve parent/root by id (RunStore routes by format) // and children by a both-table predicate. - // While the org isn't cut over to v2 its runs only live in TaskRun, so - // scope the cross-table reads to "legacy" and skip the empty task_run_v2 - // query; once it's on v2 a child can be cross-table, so read both. The - // parent/root and child reads run in parallel (one round-trip, not two). - const tables = shouldUseV2RunTable(env.organization.featureFlags, { - nativeRealtimeEnabled: serverEnv.REALTIME_BACKEND_NATIVE_ENABLED === "1", - }) - ? 
"both" - : "legacy"; + // Scope the cross-table reads on whether ANY v2 run can exist in this + // deployment (the native master switch), NOT the org's current flag: a + // run's table is fixed by its id format, and an org that was on v2 then + // flipped off still HAS v2 runs (and v2 children) — runTableV2.server.ts + // documents that they stay readable. pgRow is routed here by id format, so + // it can be a v2 run for a now-non-v2 org; scoping to "legacy" off the + // per-org flag would then silently drop its v2 children/parent. While + // native is off no v2 run exists anywhere, so "legacy" is safe for all and + // skips the empty task_run_v2 query. The reads also run in parallel. + const tables = serverEnv.REALTIME_BACKEND_NATIVE_ENABLED === "1" ? "both" : "legacy"; const [{ parentTaskRun, rootTaskRun }, childRuns] = await Promise.all([ hydrateParentAndRoot( { parentTaskRunId: pgRow.parentTaskRunId, rootTaskRunId: pgRow.rootTaskRunId }, diff --git a/apps/webapp/app/runEngine/concerns/idempotencyKeys.server.ts b/apps/webapp/app/runEngine/concerns/idempotencyKeys.server.ts index 4c97084b343..b21efc97917 100644 --- a/apps/webapp/app/runEngine/concerns/idempotencyKeys.server.ts +++ b/apps/webapp/app/runEngine/concerns/idempotencyKeys.server.ts @@ -302,11 +302,8 @@ export class IdempotencyKeyConcern { return { isCached: false, idempotencyKey, idempotencyKeyExpiresAt }; } - // Resolve the org's v2-cutover state ONCE: it gates both the cross-table - // idempotency lookup below and the pre-gate claim further down. While the - // org is not on v2 (the default, and every org while native realtime is - // off) the run can only be in the legacy table, so scope the dedup read to - // "legacy" and skip the empty task_run_v2 query on the trigger hot path. + // Resolve whether THIS org currently mints v2 runs ONCE, for the pre-gate + // claim further down (claimEligible). 
const orgFeatureFlags = (request.environment.organization?.featureFlags as | Record @@ -316,6 +313,16 @@ export class IdempotencyKeyConcern { nativeRealtimeEnabled: env.REALTIME_BACKEND_NATIVE_ENABLED === "1", }); + // Scope the idempotency dedup read on whether ANY v2 run can exist in this + // deployment (the native master switch), NOT on whether this org currently + // mints v2. A run's table is fixed by its id format, so an org that was on + // v2 then flipped off still holds v2 runs an idempotency key can match + // (runTableV2.server.ts documents they stay readable); gating the read on + // orgUsesV2 would miss them and let a duplicate through. While native is off + // no v2 run exists anywhere, so "legacy" is safe and skips the empty + // task_run_v2 query on the trigger hot path; once native is on, read both. + const anyV2RunsPossible = env.REALTIME_BACKEND_NATIVE_ENABLED === "1"; + const existingRun = idempotencyKey ? await runStore.findRun( { @@ -327,7 +334,7 @@ export class IdempotencyKeyConcern { include: { associatedWaitpoint: true, }, - tables: orgUsesV2 ? "both" : "legacy", + tables: anyV2RunsPossible ? "both" : "legacy", }, this.prisma ) diff --git a/apps/webapp/app/v3/runHierarchy.server.ts b/apps/webapp/app/v3/runHierarchy.server.ts index 0312ba87927..5d71442df1c 100644 --- a/apps/webapp/app/v3/runHierarchy.server.ts +++ b/apps/webapp/app/v3/runHierarchy.server.ts @@ -75,8 +75,10 @@ export async function hydrateChildRuns( }, select, // parentTaskRunId is a non-id predicate, so this reads BOTH tables by - // default. Callers that know the org isn't on v2 pass tables:"legacy" to - // skip the empty task_run_v2 query. + // default. Callers that know no v2 run can exist (native realtime off, so + // task_run_v2 is empty deployment-wide) pass tables:"legacy" to skip the + // empty query. Scope on the deployment switch, NOT a per-org flag: a run's + // table is fixed by id format, so a flipped-off org still has v2 children. 
tables: scope.tables, }, client From 7c5f81b5c5ad672d268c184c00cf1412b417a709 Mon Sep 17 00:00:00 2001 From: Dan Sutton Date: Mon, 22 Jun 2026 21:21:59 +0100 Subject: [PATCH 73/83] chore(run-store,scripts): harden cross-table reads for a future v2 copy window Forward-looking robustness: - recover-stuck-runs joins TaskRun UNION (not UNION ALL) task_run_v2, so if a later copy step leaves a run briefly in both tables under the same id the identical clones collapse to one row and DISTINCT ON stays unambiguous. - Document that the findRuns "legacy" scope hint is for non-id predicates; an id read already routes by format, so the hint is redundant there (and a KSUID-only id predicate scoped legacy correctly returns nothing). --- internal-packages/run-store/src/PostgresRunStore.ts | 10 +++++++--- scripts/recover-stuck-runs.ts | 7 +++++-- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/internal-packages/run-store/src/PostgresRunStore.ts b/internal-packages/run-store/src/PostgresRunStore.ts index e083af6b4ed..5d474b3951c 100644 --- a/internal-packages/run-store/src/PostgresRunStore.ts +++ b/internal-packages/run-store/src/PostgresRunStore.ts @@ -897,9 +897,13 @@ export class PostgresRunStore implements RunStore { const tablesForWhere = this.#tablesForWhere(args.where); const queryLegacy = tablesForWhere.queryLegacy; - // A "legacy" scope hint (the caller knows the run can't be in task_run_v2 — - // e.g. an org not cut over to v2) skips the empty v2 query, the same - // hot-path optimisation findRun uses. + // A "legacy" scope hint (the caller knows no run can be in task_run_v2 — e.g. + // no v2 run exists anywhere because native realtime is off) skips the empty v2 + // query, the same hot-path optimisation findRun uses. 
It is meant for non-id + // predicates (parentTaskRunId, idempotencyKey); pairing it with a KSUID-only + // id predicate forces both tables off and returns [] (correct: a v2 id can't + // match a legacy-only read), but id reads already route by format, so the + // hint is redundant there rather than passed by real callers. const queryV2 = tableScope === "legacy" ? false : tablesForWhere.queryV2; // No candidate table (e.g. an empty `id: { in: [] }`) → matches nothing. diff --git a/scripts/recover-stuck-runs.ts b/scripts/recover-stuck-runs.ts index 0a45ee9e127..7770a455aec 100755 --- a/scripts/recover-stuck-runs.ts +++ b/scripts/recover-stuck-runs.ts @@ -191,7 +191,10 @@ async function main() { // A snapshot's runId can reference a run in EITHER physical table during // the runTableV2 cutover, so join against TaskRun UNION task_run_v2 by id; // a stuck v2 (KSUID) run would otherwise be dropped from the join and never - // re-enqueued. (Raw join in an ops script, not a by-id RunStore read.) + // re-enqueued. UNION (not UNION ALL) so that if a future copy step leaves a + // run briefly in both tables under the same id, the identical clones collapse + // to one row and DISTINCT ON stays unambiguous. (Raw join in an ops script, + // not a by-id RunStore read.) 
const runInfo = await prisma.$queryRaw< Array<{ runId: string; @@ -218,7 +221,7 @@ async function main() { FROM "TaskRunExecutionSnapshot" s INNER JOIN ( SELECT id, "organizationId", "projectId", "runtimeEnvironmentId", "taskIdentifier", "queue", "concurrencyKey" FROM "TaskRun" - UNION ALL + UNION SELECT id, "organizationId", "projectId", "runtimeEnvironmentId", "taskIdentifier", "queue", "concurrencyKey" FROM task_run_v2 ) r ON r.id = s."runId" WHERE s."runId" = ANY(${runIds}) From 8b6a7ca41c5512e09a970139935a48e8fa44876b Mon Sep 17 00:00:00 2001 From: Dan Sutton Date: Mon, 22 Jun 2026 21:28:02 +0100 Subject: [PATCH 74/83] docs(webapp): tighten the native-switch scope comments State that no v2 run exists "until native is enabled" (minting requires it) rather than the absolute "while native is off", which is not true after a deployment-wide native rollback. Comment-only. --- .../presenters/v3/ApiRetrieveRunPresenter.server.ts | 12 ++++++------ .../app/runEngine/concerns/idempotencyKeys.server.ts | 7 ++++--- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/apps/webapp/app/presenters/v3/ApiRetrieveRunPresenter.server.ts b/apps/webapp/app/presenters/v3/ApiRetrieveRunPresenter.server.ts index 2f391f6748c..56450b0fb1d 100644 --- a/apps/webapp/app/presenters/v3/ApiRetrieveRunPresenter.server.ts +++ b/apps/webapp/app/presenters/v3/ApiRetrieveRunPresenter.server.ts @@ -151,12 +151,12 @@ export class ApiRetrieveRunPresenter { // Scope the cross-table reads on whether ANY v2 run can exist in this // deployment (the native master switch), NOT the org's current flag: a // run's table is fixed by its id format, and an org that was on v2 then - // flipped off still HAS v2 runs (and v2 children) — runTableV2.server.ts - // documents that they stay readable. pgRow is routed here by id format, so - // it can be a v2 run for a now-non-v2 org; scoping to "legacy" off the - // per-org flag would then silently drop its v2 children/parent. 
While - // native is off no v2 run exists anywhere, so "legacy" is safe for all and - // skips the empty task_run_v2 query. The reads also run in parallel. + // flipped off still HAS v2 runs (and v2 children) that runTableV2.server.ts + // documents as staying readable. pgRow is routed here by id format, so it + // can be a v2 run for a now-non-v2 org; scoping to "legacy" off the per-org + // flag would then silently drop its v2 children/parent. Until native is + // enabled no v2 run exists yet (minting requires it), so "legacy" is safe + // and skips the empty task_run_v2 query. The reads also run in parallel. const tables = serverEnv.REALTIME_BACKEND_NATIVE_ENABLED === "1" ? "both" : "legacy"; const [{ parentTaskRun, rootTaskRun }, childRuns] = await Promise.all([ hydrateParentAndRoot( diff --git a/apps/webapp/app/runEngine/concerns/idempotencyKeys.server.ts b/apps/webapp/app/runEngine/concerns/idempotencyKeys.server.ts index b21efc97917..efadf7d2c4a 100644 --- a/apps/webapp/app/runEngine/concerns/idempotencyKeys.server.ts +++ b/apps/webapp/app/runEngine/concerns/idempotencyKeys.server.ts @@ -318,9 +318,10 @@ export class IdempotencyKeyConcern { // mints v2. A run's table is fixed by its id format, so an org that was on // v2 then flipped off still holds v2 runs an idempotency key can match // (runTableV2.server.ts documents they stay readable); gating the read on - // orgUsesV2 would miss them and let a duplicate through. While native is off - // no v2 run exists anywhere, so "legacy" is safe and skips the empty - // task_run_v2 query on the trigger hot path; once native is on, read both. + // orgUsesV2 would miss them and let a duplicate through. Until native is + // enabled no v2 run exists yet (minting requires it), so "legacy" is safe and + // skips the empty task_run_v2 query on the trigger hot path; once native is + // on, read both. 
const anyV2RunsPossible = env.REALTIME_BACKEND_NATIVE_ENABLED === "1"; const existingRun = idempotencyKey From aff23d931a1122994ae1ae95d7dc630faa05f5cc Mon Sep 17 00:00:00 2001 From: Dan Sutton Date: Tue, 23 Jun 2026 01:00:22 +0100 Subject: [PATCH 75/83] fix(database,webapp): task_run_v2/TaskRun index parity, native-only v2 enable guard, FK-drift guard Make task_run_v2 a faithful (non-degraded) clone of TaskRun and make the v2 cutover safe to actually enable: - Add the INCLUDE (id) WITH (fillfactor=90) covering clause to task_run_v2's (runtimeEnvironmentId, createdAt DESC) index so the dashboard run-list query keeps index-only scans, matching TaskRun. Columns were already identical; v2 now has every index TaskRun has (plus a (createdAt, id) keyset index it needs for the cross-table cursor merge). - Reject enabling runTableV2 for an org unless realtimeBackend is "native" (validateFeatureFlagInvariants), wired into both admin feature-flag write routes. A v2 run minted while the org is still on Electric is invisible in realtime, so block the bad combination at write time, not just at read time. - Add a guard test that fails CI if a generated migration re-adds an incoming foreign key to TaskRun (after the decoupling drop) or adds one referencing task_run_v2: the Prisma drift footgun that would re-couple the tables or break cross-table run creation. 
--- ...i.v1.orgs.$organizationId.feature-flags.ts | 10 ++- ...i.v2.orgs.$organizationId.feature-flags.ts | 16 +++- apps/webapp/app/v3/featureFlags.ts | 39 ++++++++++ .../webapp/test/featureFlagInvariants.test.ts | 44 +++++++++++ apps/webapp/test/runTableFkDriftGuard.test.ts | 74 +++++++++++++++++++ .../migration.sql | 15 ++++ 6 files changed, 196 insertions(+), 2 deletions(-) create mode 100644 apps/webapp/test/featureFlagInvariants.test.ts create mode 100644 apps/webapp/test/runTableFkDriftGuard.test.ts create mode 100644 internal-packages/database/prisma/migrations/20260623090000_task_run_v2_covering_index/migration.sql diff --git a/apps/webapp/app/routes/admin.api.v1.orgs.$organizationId.feature-flags.ts b/apps/webapp/app/routes/admin.api.v1.orgs.$organizationId.feature-flags.ts index 513616470a0..621143c54b4 100644 --- a/apps/webapp/app/routes/admin.api.v1.orgs.$organizationId.feature-flags.ts +++ b/apps/webapp/app/routes/admin.api.v1.orgs.$organizationId.feature-flags.ts @@ -2,7 +2,7 @@ import { ActionFunctionArgs, LoaderFunctionArgs, json } from "@remix-run/server- import { z } from "zod"; import { prisma } from "~/db.server"; import { requireAdminApiRequest } from "~/services/personalAccessToken.server"; -import { validatePartialFeatureFlags } from "~/v3/featureFlags"; +import { validateFeatureFlagInvariants, validatePartialFeatureFlags } from "~/v3/featureFlags"; const ParamsSchema = z.object({ organizationId: z.string(), @@ -85,6 +85,14 @@ export async function action({ request, params }: ActionFunctionArgs) { ...validationResult.data, }; + // Enforce cross-flag invariants on the merged result (e.g. runTableV2 + // requires realtimeBackend=native). Checked on the merge so it also rejects + // turning realtime back to Electric while runTableV2 stays on. 
+ const invariant = validateFeatureFlagInvariants(mergedFlags); + if (!invariant.ok) { + return json({ error: invariant.error }, { status: 400 }); + } + // Update the organization's feature flags const updatedOrganization = await prisma.organization.update({ where: { diff --git a/apps/webapp/app/routes/admin.api.v2.orgs.$organizationId.feature-flags.ts b/apps/webapp/app/routes/admin.api.v2.orgs.$organizationId.feature-flags.ts index 6081febb526..c2a85833f72 100644 --- a/apps/webapp/app/routes/admin.api.v2.orgs.$organizationId.feature-flags.ts +++ b/apps/webapp/app/routes/admin.api.v2.orgs.$organizationId.feature-flags.ts @@ -5,7 +5,12 @@ import { z } from "zod"; import { prisma } from "~/db.server"; import { requireUser } from "~/services/session.server"; import { flags as getGlobalFlags } from "~/v3/featureFlags.server"; -import { FEATURE_FLAG, validatePartialFeatureFlags, getAllFlagControlTypes } from "~/v3/featureFlags"; +import { + FEATURE_FLAG, + validateFeatureFlagInvariants, + validatePartialFeatureFlags, + getAllFlagControlTypes, +} from "~/v3/featureFlags"; import { featuresForRequest } from "~/features.server"; // Session-auth route for the admin feature flags dialog. @@ -113,6 +118,15 @@ export async function action({ request, params }: ActionFunctionArgs) { { status: 400 } ); } + + // Enforce cross-flag invariants (e.g. runTableV2 requires + // realtimeBackend=native). This route replaces the whole set, so the + // validated data IS the final resolved set. 
+ const invariant = validateFeatureFlagInvariants(validationResult.data); + if (!invariant.ok) { + return json({ error: invariant.error }, { status: 400 }); + } + featureFlags = validationResult.data; } diff --git a/apps/webapp/app/v3/featureFlags.ts b/apps/webapp/app/v3/featureFlags.ts index a0a6146b485..2a51a9be8ea 100644 --- a/apps/webapp/app/v3/featureFlags.ts +++ b/apps/webapp/app/v3/featureFlags.ts @@ -95,6 +95,45 @@ export function validatePartialFeatureFlags(values: Record) { return FeatureFlagCatalogSchema.partial().safeParse(values); } +/** + * Cross-field invariant on a RESOLVED org flag set: `runTableV2` may only be on + * when the org's `realtimeBackend` is "native". + * + * New v2 runs mint a KSUID id (routing them to task_run_v2) and are only + * observable in realtime on the native backend; Electric is bound to + * public."TaskRun", so a v2 run minted while the org is still on Electric is + * invisible in realtime. `shouldUseV2RunTable` already enforces this at read + * time, but this guard blocks the dangerous combination at WRITE time so it can + * never be configured, including the enable-race where `runTableV2` is flipped + * on before `realtimeBackend=native` has propagated past the realtime cache. + * + * Pass the FINAL resolved set (after any merge) so it also rejects turning + * `realtimeBackend` off/to "electric" while `runTableV2` is still on. 
+ */ +export function validateFeatureFlagInvariants( + flags: Record +): { ok: true } | { ok: false; error: string } { + const runTableV2 = FeatureFlagCatalog[FEATURE_FLAG.runTableV2].safeParse( + flags[FEATURE_FLAG.runTableV2] + ); + if (!(runTableV2.success && runTableV2.data === true)) { + return { ok: true }; + } + + const backend = FeatureFlagCatalog[FEATURE_FLAG.realtimeBackend].safeParse( + flags[FEATURE_FLAG.realtimeBackend] + ); + if (backend.success && backend.data === "native") { + return { ok: true }; + } + + return { + ok: false, + error: + 'runTableV2 can only be enabled when realtimeBackend is "native". Set realtimeBackend="native" first (and let it propagate past the realtime cache), then enable runTableV2.', + }; +} + // Utility types for catalog-driven UI rendering export type FlagControlType = | { type: "boolean" } diff --git a/apps/webapp/test/featureFlagInvariants.test.ts b/apps/webapp/test/featureFlagInvariants.test.ts new file mode 100644 index 00000000000..ff4c4d48ce0 --- /dev/null +++ b/apps/webapp/test/featureFlagInvariants.test.ts @@ -0,0 +1,44 @@ +import { describe, expect, it } from "vitest"; +import { validateFeatureFlagInvariants } from "~/v3/featureFlags"; + +describe("validateFeatureFlagInvariants (runTableV2 requires native realtime)", () => { + it("allows runTableV2 on when realtimeBackend is native", () => { + expect( + validateFeatureFlagInvariants({ runTableV2: true, realtimeBackend: "native" }).ok + ).toBe(true); + }); + + it("rejects runTableV2 on while realtimeBackend is electric", () => { + expect( + validateFeatureFlagInvariants({ runTableV2: true, realtimeBackend: "electric" }).ok + ).toBe(false); + }); + + it("rejects runTableV2 on while realtimeBackend is shadow", () => { + expect( + validateFeatureFlagInvariants({ runTableV2: true, realtimeBackend: "shadow" }).ok + ).toBe(false); + }); + + it("rejects runTableV2 on when realtimeBackend is unset (defaults to electric)", () => { + expect(validateFeatureFlagInvariants({ 
runTableV2: true }).ok).toBe(false); + }); + + it("allows runTableV2 off or absent regardless of backend", () => { + expect(validateFeatureFlagInvariants({ runTableV2: false }).ok).toBe(true); + expect( + validateFeatureFlagInvariants({ runTableV2: false, realtimeBackend: "electric" }).ok + ).toBe(true); + expect(validateFeatureFlagInvariants({}).ok).toBe(true); + expect(validateFeatureFlagInvariants({ realtimeBackend: "electric" }).ok).toBe(true); + }); + + it("ignores a stringified runTableV2 (strict boolean) and does not constrain", () => { + // runTableV2 is a strict z.boolean(); a stringified "true" fails the parse, + // so the invariant treats it as not-enabled (the write would be rejected by + // the flag schema itself before reaching here). + expect( + validateFeatureFlagInvariants({ runTableV2: "true", realtimeBackend: "electric" }).ok + ).toBe(true); + }); +}); diff --git a/apps/webapp/test/runTableFkDriftGuard.test.ts b/apps/webapp/test/runTableFkDriftGuard.test.ts new file mode 100644 index 00000000000..224e9a7c279 --- /dev/null +++ b/apps/webapp/test/runTableFkDriftGuard.test.ts @@ -0,0 +1,74 @@ +import { readdirSync, readFileSync } from "node:fs"; +import { dirname, join } from "node:path"; +import { fileURLToPath } from "node:url"; +import { describe, expect, it } from "vitest"; + +// internal-packages/database/prisma/migrations, resolved from this test file +// (apps/webapp/test) up to the repo root. +const MIGRATIONS_DIR = join( + dirname(fileURLToPath(import.meta.url)), + "../../../internal-packages/database/prisma/migrations" +); + +// The migration that physically dropped every incoming foreign key to TaskRun, +// decoupling the run tables so a run can live in either TaskRun or task_run_v2. +const DROP_FKS_MIGRATION = "20260619120042_drop_taskrun_incoming_fks"; + +/** + * Guard against the Prisma FK-drift footgun for the parallel run tables. 
+ * + * schema.prisma still declares the (deliberately dropped) incoming relations to + * TaskRun AND mirror relations to task_run_v2, so a routine `prisma migrate dev` + * for any unrelated change regenerates a migration that re-adds those foreign + * keys. Re-adding them is destructive: + * - a re-added TaskRun incoming FK silently re-couples the two tables, defeating + * the whole parallel-table design; and + * - any FK referencing task_run_v2 fails on existing legacy-pointing child rows + * and then rejects every cross-table child insert. + * + * Whoever generates a migration must strip these (the established practice). + * This test fails CI if an unstripped migration ever lands, so the parity can't + * silently drift back. + */ +describe("run-table FK-drift guard", () => { + const migrationDirs = readdirSync(MIGRATIONS_DIR, { withFileTypes: true }) + .filter((entry) => entry.isDirectory()) + .map((entry) => entry.name) + .sort(); + + const sqlOf = (name: string) => + readFileSync(join(MIGRATIONS_DIR, name, "migration.sql"), "utf8"); + + // A statement that ADDs a foreign key referencing `table`. Checked per + // statement (split on ;) so FOREIGN KEY in one statement can't pair with + // REFERENCES in a later one. 
+ const addsForeignKeyReferencing = (sql: string, table: string) => + sql + .split(";") + .some( + (stmt) => /FOREIGN KEY/i.test(stmt) && new RegExp(`REFERENCES\\s+"${table}"`, "i").test(stmt) + ); + + it("finds the migrations directory and the FK-drop migration", () => { + expect(migrationDirs.length).toBeGreaterThan(0); + expect(migrationDirs).toContain(DROP_FKS_MIGRATION); + }); + + it("no migration EVER adds a foreign key referencing task_run_v2", () => { + const offenders = migrationDirs.filter((dir) => addsForeignKeyReferencing(sqlOf(dir), "task_run_v2")); + expect( + offenders, + `These migrations add a destructive FK referencing task_run_v2 (a child row can point at a legacy run, so the constraint fails on existing data): ${offenders.join(", ")}. Strip the *_v2_fkey constraints from the generated migration.` + ).toEqual([]); + }); + + it("no migration after the FK-drop re-adds an incoming foreign key to TaskRun", () => { + const dropIdx = migrationDirs.indexOf(DROP_FKS_MIGRATION); + const after = migrationDirs.slice(dropIdx + 1); + const offenders = after.filter((dir) => addsForeignKeyReferencing(sqlOf(dir), "TaskRun")); + expect( + offenders, + `These migrations re-add an incoming FK to TaskRun that was deliberately dropped (it re-couples the run tables): ${offenders.join(", ")}. 
Strip the TaskRun *_fkey constraints from the generated migration.` + ).toEqual([]); + }); +}); diff --git a/internal-packages/database/prisma/migrations/20260623090000_task_run_v2_covering_index/migration.sql b/internal-packages/database/prisma/migrations/20260623090000_task_run_v2_covering_index/migration.sql new file mode 100644 index 00000000000..e8f480e39a8 --- /dev/null +++ b/internal-packages/database/prisma/migrations/20260623090000_task_run_v2_covering_index/migration.sql @@ -0,0 +1,15 @@ +-- Bring task_run_v2's run-list index to parity with TaskRun's +-- (TaskRun_runtimeEnvironmentId_createdAt_idx, added in migration +-- 20250611080322): add the INCLUDE (id) covering column and fillfactor 90 so the +-- dashboard run-list query keeps index-only scans and the same page packing once +-- v2 carries volume. Without this, v2 run-list reads do heap fetches the legacy +-- table avoids. +-- +-- task_run_v2 is empty until an org cuts over to v2 run ids (gated on the native +-- realtime backend), and this migration deploys before any opt-in, so the +-- DROP/CREATE is effectively instant and runs safely inside the migration +-- transaction (no CONCURRENTLY needed, unlike the original TaskRun migration +-- which ran against a populated table). 
+DROP INDEX IF EXISTS "task_run_v2_runtimeEnvironmentId_createdAt_idx"; + +CREATE INDEX "task_run_v2_runtimeEnvironmentId_createdAt_idx" ON "task_run_v2"("runtimeEnvironmentId", "createdAt" DESC) INCLUDE ("id") WITH (fillfactor = 90); From e3203f514eca4897b35be9d0cd0bd97153f4969d Mon Sep 17 00:00:00 2001 From: Dan Sutton Date: Tue, 23 Jun 2026 01:09:31 +0100 Subject: [PATCH 76/83] fix(database): type the task_run_v2 mirror relations as nullable The child models (TaskRunAttempt, TaskRunWaitpoint, TaskRunDependency, BatchTaskRunItem, Checkpoint, CheckpointRestoreEvent, TaskRunExecutionSnapshot, BulkActionItem) declared their taskRunV2/runV2 mirror relation as non-nullable, but a child row references a run in exactly one table, so the mirror resolves to null for any row whose run is in the other table. An include of the mirror would return null under a non-null type. Type them as TaskRunV2? to match runtime: the legacy relation anchors the required scalar, so Prisma accepts the optional second relation. Client-type change only (the FK is already stripped and the scalar unchanged), so no migration. --- internal-packages/database/prisma/schema.prisma | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/internal-packages/database/prisma/schema.prisma b/internal-packages/database/prisma/schema.prisma index 4244b43d602..95c5b4f3bca 100644 --- a/internal-packages/database/prisma/schema.prisma +++ b/internal-packages/database/prisma/schema.prisma @@ -1455,7 +1455,7 @@ model TaskRunExecutionSnapshot { runId String run TaskRun @relation(fields: [runId], references: [id], map: "TaskRunExecutionSnapshot_runId_fkey") /// Mirror relation to TaskRunV2 reusing the same runId scalar (FK stripped in prod) - runV2 TaskRunV2 @relation("executionSnapshotsV2", fields: [runId], references: [id], map: "TaskRunExecutionSnapshot_runId_v2_fkey") + runV2 TaskRunV2? 
@relation("executionSnapshotsV2", fields: [runId], references: [id], map: "TaskRunExecutionSnapshot_runId_v2_fkey") runStatus TaskRunStatus // Batch @@ -1644,7 +1644,7 @@ model TaskRunWaitpoint { taskRun TaskRun @relation(fields: [taskRunId], references: [id], map: "TaskRunWaitpoint_taskRunId_fkey") /// Mirror relation to TaskRunV2 reusing the same taskRunId scalar (FK stripped in prod) - taskRunV2 TaskRunV2 @relation("taskRunWaitpointsV2", fields: [taskRunId], references: [id], map: "TaskRunWaitpoint_taskRunId_v2_fkey") + taskRunV2 TaskRunV2? @relation("taskRunWaitpointsV2", fields: [taskRunId], references: [id], map: "TaskRunWaitpoint_taskRunId_v2_fkey") taskRunId String waitpoint Waitpoint @relation(fields: [waitpointId], references: [id]) @@ -1827,7 +1827,7 @@ model TaskRunDependency { /// The child run taskRun TaskRun @relation(fields: [taskRunId], references: [id], onDelete: Cascade, onUpdate: Cascade, map: "TaskRunDependency_taskRunId_fkey") /// Mirror relation to TaskRunV2 reusing the same taskRunId scalar (FK stripped in prod) - taskRunV2 TaskRunV2 @relation("dependencyV2", fields: [taskRunId], references: [id], onDelete: Cascade, onUpdate: Cascade, map: "TaskRunDependency_taskRunId_v2_fkey") + taskRunV2 TaskRunV2? @relation("dependencyV2", fields: [taskRunId], references: [id], onDelete: Cascade, onUpdate: Cascade, map: "TaskRunDependency_taskRunId_v2_fkey") taskRunId String @unique checkpointEvent CheckpointRestoreEvent? 
@relation(fields: [checkpointEventId], references: [id], onDelete: Cascade, onUpdate: Cascade) @@ -1877,7 +1877,7 @@ model TaskRunAttempt { taskRun TaskRun @relation("attempts", fields: [taskRunId], references: [id], onDelete: Cascade, onUpdate: Cascade, map: "TaskRunAttempt_taskRunId_fkey") /// Mirror relation to TaskRunV2 reusing the same taskRunId scalar (FK stripped in prod) - taskRunV2 TaskRunV2 @relation("attemptsV2", fields: [taskRunId], references: [id], onDelete: Cascade, onUpdate: Cascade, map: "TaskRunAttempt_taskRunId_v2_fkey") + taskRunV2 TaskRunV2? @relation("attemptsV2", fields: [taskRunId], references: [id], onDelete: Cascade, onUpdate: Cascade, map: "TaskRunAttempt_taskRunId_v2_fkey") taskRunId String backgroundWorker BackgroundWorker @relation(fields: [backgroundWorkerId], references: [id], onDelete: Cascade, onUpdate: Cascade) @@ -2202,7 +2202,7 @@ model BatchTaskRunItem { taskRun TaskRun @relation(fields: [taskRunId], references: [id], onDelete: Cascade, onUpdate: Cascade, map: "BatchTaskRunItem_taskRunId_fkey") /// Mirror relation to TaskRunV2 reusing the same taskRunId scalar (FK stripped in prod) - taskRunV2 TaskRunV2 @relation("batchItemsV2", fields: [taskRunId], references: [id], onDelete: Cascade, onUpdate: Cascade, map: "BatchTaskRunItem_taskRunId_v2_fkey") + taskRunV2 TaskRunV2? @relation("batchItemsV2", fields: [taskRunId], references: [id], onDelete: Cascade, onUpdate: Cascade, map: "BatchTaskRunItem_taskRunId_v2_fkey") taskRunId String taskRunAttempt TaskRunAttempt? 
@relation(fields: [taskRunAttemptId], references: [id], onDelete: SetNull, onUpdate: Cascade) @@ -2299,7 +2299,7 @@ model Checkpoint { run TaskRun @relation(fields: [runId], references: [id], onDelete: Cascade, onUpdate: Cascade, map: "Checkpoint_runId_fkey") /// Mirror relation to TaskRunV2 reusing the same runId scalar (FK stripped in prod) - runV2 TaskRunV2 @relation("checkpointsV2", fields: [runId], references: [id], onDelete: Cascade, onUpdate: Cascade, map: "Checkpoint_runId_v2_fkey") + runV2 TaskRunV2? @relation("checkpointsV2", fields: [runId], references: [id], onDelete: Cascade, onUpdate: Cascade, map: "Checkpoint_runId_v2_fkey") runId String attempt TaskRunAttempt @relation(fields: [attemptId], references: [id], onDelete: Cascade, onUpdate: Cascade) @@ -2336,7 +2336,7 @@ model CheckpointRestoreEvent { run TaskRun @relation(fields: [runId], references: [id], onDelete: Cascade, onUpdate: Cascade, map: "CheckpointRestoreEvent_runId_fkey") /// Mirror relation to TaskRunV2 reusing the same runId scalar (FK stripped in prod) - runV2 TaskRunV2 @relation("checkpointRestoreEventsV2", fields: [runId], references: [id], onDelete: Cascade, onUpdate: Cascade, map: "CheckpointRestoreEvent_runId_v2_fkey") + runV2 TaskRunV2? @relation("checkpointRestoreEventsV2", fields: [runId], references: [id], onDelete: Cascade, onUpdate: Cascade, map: "CheckpointRestoreEvent_runId_v2_fkey") runId String attempt TaskRunAttempt @relation(fields: [attemptId], references: [id], onDelete: Cascade, onUpdate: Cascade) @@ -2807,7 +2807,7 @@ model BulkActionItem { /// The run that is the source of the action, e.g. 
when replaying this is the original run sourceRun TaskRun @relation("SourceActionItemRun", fields: [sourceRunId], references: [id], onDelete: Cascade, onUpdate: Cascade, map: "BulkActionItem_sourceRunId_fkey") /// Mirror relation to TaskRunV2 reusing the same sourceRunId scalar (FK stripped in prod) - sourceRunV2 TaskRunV2 @relation("SourceActionItemRunV2", fields: [sourceRunId], references: [id], onDelete: Cascade, onUpdate: Cascade, map: "BulkActionItem_sourceRunId_v2_fkey") + sourceRunV2 TaskRunV2? @relation("SourceActionItemRunV2", fields: [sourceRunId], references: [id], onDelete: Cascade, onUpdate: Cascade, map: "BulkActionItem_sourceRunId_v2_fkey") sourceRunId String /// The run that's a result of the action, this will be set when the run has been created From 7a4bc4acf6668ee94acb43f71de6a2c1e1314c12 Mon Sep 17 00:00:00 2001 From: Dan Sutton Date: Tue, 23 Jun 2026 01:36:02 +0100 Subject: [PATCH 77/83] fix(webapp): make the FK-drift guard match schema-qualified REFERENCES Prisma emits foreign keys as REFERENCES "public"."TaskRun" (schema-qualified) in every generated migration in this repo, but the guard only matched the bare "TaskRun"/"task_run_v2", so it was effectively a no-op against real migrate-dev output: it would not catch a regenerated _v2_fkey, nor the implicit m2m _WaitpointRunConnections_A_fkey / _TaskRunToTaskRunTag_A_fkey (the former would break trigger-and-wait for v2 runs). Match both bare and schema-qualified forms, with fixtures pinning both so the qualified form cannot regress undetected. 
--- apps/webapp/test/runTableFkDriftGuard.test.ts | 28 +++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/apps/webapp/test/runTableFkDriftGuard.test.ts b/apps/webapp/test/runTableFkDriftGuard.test.ts index 224e9a7c279..df69c8274db 100644 --- a/apps/webapp/test/runTableFkDriftGuard.test.ts +++ b/apps/webapp/test/runTableFkDriftGuard.test.ts @@ -41,12 +41,18 @@ describe("run-table FK-drift guard", () => { // A statement that ADDs a foreign key referencing `table`. Checked per // statement (split on ;) so FOREIGN KEY in one statement can't pair with - // REFERENCES in a later one. + // REFERENCES in a later one. The REFERENCES match is QUALIFICATION-AGNOSTIC: + // Prisma emits the schema-qualified form `REFERENCES "public"."TaskRun"` in + // every generated migration in this repo (including the implicit m2m join + // tables _WaitpointRunConnections / _TaskRunToTaskRunTag), so matching only + // the bare `"TaskRun"` would silently miss the real regeneration vector. const addsForeignKeyReferencing = (sql: string, table: string) => sql .split(";") .some( - (stmt) => /FOREIGN KEY/i.test(stmt) && new RegExp(`REFERENCES\\s+"${table}"`, "i").test(stmt) + (stmt) => + /FOREIGN KEY/i.test(stmt) && + new RegExp(`REFERENCES\\s+(?:"[A-Za-z0-9_]+"\\.)?"${table}"`, "i").test(stmt) ); it("finds the migrations directory and the FK-drop migration", () => { @@ -54,6 +60,24 @@ describe("run-table FK-drift guard", () => { expect(migrationDirs).toContain(DROP_FKS_MIGRATION); }); + it("the matcher catches both bare and schema-qualified REFERENCES forms", () => { + // Prisma actually emits the qualified form; both must be caught so the + // qualified form can never regress undetected. 
+ const qualifiedV2 = + 'ALTER TABLE "TaskRunAttempt" ADD CONSTRAINT "TaskRunAttempt_taskRunId_v2_fkey" FOREIGN KEY ("taskRunId") REFERENCES "public"."task_run_v2"("id") ON DELETE CASCADE;'; + const qualifiedM2M = + 'ALTER TABLE "_WaitpointRunConnections" ADD CONSTRAINT "_WaitpointRunConnections_A_fkey" FOREIGN KEY ("A") REFERENCES "public"."TaskRun"("id") ON DELETE CASCADE;'; + const bareTaskRun = + 'ALTER TABLE "TaskRunDependency" ADD CONSTRAINT "x_fkey" FOREIGN KEY ("taskRunId") REFERENCES "TaskRun"("id");'; + const unrelated = + 'ALTER TABLE "Foo" ADD CONSTRAINT "y_fkey" FOREIGN KEY ("barId") REFERENCES "public"."Bar"("id");'; + expect(addsForeignKeyReferencing(qualifiedV2, "task_run_v2")).toBe(true); + expect(addsForeignKeyReferencing(qualifiedM2M, "TaskRun")).toBe(true); + expect(addsForeignKeyReferencing(bareTaskRun, "TaskRun")).toBe(true); + expect(addsForeignKeyReferencing(unrelated, "TaskRun")).toBe(false); + expect(addsForeignKeyReferencing(unrelated, "task_run_v2")).toBe(false); + }); + it("no migration EVER adds a foreign key referencing task_run_v2", () => { const offenders = migrationDirs.filter((dir) => addsForeignKeyReferencing(sqlOf(dir), "task_run_v2")); expect( From 60d8662d65d380957106e67c1cd4f136ca1b22c8 Mon Sep 17 00:00:00 2001 From: Dan Sutton Date: Tue, 23 Jun 2026 01:56:33 +0100 Subject: [PATCH 78/83] fix(webapp): route a v2 org pre-failed run to task_run_v2 triggerFailedTask minted the pre-failed run with a cuid (RunId.generate), so it landed in legacy TaskRun even for an org cut over to v2. Trigger-time failures (queue limits, validation, payload errors) are common for some orgs, and these runs frequently carry a parentTaskRunId / resumeParentOnCompletion / batch, so each one created an ongoing cross-table edge (a v2 parent or batch with a legacy failed child) on the failure path, not just the transient mixed window. 
Gate the id mint on shouldUseV2RunTable like triggerTask: the main call() path uses the request org flags; the degraded callWithoutTraceEvents() path loads them by org id and falls back to a legacy id only if the org cannot be resolved. --- .../services/triggerFailedTask.server.ts | 37 ++++++++++++++++++- 1 file changed, 35 insertions(+), 2 deletions(-) diff --git a/apps/webapp/app/runEngine/services/triggerFailedTask.server.ts b/apps/webapp/app/runEngine/services/triggerFailedTask.server.ts index 031411844b4..2b5c1c8410e 100644 --- a/apps/webapp/app/runEngine/services/triggerFailedTask.server.ts +++ b/apps/webapp/app/runEngine/services/triggerFailedTask.server.ts @@ -10,6 +10,8 @@ import { PerformTaskRunAlertsService } from "~/v3/services/alerts/performTaskRun import { DefaultQueueManager } from "../concerns/queues.server"; import type { TriggerTaskRequest } from "../types"; import { runStore } from "~/v3/runStore.server"; +import { shouldUseV2RunTable } from "~/v3/runTableV2.server"; +import { env } from "~/env.server"; export type TriggerFailedTaskRequest = { /** The task identifier (e.g. "my-task") */ @@ -67,7 +69,19 @@ export class TriggerFailedTaskService { } async call(request: TriggerFailedTaskRequest): Promise { - const failedRunFriendlyId = RunId.generate().friendlyId; + // Mint the failed run on the same physical table the org's other runs use: + // a v2 org's failed run is a KSUID (-> task_run_v2), not a cuid in legacy + // TaskRun. Otherwise every trigger-time failure (queue limits, validation, + // payload errors) would land in the wrong table and, when it has a parent or + // batch, create an ongoing cross-table edge on the failure path. Mirrors the + // mint gate in triggerTask.server.ts. + const failedRunFriendlyId = ( + shouldUseV2RunTable(request.environment.organization.featureFlags, { + nativeRealtimeEnabled: env.REALTIME_BACKEND_NATIVE_ENABLED === "1", + }) + ? 
RunId.generateKsuid() + : RunId.generate() + ).friendlyId; const taskRunError: TaskRunError = { type: "INTERNAL_ERROR" as const, code: request.errorCode ?? TaskRunErrorCodes.UNSPECIFIED_ERROR, @@ -268,7 +282,26 @@ export class TriggerFailedTaskService { batch?: { id: string; index: number }; errorCode?: TaskRunErrorCodes; }): Promise { - const failedRunFriendlyId = RunId.generate().friendlyId; + // Keep the failed run on the org's table even on this degraded path. The + // caller couldn't fully resolve the environment, so load the org flags by id + // to decide; if even that fails, default to a legacy id (safe: RunStore + // routes by id format either way, and an unresolvable org is a rare edge). + let useV2RunTable = false; + try { + const org = await this.prisma.organization.findFirst({ + where: { id: opts.organizationId }, + select: { featureFlags: true }, + }); + useV2RunTable = shouldUseV2RunTable( + (org?.featureFlags as Record) ?? null, + { nativeRealtimeEnabled: env.REALTIME_BACKEND_NATIVE_ENABLED === "1" } + ); + } catch { + // Leave useV2RunTable=false (legacy id). + } + const failedRunFriendlyId = ( + useV2RunTable ? RunId.generateKsuid() : RunId.generate() + ).friendlyId; try { // Best-effort parent run lookup for rootTaskRunId/depth From 2bc70bece5780e7f58da570060afdc779445accb Mon Sep 17 00:00:00 2001 From: Dan Sutton Date: Tue, 23 Jun 2026 02:34:35 +0100 Subject: [PATCH 79/83] feat(webapp): gate v2 minting on ClickHouse publication readiness; scope reads on v2-may-exist #1 publication-readiness interlock: a v2 run minted before task_run_v2 is in the ClickHouse replication publication is permanently absent from ClickHouse (Postgres only decodes changes for transactions that begin after ALTER PUBLICATION ADD TABLE, which the replication leader runs at its own startup, not via a migration), and the run list/metrics/tags are ClickHouse-only. 
Add a cached, periodically-refreshed status (runTableV2Status.server.ts) and gate minting (triggerTask, triggerFailedTask) through canMintV2Run = org cut over AND table published. Minting fails safe to legacy until the publication carries the table and self-heals once it does, removing the manual pg_publication_tables enable step. #2 native-rollback read scope: cross-table read scoping (idempotency dedup, ApiRetrieveRun hierarchy) keyed on the native master switch alone, so disabling native realtime after v2 runs exist re-scoped reads to legacy and hid existing v2 runs (an idempotency dedup miss means duplicate execution). Scope on v2RunsMayExist (native on OR task_run_v2 has rows) instead; it is monotonic, so the read scope cannot regress once v2 runs exist. --- .../v3/ApiRetrieveRunPresenter.server.ts | 24 ++-- .../concerns/idempotencyKeys.server.ts | 21 ++-- .../services/triggerFailedTask.server.ts | 11 +- .../runEngine/services/triggerTask.server.ts | 4 +- apps/webapp/app/v3/runTableV2Status.server.ts | 104 ++++++++++++++++++ apps/webapp/test/runTableV2Status.test.ts | 53 +++++++++ 6 files changed, 189 insertions(+), 28 deletions(-) create mode 100644 apps/webapp/app/v3/runTableV2Status.server.ts create mode 100644 apps/webapp/test/runTableV2Status.test.ts diff --git a/apps/webapp/app/presenters/v3/ApiRetrieveRunPresenter.server.ts b/apps/webapp/app/presenters/v3/ApiRetrieveRunPresenter.server.ts index 56450b0fb1d..13adb18cab7 100644 --- a/apps/webapp/app/presenters/v3/ApiRetrieveRunPresenter.server.ts +++ b/apps/webapp/app/presenters/v3/ApiRetrieveRunPresenter.server.ts @@ -24,6 +24,7 @@ import { import { generatePresignedUrl } from "~/v3/objectStore.server"; import { runStore } from "~/v3/runStore.server"; import { hydrateParentAndRoot, hydrateChildRuns } from "~/v3/runHierarchy.server"; +import { v2RunsMayExist } from "~/v3/runTableV2Status.server"; import { env as serverEnv } from "~/env.server"; import { tracer } from "~/v3/tracer.server"; import { 
startSpanWithEnv } from "~/v3/tracing.server"; @@ -148,16 +149,19 @@ export class ApiRetrieveRunPresenter { // legacy run's v2 children), which arise in the mixed window, would come // back null/empty. Resolve parent/root by id (RunStore routes by format) // and children by a both-table predicate. - // Scope the cross-table reads on whether ANY v2 run can exist in this - // deployment (the native master switch), NOT the org's current flag: a - // run's table is fixed by its id format, and an org that was on v2 then - // flipped off still HAS v2 runs (and v2 children) that runTableV2.server.ts - // documents as staying readable. pgRow is routed here by id format, so it - // can be a v2 run for a now-non-v2 org; scoping to "legacy" off the per-org - // flag would then silently drop its v2 children/parent. Until native is - // enabled no v2 run exists yet (minting requires it), so "legacy" is safe - // and skips the empty task_run_v2 query. The reads also run in parallel. - const tables = serverEnv.REALTIME_BACKEND_NATIVE_ENABLED === "1" ? "both" : "legacy"; + // Scope the cross-table reads on whether a v2 run could exist at all, NOT + // the org's current flag: a run's table is fixed by its id format, and an + // org that was on v2 then flipped off still HAS v2 runs (and v2 children) + // that stay readable. pgRow is routed here by id format, so it can be a v2 + // run for a now-non-v2 org; scoping to "legacy" would then silently drop + // its v2 children/parent. v2RunsMayExist is monotonic (native on now, OR + // task_run_v2 already has rows), so turning the native master switch off + // does not re-scope to legacy and hide existing v2 runs. While no v2 run + // has ever existed it stays "legacy" and skips the empty task_run_v2 query. + // The reads also run in parallel. + const tables = v2RunsMayExist(serverEnv.REALTIME_BACKEND_NATIVE_ENABLED === "1") + ? 
"both" + : "legacy"; const [{ parentTaskRun, rootTaskRun }, childRuns] = await Promise.all([ hydrateParentAndRoot( { parentTaskRunId: pgRow.parentTaskRunId, rootTaskRunId: pgRow.rootTaskRunId }, diff --git a/apps/webapp/app/runEngine/concerns/idempotencyKeys.server.ts b/apps/webapp/app/runEngine/concerns/idempotencyKeys.server.ts index efadf7d2c4a..39a38e14d81 100644 --- a/apps/webapp/app/runEngine/concerns/idempotencyKeys.server.ts +++ b/apps/webapp/app/runEngine/concerns/idempotencyKeys.server.ts @@ -12,6 +12,7 @@ import { claimOrAwait } from "~/v3/mollifier/idempotencyClaim.server"; import { makeResolveMollifierFlag } from "~/v3/mollifier/mollifierGate.server"; import { runStore } from "~/v3/runStore.server"; import { shouldUseV2RunTable } from "~/v3/runTableV2.server"; +import { v2RunsMayExist } from "~/v3/runTableV2Status.server"; import type { TraceEventConcern, TriggerTaskRequest } from "../types"; // In-memory per-org mollifier-enabled check, shared with `evaluateGate` @@ -313,16 +314,16 @@ export class IdempotencyKeyConcern { nativeRealtimeEnabled: env.REALTIME_BACKEND_NATIVE_ENABLED === "1", }); - // Scope the idempotency dedup read on whether ANY v2 run can exist in this - // deployment (the native master switch), NOT on whether this org currently - // mints v2. A run's table is fixed by its id format, so an org that was on - // v2 then flipped off still holds v2 runs an idempotency key can match - // (runTableV2.server.ts documents they stay readable); gating the read on - // orgUsesV2 would miss them and let a duplicate through. Until native is - // enabled no v2 run exists yet (minting requires it), so "legacy" is safe and - // skips the empty task_run_v2 query on the trigger hot path; once native is - // on, read both. - const anyV2RunsPossible = env.REALTIME_BACKEND_NATIVE_ENABLED === "1"; + // Scope the idempotency dedup read on whether a v2 run could exist at all, + // NOT on whether this org currently mints v2. 
A run's table is fixed by its + // id format, so an org that was on v2 then flipped off still holds v2 runs an + // idempotency key can match; gating the read on orgUsesV2 would miss them and + // let a duplicate through. v2RunsMayExist is monotonic (native on now, OR + // task_run_v2 already has rows), so turning the native master switch off + // after v2 runs exist does NOT re-scope the read back to legacy and hide + // them. While no v2 run has ever existed it stays "legacy" and skips the + // empty task_run_v2 query on the trigger hot path. + const anyV2RunsPossible = v2RunsMayExist(env.REALTIME_BACKEND_NATIVE_ENABLED === "1"); const existingRun = idempotencyKey ? await runStore.findRun( diff --git a/apps/webapp/app/runEngine/services/triggerFailedTask.server.ts b/apps/webapp/app/runEngine/services/triggerFailedTask.server.ts index 2b5c1c8410e..a9920bbfd6b 100644 --- a/apps/webapp/app/runEngine/services/triggerFailedTask.server.ts +++ b/apps/webapp/app/runEngine/services/triggerFailedTask.server.ts @@ -10,7 +10,7 @@ import { PerformTaskRunAlertsService } from "~/v3/services/alerts/performTaskRun import { DefaultQueueManager } from "../concerns/queues.server"; import type { TriggerTaskRequest } from "../types"; import { runStore } from "~/v3/runStore.server"; -import { shouldUseV2RunTable } from "~/v3/runTableV2.server"; +import { canMintV2Run } from "~/v3/runTableV2Status.server"; import { env } from "~/env.server"; export type TriggerFailedTaskRequest = { @@ -76,7 +76,7 @@ export class TriggerFailedTaskService { // batch, create an ongoing cross-table edge on the failure path. Mirrors the // mint gate in triggerTask.server.ts. const failedRunFriendlyId = ( - shouldUseV2RunTable(request.environment.organization.featureFlags, { + canMintV2Run(request.environment.organization.featureFlags, { nativeRealtimeEnabled: env.REALTIME_BACKEND_NATIVE_ENABLED === "1", }) ? 
RunId.generateKsuid() @@ -292,10 +292,9 @@ export class TriggerFailedTaskService { where: { id: opts.organizationId }, select: { featureFlags: true }, }); - useV2RunTable = shouldUseV2RunTable( - (org?.featureFlags as Record) ?? null, - { nativeRealtimeEnabled: env.REALTIME_BACKEND_NATIVE_ENABLED === "1" } - ); + useV2RunTable = canMintV2Run((org?.featureFlags as Record) ?? null, { + nativeRealtimeEnabled: env.REALTIME_BACKEND_NATIVE_ENABLED === "1", + }); } catch { // Leave useV2RunTable=false (legacy id). } diff --git a/apps/webapp/app/runEngine/services/triggerTask.server.ts b/apps/webapp/app/runEngine/services/triggerTask.server.ts index bdf656e3afc..370a996a1eb 100644 --- a/apps/webapp/app/runEngine/services/triggerTask.server.ts +++ b/apps/webapp/app/runEngine/services/triggerTask.server.ts @@ -25,7 +25,7 @@ import { logger } from "~/services/logger.server"; import { parseDelay } from "~/utils/delays"; import { handleMetadataPacket } from "~/utils/packets"; import { startSpan } from "~/v3/tracing.server"; -import { shouldUseV2RunTable } from "~/v3/runTableV2.server"; +import { canMintV2Run } from "~/v3/runTableV2Status.server"; import type { TriggerTaskServiceOptions, TriggerTaskServiceResult, @@ -159,7 +159,7 @@ export class RunEngineTriggerTaskService { // trigger hot path. Downstream routing is by id format only. const runFriendlyId = options?.runFriendlyId ?? - (shouldUseV2RunTable(environment.organization.featureFlags, { + (canMintV2Run(environment.organization.featureFlags, { nativeRealtimeEnabled: env.REALTIME_BACKEND_NATIVE_ENABLED === "1", }) ? 
RunId.generateKsuid() diff --git a/apps/webapp/app/v3/runTableV2Status.server.ts b/apps/webapp/app/v3/runTableV2Status.server.ts new file mode 100644 index 00000000000..bee32f9a1b3 --- /dev/null +++ b/apps/webapp/app/v3/runTableV2Status.server.ts @@ -0,0 +1,104 @@ +import { prisma } from "~/db.server"; +import { env } from "~/env.server"; +import { logger } from "~/services/logger.server"; +import { singleton } from "~/utils/singleton"; +import { shouldUseV2RunTable, type ShouldUseV2RunTableOptions } from "~/v3/runTableV2.server"; + +/** + * Cached, periodically-refreshed facts about the `task_run_v2` table, read OFF + * the trigger hot path (no per-request DB query) to gate v2 minting and + * cross-table read scoping. + */ +type RunTableV2Status = { + /** + * Is `task_run_v2` in the ClickHouse logical-replication publication? + * + * Postgres only decodes a table's changes for transactions that BEGIN after + * the decoder sees `ALTER PUBLICATION ... ADD TABLE`, and that ADD TABLE is run + * lazily by the replication leader on its own startup, NOT by a migration. So a + * v2 run minted before the table is published is permanently absent from + * ClickHouse with no backfill, and the run list / metrics / tags / bulk actions + * are ClickHouse-only. Mint v2 ONLY when this is true; otherwise mint legacy + * (fail-safe), self-healing once the leader publishes the table. + */ + published: boolean; + /** + * Has any v2 run ever existed (monotonic in practice)? Cross-table READ scoping + * uses this (OR the native master switch) rather than the master switch alone, + * so disabling native realtime cannot re-scope reads back to legacy and hide + * already-minted v2 runs from idempotency dedup and hierarchy reads. 
+ */ + hasRows: boolean; +}; + +const REFRESH_INTERVAL_MS = 30_000; + +const status = singleton("runTableV2Status", initialize); + +function initialize(): RunTableV2Status { + const state: RunTableV2Status = { published: false, hasRows: false }; + + // The publication only exists when runs replication is configured. Without it + // no v2 run can be captured by ClickHouse, so leave published=false: minting + // stays on legacy regardless of org flags. + if (!env.RUN_REPLICATION_CLICKHOUSE_URL) { + return state; + } + + const refresh = async () => { + try { + const published = await prisma.$queryRaw>` + SELECT EXISTS ( + SELECT 1 FROM pg_publication_tables + WHERE pubname = ${env.RUN_REPLICATION_PUBLICATION_NAME} + AND tablename = 'task_run_v2' + ) AS present`; + state.published = published[0]?.present ?? false; + + // hasRows is monotonic; once true, stop probing. + if (!state.hasRows) { + const hasRows = await prisma.$queryRaw>` + SELECT EXISTS (SELECT 1 FROM task_run_v2 LIMIT 1) AS present`; + state.hasRows = hasRows[0]?.present ?? false; + } + } catch (error) { + logger.warn("runTableV2Status refresh failed; keeping last-known status", { + error: error instanceof Error ? error.message : String(error), + }); + } + }; + + void refresh(); + const timer = setInterval(() => void refresh(), REFRESH_INTERVAL_MS); + timer.unref?.(); + + return state; +} + +/** `task_run_v2` is in the ClickHouse replication publication (cached, off the hot path). */ +export function isV2RunTablePublished(): boolean { + return status.published; +} + +/** + * Whether a v2 run could be relevant to a cross-table READ: native realtime is on + * (v2 is being minted now) OR `task_run_v2` already holds rows. Scope cross-table + * reads on this, not the native master switch alone, so turning native off cannot + * hide already-minted v2 runs. 
+ */ +export function v2RunsMayExist(nativeRealtimeEnabled: boolean): boolean { + return nativeRealtimeEnabled || status.hasRows; +} + +/** + * Mint gate: mint a v2 (KSUID) run only when the org is cut over to v2 AND + * `task_run_v2` is in the ClickHouse publication, so a v2 run can never be + * silently lost from ClickHouse by being minted before the replication leader + * publishes the table. Fails safe to legacy until then; self-heals once published. + */ +export function canMintV2Run( + orgFeatureFlags: unknown, + options: ShouldUseV2RunTableOptions +): boolean { + return shouldUseV2RunTable(orgFeatureFlags, options) && isV2RunTablePublished(); +} diff --git a/apps/webapp/test/runTableV2Status.test.ts b/apps/webapp/test/runTableV2Status.test.ts new file mode 100644 index 00000000000..91443a9d55d --- /dev/null +++ b/apps/webapp/test/runTableV2Status.test.ts @@ -0,0 +1,53 @@ +import { describe, expect, it } from "vitest"; +import { canMintV2Run, v2RunsMayExist } from "~/v3/runTableV2Status.server"; + +// The module caches its status in a globalThis singleton ("runTableV2Status"). +// In the unit-test env runs replication is unconfigured, so it initializes to +// { published:false, hasRows:false } with no background poller. Mutate that +// cached object to exercise the gates deterministically. +function setStatus(published: boolean, hasRows: boolean) { + const singletons = (globalThis as any).__trigger_singletons; + // Force module init (the singleton is created on first getter call/import). 
+ v2RunsMayExist(false); + singletons.runTableV2Status.published = published; + singletons.runTableV2Status.hasRows = hasRows; +} + +const CUTOVER_FLAGS = { realtimeBackend: "native", runTableV2: true }; + +describe("canMintV2Run (mint gate: org cut over AND task_run_v2 published)", () => { + it("mints v2 only when the org is cut over AND the table is published", () => { + setStatus(true, true); + expect(canMintV2Run(CUTOVER_FLAGS, { nativeRealtimeEnabled: true })).toBe(true); + }); + + it("fails safe to legacy when the org is cut over but the table is NOT published", () => { + setStatus(false, true); + expect(canMintV2Run(CUTOVER_FLAGS, { nativeRealtimeEnabled: true })).toBe(false); + }); + + it("stays legacy when the org is not cut over, even if published", () => { + setStatus(true, true); + expect( + canMintV2Run({ realtimeBackend: "electric", runTableV2: false }, { nativeRealtimeEnabled: true }) + ).toBe(false); + expect(canMintV2Run(CUTOVER_FLAGS, { nativeRealtimeEnabled: false })).toBe(false); + }); +}); + +describe("v2RunsMayExist (read scope: native on OR table has rows)", () => { + it("is true when native realtime is on (v2 being minted now)", () => { + setStatus(false, false); + expect(v2RunsMayExist(true)).toBe(true); + }); + + it("is true when task_run_v2 already has rows even with native OFF (rollback safety)", () => { + setStatus(false, true); + expect(v2RunsMayExist(false)).toBe(true); + }); + + it("is false only when native is off AND no v2 run has ever existed", () => { + setStatus(false, false); + expect(v2RunsMayExist(false)).toBe(false); + }); +}); From 1c3f5ca8cbf2c8dd4b6977844524f669a3bbdc16 Mon Sep 17 00:00:00 2001 From: Dan Sutton Date: Tue, 23 Jun 2026 05:49:17 +0100 Subject: [PATCH 80/83] fix(webapp): isolate runTableV2Status from its poller under test; qualify publication probe by schema Code-review follow-ups on the v2 hardening: - runTableV2Status no longer starts its background poller under vitest (NODE_ENV=test). 
The module is imported by the mint/read sites, so the import-time poll plus setInterval was firing live DB queries against the test database and leaking a timer, and the async refresh could race tests that drive the cached status directly. Tests exercise the gates by mutating the cached state, so the poller only gets in the way. - The publication-readiness probe now filters pg_publication_tables on schemaname = "public", so a same-named table in another published schema cannot satisfy the check. --- apps/webapp/app/v3/runTableV2Status.server.ts | 10 ++++++++++ apps/webapp/test/runTableV2Status.test.ts | 7 ++++--- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/apps/webapp/app/v3/runTableV2Status.server.ts b/apps/webapp/app/v3/runTableV2Status.server.ts index bee32f9a1b3..07cfbf2d4d5 100644 --- a/apps/webapp/app/v3/runTableV2Status.server.ts +++ b/apps/webapp/app/v3/runTableV2Status.server.ts @@ -38,6 +38,15 @@ const status = singleton("runTableV2Status", initialize); function initialize(): RunTableV2Status { const state: RunTableV2Status = { published: false, hasRows: false }; + // No background poller under vitest: this module is imported by the mint/read + // sites, so a live DB poll + setInterval at import time would query the test + // database and leak a timer for the test run, and the async refresh could race + // tests that drive the cached status directly. Tests exercise the gates by + // mutating the cached state, so the poller would only get in the way. + if (env.NODE_ENV === "test") { + return state; + } + // The publication only exists when runs replication is configured. Without it // no v2 run can be captured by ClickHouse, so leave published=false: minting // stays on legacy regardless of org flags. 
@@ -51,6 +60,7 @@ function initialize(): RunTableV2Status { SELECT EXISTS ( SELECT 1 FROM pg_publication_tables WHERE pubname = ${env.RUN_REPLICATION_PUBLICATION_NAME} + AND schemaname = 'public' AND tablename = 'task_run_v2' ) AS present`; state.published = published[0]?.present ?? false; diff --git a/apps/webapp/test/runTableV2Status.test.ts b/apps/webapp/test/runTableV2Status.test.ts index 91443a9d55d..786a4dde43d 100644 --- a/apps/webapp/test/runTableV2Status.test.ts +++ b/apps/webapp/test/runTableV2Status.test.ts @@ -2,9 +2,10 @@ import { describe, expect, it } from "vitest"; import { canMintV2Run, v2RunsMayExist } from "~/v3/runTableV2Status.server"; // The module caches its status in a globalThis singleton ("runTableV2Status"). -// In the unit-test env runs replication is unconfigured, so it initializes to -// { published:false, hasRows:false } with no background poller. Mutate that -// cached object to exercise the gates deterministically. +// Under vitest (NODE_ENV=test) it skips the background poller entirely and +// initializes to { published:false, hasRows:false } — so no live DB query, no +// leaked interval, and nothing races these assertions. Mutate that cached +// object to exercise the gates deterministically. function setStatus(published: boolean, hasRows: boolean) { const singletons = (globalThis as any).__trigger_singletons; // Force module init (the singleton is created on first getter call/import). From 525e36366070473c2346b1d7cd22a4b7f9b96ba1 Mon Sep 17 00:00:00 2001 From: Dan Sutton Date: Tue, 23 Jun 2026 06:59:15 +0100 Subject: [PATCH 81/83] fix(webapp): tolerate a legacy or corrupt backfill cursor instead of throwing The backfill cursor format changed from a bare run id to a composite <createdAt>_<id>, and decodeBackfillCursor threw on anything without the separator. A backfill in flight across that change hands the new decoder an old bare-id cursor, so it would throw on every batch.
Treat an unparsable cursor (a legacy bare id, or corrupt) as "no cursor" and restart the window: re-backfill is idempotent (ClickHouse ReplacingMergeTree keyed by run id), so the in-flight job self-recovers instead of failing. Logs a warning. Adds cursor round-trip and legacy-format tests. --- .../app/services/runsBackfiller.server.ts | 21 ++++++++++++--- apps/webapp/test/runsBackfiller.test.ts | 27 ++++++++++++++++++- 2 files changed, 43 insertions(+), 5 deletions(-) diff --git a/apps/webapp/app/services/runsBackfiller.server.ts b/apps/webapp/app/services/runsBackfiller.server.ts index 09386c9495c..cad704dfff5 100644 --- a/apps/webapp/app/services/runsBackfiller.server.ts +++ b/apps/webapp/app/services/runsBackfiller.server.ts @@ -47,6 +47,15 @@ export class RunsBackfillerService { // different ranges. RunStore merges the two tables only on a time-based // key, so order by createdAt and tiebreak on id within a timestamp. const keyset = cursor ? decodeBackfillCursor(cursor) : undefined; + if (cursor && !keyset) { + // Legacy/corrupt cursor: ignore it and restart the window (idempotent + // re-backfill). Self-recovers a backfill in flight across the cursor + // format change instead of throwing on every batch. + this.logger.warn( + "RunsBackfillerService: unparsable backfill cursor, restarting from window start", + { cursor } + ); + } const runs = await runStore.findRuns( { @@ -122,15 +131,19 @@ export function encodeBackfillCursor(createdAt: Date, id: string): string { return `${createdAt.toISOString()}${BACKFILL_CURSOR_SEPARATOR}${id}`; } -export function decodeBackfillCursor(cursor: string): { createdAt: Date; id: string } { +export function decodeBackfillCursor(cursor: string): { createdAt: Date; id: string } | undefined { const separatorIndex = cursor.indexOf(BACKFILL_CURSOR_SEPARATOR); const createdAt = separatorIndex === -1 ? new Date(NaN) : new Date(cursor.slice(0, separatorIndex)); const id = separatorIndex === -1 ? 
"" : cursor.slice(separatorIndex + 1); + // A cursor with no separator is the pre-(createdAt, id) format (a bare run id, + // e.g. a backfill that was in flight across this change), or otherwise corrupt. + // The old id-only keyset can't be translated to the new (createdAt, id) order, + // so return undefined and let the caller restart the window. Re-backfilling is + // idempotent (ClickHouse ReplacingMergeTree keyed by run id), so the only cost + // is redoing the already-done portion once. if (Number.isNaN(createdAt.getTime()) || id.length === 0) { - throw new Error( - `RunsBackfillerService: malformed cursor "${cursor}" (expected "_")` - ); + return undefined; } return { createdAt, id }; diff --git a/apps/webapp/test/runsBackfiller.test.ts b/apps/webapp/test/runsBackfiller.test.ts index fbdb16a4a7b..da291f094f9 100644 --- a/apps/webapp/test/runsBackfiller.test.ts +++ b/apps/webapp/test/runsBackfiller.test.ts @@ -9,13 +9,38 @@ vi.mock("~/db.server", () => ({ import { ClickHouse } from "@internal/clickhouse"; import { replicationContainerTest } from "@internal/testcontainers"; import { z } from "zod"; -import { RunsBackfillerService } from "~/services/runsBackfiller.server"; +import { + RunsBackfillerService, + decodeBackfillCursor, + encodeBackfillCursor, +} from "~/services/runsBackfiller.server"; import { RunsReplicationService } from "~/services/runsReplicationService.server"; import { createInMemoryTracing } from "./utils/tracing"; import { TestReplicationClickhouseFactory } from "./utils/testReplicationClickhouseFactory"; vi.setConfig({ testTimeout: 60_000 }); +describe("backfill cursor", () => { + it("round-trips createdAt + id", () => { + const createdAt = new Date("2026-06-23T00:00:00.000Z"); + const decoded = decodeBackfillCursor(encodeBackfillCursor(createdAt, "cmqpwioyy0009unul63v3mxw2")); + expect(decoded?.createdAt.toISOString()).toBe(createdAt.toISOString()); + expect(decoded?.id).toBe("cmqpwioyy0009unul63v3mxw2"); + }); + + it("treats a legacy 
bare-id cursor (no separator) as undefined so the window restarts", () => { + // Pre-(createdAt, id) format: a bare run id. Decoding must not throw — it + // returns undefined so an in-flight backfill restarts the window instead of + // failing every batch after the cursor-format change. + expect(decodeBackfillCursor("cmqpwioyy0009unul63v3mxw2")).toBeUndefined(); + }); + + it("returns undefined for a corrupt cursor instead of throwing", () => { + expect(decodeBackfillCursor("not-a-date_cmqpwioyy0009unul63v3mxw2")).toBeUndefined(); + expect(decodeBackfillCursor("_cmqpwioyy0009unul63v3mxw2")).toBeUndefined(); + }); +}); + describe("RunsBackfillerService", () => { replicationContainerTest( "should backfill completed runs to clickhouse", From 3418d9d0a71f87ad1d587cb4f5d9e55f5b08ae0d Mon Sep 17 00:00:00 2001 From: Dan Sutton Date: Tue, 23 Jun 2026 07:55:38 +0100 Subject: [PATCH 82/83] revert: keep backfill cursor decoder throwing on malformed input Reverts 525e36366 (graceful legacy-cursor handling). The legacy bare-id cursor only matters for a backfill in flight across the single deploy that changed the cursor format, and that backfill is internal and admin-triggered, so the one-time transition is handled operationally: do not run a backfill during the rollout deploy. Keeping decodeBackfillCursor throwing surfaces genuine cursor corruption loudly instead of silently restarting the window. The old keyset was id-ordered and the new one createdAt-ordered, so a stale cursor cannot be safely resumed anyway, only restarted. 
--- .../app/services/runsBackfiller.server.ts | 21 +++------------ apps/webapp/test/runsBackfiller.test.ts | 27 +------------------ 2 files changed, 5 insertions(+), 43 deletions(-) diff --git a/apps/webapp/app/services/runsBackfiller.server.ts b/apps/webapp/app/services/runsBackfiller.server.ts index cad704dfff5..09386c9495c 100644 --- a/apps/webapp/app/services/runsBackfiller.server.ts +++ b/apps/webapp/app/services/runsBackfiller.server.ts @@ -47,15 +47,6 @@ export class RunsBackfillerService { // different ranges. RunStore merges the two tables only on a time-based // key, so order by createdAt and tiebreak on id within a timestamp. const keyset = cursor ? decodeBackfillCursor(cursor) : undefined; - if (cursor && !keyset) { - // Legacy/corrupt cursor: ignore it and restart the window (idempotent - // re-backfill). Self-recovers a backfill in flight across the cursor - // format change instead of throwing on every batch. - this.logger.warn( - "RunsBackfillerService: unparsable backfill cursor, restarting from window start", - { cursor } - ); - } const runs = await runStore.findRuns( { @@ -131,19 +122,15 @@ export function encodeBackfillCursor(createdAt: Date, id: string): string { return `${createdAt.toISOString()}${BACKFILL_CURSOR_SEPARATOR}${id}`; } -export function decodeBackfillCursor(cursor: string): { createdAt: Date; id: string } | undefined { +export function decodeBackfillCursor(cursor: string): { createdAt: Date; id: string } { const separatorIndex = cursor.indexOf(BACKFILL_CURSOR_SEPARATOR); const createdAt = separatorIndex === -1 ? new Date(NaN) : new Date(cursor.slice(0, separatorIndex)); const id = separatorIndex === -1 ? "" : cursor.slice(separatorIndex + 1); - // A cursor with no separator is the pre-(createdAt, id) format (a bare run id, - // e.g. a backfill that was in flight across this change), or otherwise corrupt. 
- // The old id-only keyset can't be translated to the new (createdAt, id) order, - // so return undefined and let the caller restart the window. Re-backfilling is - // idempotent (ClickHouse ReplacingMergeTree keyed by run id), so the only cost - // is redoing the already-done portion once. if (Number.isNaN(createdAt.getTime()) || id.length === 0) { - return undefined; + throw new Error( + `RunsBackfillerService: malformed cursor "${cursor}" (expected "_")` + ); } return { createdAt, id }; diff --git a/apps/webapp/test/runsBackfiller.test.ts b/apps/webapp/test/runsBackfiller.test.ts index da291f094f9..fbdb16a4a7b 100644 --- a/apps/webapp/test/runsBackfiller.test.ts +++ b/apps/webapp/test/runsBackfiller.test.ts @@ -9,38 +9,13 @@ vi.mock("~/db.server", () => ({ import { ClickHouse } from "@internal/clickhouse"; import { replicationContainerTest } from "@internal/testcontainers"; import { z } from "zod"; -import { - RunsBackfillerService, - decodeBackfillCursor, - encodeBackfillCursor, -} from "~/services/runsBackfiller.server"; +import { RunsBackfillerService } from "~/services/runsBackfiller.server"; import { RunsReplicationService } from "~/services/runsReplicationService.server"; import { createInMemoryTracing } from "./utils/tracing"; import { TestReplicationClickhouseFactory } from "./utils/testReplicationClickhouseFactory"; vi.setConfig({ testTimeout: 60_000 }); -describe("backfill cursor", () => { - it("round-trips createdAt + id", () => { - const createdAt = new Date("2026-06-23T00:00:00.000Z"); - const decoded = decodeBackfillCursor(encodeBackfillCursor(createdAt, "cmqpwioyy0009unul63v3mxw2")); - expect(decoded?.createdAt.toISOString()).toBe(createdAt.toISOString()); - expect(decoded?.id).toBe("cmqpwioyy0009unul63v3mxw2"); - }); - - it("treats a legacy bare-id cursor (no separator) as undefined so the window restarts", () => { - // Pre-(createdAt, id) format: a bare run id. 
Decoding must not throw — it - // returns undefined so an in-flight backfill restarts the window instead of - // failing every batch after the cursor-format change. - expect(decodeBackfillCursor("cmqpwioyy0009unul63v3mxw2")).toBeUndefined(); - }); - - it("returns undefined for a corrupt cursor instead of throwing", () => { - expect(decodeBackfillCursor("not-a-date_cmqpwioyy0009unul63v3mxw2")).toBeUndefined(); - expect(decodeBackfillCursor("_cmqpwioyy0009unul63v3mxw2")).toBeUndefined(); - }); -}); - describe("RunsBackfillerService", () => { replicationContainerTest( "should backfill completed runs to clickhouse", From 34b69e10a343be741318ae1eb64a9553eadb45bd Mon Sep 17 00:00:00 2001 From: Dan Sutton Date: Tue, 23 Jun 2026 08:15:17 +0100 Subject: [PATCH 83/83] fix(webapp,run-engine,scripts): harden v2 cross-table tests and tighten the recovery query - updateMetadata cross-table test: wrap the body in try/finally so stopFlushing always runs and the flush loop cannot bleed into later tests on a failure path. - cancelling cross-table cancel-cascade test: poll for the child CANCELED status with a deadline instead of a fixed 1s sleep, to de-flake it under slow CI. - recover-stuck-runs: constrain each UNION branch by id = ANY(runIds) so the recovery query scans only candidate rows instead of unioning both full tables before the join. 
--- apps/webapp/test/updateMetadata.test.ts | 191 +++++++++--------- .../src/engine/tests/cancelling.test.ts | 12 +- scripts/recover-stuck-runs.ts | 4 +- 3 files changed, 109 insertions(+), 98 deletions(-) diff --git a/apps/webapp/test/updateMetadata.test.ts b/apps/webapp/test/updateMetadata.test.ts index 11b6ba41c79..7d5314a1d45 100644 --- a/apps/webapp/test/updateMetadata.test.ts +++ b/apps/webapp/test/updateMetadata.test.ts @@ -1306,98 +1306,105 @@ describe("UpdateMetadataService.call", () => { logLevel: "debug", }); - const organization = await prisma.organization.create({ - data: { title: "test", slug: "test" }, - }); - const project = await prisma.project.create({ - data: { name: "test", slug: "test", organizationId: organization.id, externalRef: "test" }, - }); - const runtimeEnvironment = await prisma.runtimeEnvironment.create({ - data: { - slug: "test", - type: "DEVELOPMENT", - projectId: project.id, - organizationId: organization.id, - apiKey: "test", - pkApiKey: "test", - shortcode: "test", - }, - }); - - // Legacy parent (cuid id) lives in TaskRun. This is the mixed-window - // hierarchy: an org flips runTableV2 on while a pre-flip parent is live, - // and its post-flip child mints a ksuid into task_run_v2. - const parentId = RunId.generate(); - expect(isKsuidId(parentId.id)).toBe(false); - const parentTaskRun = await prisma.taskRun.create({ - data: { - id: parentId.id, - friendlyId: parentId.friendlyId, - taskIdentifier: "my-task", - payload: "{}", - payloadType: "application/json", - traceId: "t", - spanId: "s", - queue: "test", - runtimeEnvironmentId: runtimeEnvironment.id, - projectId: project.id, - organizationId: organization.id, - environmentType: "DEVELOPMENT", - engine: "V2", - }, - }); - - // v2 child (ksuid id) lives in task_run_v2 and points at the legacy - // parent by the scalar parentTaskRunId (no cross-table FK). 
- const childId = RunId.generateKsuid(); - expect(isKsuidId(childId.id)).toBe(true); - await prisma.taskRunV2.create({ - data: { - id: childId.id, - friendlyId: childId.friendlyId, - taskIdentifier: "my-child-task", - payload: "{}", - payloadType: "application/json", - traceId: "t", - spanId: "s", - queue: "test", - runtimeEnvironmentId: runtimeEnvironment.id, - projectId: project.id, - organizationId: organization.id, - environmentType: "DEVELOPMENT", - engine: "V2", - parentTaskRunId: parentTaskRun.id, - }, - }); - - // The child applies metadata.parent operations. Pre-fix, the table-bound - // parentTaskRun relation resolved null (parent is in the OTHER table), so - // the ops fell back to the child's own id — corrupting the child and - // never touching the parent. - await service.call(childId.id, { - parentOperations: [ - { type: "set", key: "foo", value: "bar" }, - { type: "append", key: "bar", value: "baz" }, - ], - }); - - // Wait for the buffered operations to flush. - await setTimeout(1000); - - // The PARENT (in TaskRun) must have received the operations. - const updatedParent = await prisma.taskRun.findFirst({ where: { id: parentTaskRun.id } }); - expect( - await parsePacket({ - data: updatedParent?.metadata ?? undefined, - dataType: updatedParent?.metadataType ?? "application/json", - }) - ).toEqual({ foo: "bar", bar: ["baz"] }); - - // The CHILD (in task_run_v2) must NOT have been polluted with parent ops. - const updatedChild = await prisma.taskRunV2.findFirst({ where: { id: childId.id } }); - expect(updatedChild?.metadata ?? 
null).toBeNull(); - - service.stopFlushing(); + try { + const organization = await prisma.organization.create({ + data: { title: "test", slug: "test" }, + }); + const project = await prisma.project.create({ + data: { + name: "test", + slug: "test", + organizationId: organization.id, + externalRef: "test", + }, + }); + const runtimeEnvironment = await prisma.runtimeEnvironment.create({ + data: { + slug: "test", + type: "DEVELOPMENT", + projectId: project.id, + organizationId: organization.id, + apiKey: "test", + pkApiKey: "test", + shortcode: "test", + }, + }); + + // Legacy parent (cuid id) lives in TaskRun. This is the mixed-window + // hierarchy: an org flips runTableV2 on while a pre-flip parent is live, + // and its post-flip child mints a ksuid into task_run_v2. + const parentId = RunId.generate(); + expect(isKsuidId(parentId.id)).toBe(false); + const parentTaskRun = await prisma.taskRun.create({ + data: { + id: parentId.id, + friendlyId: parentId.friendlyId, + taskIdentifier: "my-task", + payload: "{}", + payloadType: "application/json", + traceId: "t", + spanId: "s", + queue: "test", + runtimeEnvironmentId: runtimeEnvironment.id, + projectId: project.id, + organizationId: organization.id, + environmentType: "DEVELOPMENT", + engine: "V2", + }, + }); + + // v2 child (ksuid id) lives in task_run_v2 and points at the legacy + // parent by the scalar parentTaskRunId (no cross-table FK). + const childId = RunId.generateKsuid(); + expect(isKsuidId(childId.id)).toBe(true); + await prisma.taskRunV2.create({ + data: { + id: childId.id, + friendlyId: childId.friendlyId, + taskIdentifier: "my-child-task", + payload: "{}", + payloadType: "application/json", + traceId: "t", + spanId: "s", + queue: "test", + runtimeEnvironmentId: runtimeEnvironment.id, + projectId: project.id, + organizationId: organization.id, + environmentType: "DEVELOPMENT", + engine: "V2", + parentTaskRunId: parentTaskRun.id, + }, + }); + + // The child applies metadata.parent operations. 
Pre-fix, the table-bound + // parentTaskRun relation resolved null (parent is in the OTHER table), so + // the ops fell back to the child's own id — corrupting the child and + // never touching the parent. + await service.call(childId.id, { + parentOperations: [ + { type: "set", key: "foo", value: "bar" }, + { type: "append", key: "bar", value: "baz" }, + ], + }); + + // Wait for the buffered operations to flush. + await setTimeout(1000); + + // The PARENT (in TaskRun) must have received the operations. + const updatedParent = await prisma.taskRun.findFirst({ where: { id: parentTaskRun.id } }); + expect( + await parsePacket({ + data: updatedParent?.metadata ?? undefined, + dataType: updatedParent?.metadataType ?? "application/json", + }) + ).toEqual({ foo: "bar", bar: ["baz"] }); + + // The CHILD (in task_run_v2) must NOT have been polluted with parent ops. + const updatedChild = await prisma.taskRunV2.findFirst({ where: { id: childId.id } }); + expect(updatedChild?.metadata ?? null).toBeNull(); + } finally { + service.stopFlushing(); + } } ); }); diff --git a/internal-packages/run-engine/src/engine/tests/cancelling.test.ts b/internal-packages/run-engine/src/engine/tests/cancelling.test.ts index eff083658bc..75253684818 100644 --- a/internal-packages/run-engine/src/engine/tests/cancelling.test.ts +++ b/internal-packages/run-engine/src/engine/tests/cancelling.test.ts @@ -330,10 +330,14 @@ describe("RunEngine cancelling", () => { reason: "Cancelled by the user", }); - // The child cancellation is enqueued as a job; give the worker a moment. - await setTimeout(1000); - - const childData = await engine.getRunExecutionData({ runId: childRun.id }); + // The child cancellation is enqueued as a job; wait for the worker to process it + // (poll instead of a fixed sleep so the test isn't flaky under slow CI). 
+ let childData = await engine.getRunExecutionData({ runId: childRun.id }); + const deadline = Date.now() + 5_000; + while (childData?.run.status !== "CANCELED" && Date.now() < deadline) { + await setTimeout(50); + childData = await engine.getRunExecutionData({ runId: childRun.id }); + } expect(childData?.run.status).toBe("CANCELED"); } finally { await engine.quit(); diff --git a/scripts/recover-stuck-runs.ts b/scripts/recover-stuck-runs.ts index 7770a455aec..7840498fc79 100755 --- a/scripts/recover-stuck-runs.ts +++ b/scripts/recover-stuck-runs.ts @@ -220,9 +220,9 @@ async function main() { r."concurrencyKey" FROM "TaskRunExecutionSnapshot" s INNER JOIN ( - SELECT id, "organizationId", "projectId", "runtimeEnvironmentId", "taskIdentifier", "queue", "concurrencyKey" FROM "TaskRun" + SELECT id, "organizationId", "projectId", "runtimeEnvironmentId", "taskIdentifier", "queue", "concurrencyKey" FROM "TaskRun" WHERE id = ANY(${runIds}) UNION - SELECT id, "organizationId", "projectId", "runtimeEnvironmentId", "taskIdentifier", "queue", "concurrencyKey" FROM task_run_v2 + SELECT id, "organizationId", "projectId", "runtimeEnvironmentId", "taskIdentifier", "queue", "concurrencyKey" FROM task_run_v2 WHERE id = ANY(${runIds}) ) r ON r.id = s."runId" WHERE s."runId" = ANY(${runIds}) AND s."isValid" = true