From 370642e6cbc707cc92ff4815be0946d81d2570d1 Mon Sep 17 00:00:00 2001 From: Waleed Latif Date: Fri, 29 May 2026 19:52:57 -0700 Subject: [PATCH 1/2] perf(copilot): read chat transcripts from copilot_messages, not JSONB MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Flip user-facing chat reads from the legacy copilot_chats.messages JSONB array (5.7GB, 99% TOAST) to the normalized copilot_messages table via a new loadCopilotChatMessages helper ordered by seq NULLS LAST, created_at, id — the verified canonical order. Both chat-detail getters (getAccessibleCopilotChat, getAccessibleCopilotChatWithMessages) now drop the messages column from their metadata select (no more whole-array detoast on every load) and assemble the transcript from the table after authorization. This cascades to the copilot + mothership GET endpoints and to resolveOrCreateChat's conversationHistory (the LLM payload). The normalize/effective-transcript pipeline is source-agnostic (copilot_messages.content == a JSONB array element), so transcripts are byte-identical. Dual-write and the JSONB column stay in place as the internal-logic source and fallback; removing JSONB writes is a later step. Prod integrity verified before cutover: 0 messages missing, 0 NULL-seq, 0 dup keys/seq, 0 orphans, order-parity vs JSONB = 0 mismatches. Co-Authored-By: Claude Opus 4.8 --- apps/sim/lib/copilot/chat/lifecycle.test.ts | 124 ++++++++++++++++++++ apps/sim/lib/copilot/chat/lifecycle.ts | 62 ++++++++-- 2 files changed, 175 insertions(+), 11 deletions(-) create mode 100644 apps/sim/lib/copilot/chat/lifecycle.test.ts diff --git a/apps/sim/lib/copilot/chat/lifecycle.test.ts b/apps/sim/lib/copilot/chat/lifecycle.test.ts new file mode 100644 index 0000000000..3b1a84e632 --- /dev/null +++ b/apps/sim/lib/copilot/chat/lifecycle.test.ts @@ -0,0 +1,124 @@ +/** + * @vitest-environment node + */ +import { dbChainMock, dbChainMockFns, resetDbChainMock } from '@sim/testing' +import { beforeEach, describe, expect, it, vi } from 'vitest' + +vi.mock('@sim/db', () => dbChainMock) + +const { mockAuthorizeWorkflow, mockGetActiveWorkflow } = vi.hoisted(() => ({ + mockAuthorizeWorkflow: vi.fn(), + mockGetActiveWorkflow: vi.fn(), +})) + +vi.mock('@sim/workflow-authz', () => ({ + authorizeWorkflowByWorkspacePermission: mockAuthorizeWorkflow, + getActiveWorkflowRecord: mockGetActiveWorkflow, +})) + +vi.mock('@/lib/workspaces/permissions/utils', () => ({ + assertActiveWorkspaceAccess: vi.fn(), + checkWorkspaceAccess: vi.fn(), +})) + +import { + getAccessibleCopilotChat, + getAccessibleCopilotChatWithMessages, + resolveOrCreateChat, +} from '@/lib/copilot/chat/lifecycle' + +const CHAT_ID = 'chat-1' +const USER_ID = 'user-1' + +// A chat with no workflow/workspace skips the authz lookups and authorizes directly. +const chatRow = { + id: CHAT_ID, + userId: USER_ID, + workflowId: null, + workspaceId: null, + type: 'copilot', + title: 'Test', + conversationId: null, + resources: [], + createdAt: new Date('2026-01-01T00:00:00.000Z'), + updatedAt: new Date('2026-01-01T00:00:00.000Z'), +} + +const userMsg = { id: 'm-user', role: 'user', content: 'Hi', timestamp: '2026-01-01T00:00:00.000Z' } +const asstMsg = { + id: 'm-asst', + role: 'assistant', + content: 'Hello', + timestamp: '2026-01-01T00:00:01.000Z', +} + +describe('lifecycle copilot chat reads (cutover to copilot_messages)', () => { + beforeEach(() => { + vi.clearAllMocks() + resetDbChainMock() + }) + + it('getAccessibleCopilotChatWithMessages sources messages from copilot_messages in seq order', async () => { + // 1st query: chat metadata (select().from().where().limit()) + dbChainMockFns.limit.mockResolvedValueOnce([chatRow]) + // 2nd query: messages (select().from().where().orderBy()) + dbChainMockFns.orderBy.mockResolvedValueOnce([{ content: userMsg }, { content: asstMsg }]) + + const result = await getAccessibleCopilotChatWithMessages(CHAT_ID, USER_ID) + + expect(result).not.toBeNull() + expect(result?.messages).toEqual([userMsg, asstMsg]) + expect(dbChainMockFns.orderBy).toHaveBeenCalledTimes(1) + }) + + it('returns an empty transcript for a chat with no messages', async () => { + dbChainMockFns.limit.mockResolvedValueOnce([chatRow]) + dbChainMockFns.orderBy.mockResolvedValueOnce([]) + + const result = await getAccessibleCopilotChatWithMessages(CHAT_ID, USER_ID) + + expect(result?.messages).toEqual([]) + }) + + it('returns null and does NOT query messages when the chat is not found', async () => { + dbChainMockFns.limit.mockResolvedValueOnce([]) + + const result = await getAccessibleCopilotChatWithMessages(CHAT_ID, USER_ID) + + expect(result).toBeNull() + expect(dbChainMockFns.orderBy).not.toHaveBeenCalled() + }) + + it('legacy getAccessibleCopilotChat also assembles messages from copilot_messages', async () => { + dbChainMockFns.limit.mockResolvedValueOnce([ + { ...chatRow, model: 'm', planArtifact: null, config: null }, + ]) + dbChainMockFns.orderBy.mockResolvedValueOnce([{ content: userMsg }]) + + const result = await getAccessibleCopilotChat(CHAT_ID, USER_ID) + + expect(result?.messages).toEqual([userMsg]) + }) + + it('resolveOrCreateChat returns conversationHistory from the table for an existing chat', async () => { + dbChainMockFns.limit.mockResolvedValueOnce([chatRow]) + dbChainMockFns.orderBy.mockResolvedValueOnce([{ content: userMsg }, { content: asstMsg }]) + + const result = await resolveOrCreateChat({ chatId: CHAT_ID, userId: USER_ID, model: 'm' }) + + expect(result.isNew).toBe(false) + expect(result.conversationHistory).toEqual([userMsg, asstMsg]) + }) + + it('resolveOrCreateChat creates a new chat with an empty transcript', async () => { + // insert().values().returning() -> fresh chat with empty messages + dbChainMockFns.returning.mockResolvedValueOnce([{ ...chatRow, messages: [] }]) + + const result = await resolveOrCreateChat({ userId: USER_ID, model: 'm' }) + + expect(result.isNew).toBe(true) + expect(result.conversationHistory).toEqual([]) + // a brand-new chat must not trigger a messages read + expect(dbChainMockFns.orderBy).not.toHaveBeenCalled() + }) +}) diff --git a/apps/sim/lib/copilot/chat/lifecycle.ts b/apps/sim/lib/copilot/chat/lifecycle.ts index 0a9802d0f6..e86c127c10 100644 --- a/apps/sim/lib/copilot/chat/lifecycle.ts +++ b/apps/sim/lib/copilot/chat/lifecycle.ts @@ -1,11 +1,11 @@ import { db } from '@sim/db' -import { copilotChats } from '@sim/db/schema' +import { copilotChats, copilotMessages } from '@sim/db/schema' import { createLogger } from '@sim/logger' import { authorizeWorkflowByWorkspacePermission, getActiveWorkflowRecord, } from '@sim/workflow-authz' -import { and, eq } from 'drizzle-orm' +import { and, asc, eq, isNull, sql } from 'drizzle-orm' import { assertActiveWorkspaceAccess, checkWorkspaceAccess, @@ -35,22 +35,33 @@ const copilotChatAuthColumns = { } as const /** - * Column set for chat-detail callers that need the conversation transcript but - * not the copilot-only TOAST-able fields (`previewYaml`, `planArtifact`, - * `config`) or unused metadata (`model`, `pinned`, `lastSeenAt`). Selecting - * only these columns avoids the Postgres detoast cost on the dropped fields, - * which dominates latency for chats with large message histories. + * Column set for chat-detail callers that need chat metadata. The conversation + * transcript is no longer selected from `copilot_chats.messages` (JSONB) — + * reads now source it from the normalized `copilot_messages` table via + * `loadCopilotChatMessages`, which avoids detoasting the large messages blob on + * every load. The copilot-only TOAST-able fields (`previewYaml`, + * `planArtifact`, `config`) and unused metadata (`model`, `pinned`, + * `lastSeenAt`) remain excluded. */ const copilotChatDetailColumns = { ...copilotChatAuthColumns, title: copilotChats.title, - messages: copilotChats.messages, conversationId: copilotChats.conversationId, resources: copilotChats.resources, createdAt: copilotChats.createdAt, updatedAt: copilotChats.updatedAt, } as const +/** + * Returning column set for newly-inserted chats. A fresh chat has no + * `copilot_messages` rows yet, so the transcript is the just-inserted empty + * JSONB array — return it directly rather than issuing a second query. + */ +const copilotChatDetailReturningColumns = { + ...copilotChatDetailColumns, + messages: copilotChats.messages, +} as const + /** * Column set for the legacy copilot chat detail endpoint. Extends * `copilotChatDetailColumns` with `model`, `planArtifact`, and `config` — the @@ -64,6 +75,27 @@ const copilotChatLegacyDetailColumns = { config: copilotChats.config, } as const +/** + * Load a chat's transcript from the normalized `copilot_messages` table in + * canonical order (`seq` first, then `created_at`/`id` as a deterministic + * tiebreak; `NULLS LAST` so any not-yet-sequenced row sorts after sequenced + * ones). Each row's `content` is the full message object — identical in shape + * to a legacy JSONB array element — so the downstream normalize/transcript + * pipeline is unchanged. + */ +async function loadCopilotChatMessages(chatId: string): Promise[]> { + const rows = await db + .select({ content: copilotMessages.content }) + .from(copilotMessages) + .where(and(eq(copilotMessages.chatId, chatId), isNull(copilotMessages.deletedAt))) + .orderBy( + sql`${copilotMessages.seq} asc nulls last`, + asc(copilotMessages.createdAt), + asc(copilotMessages.id) + ) + return rows.map((row) => row.content as Record) +} + type CopilotChatAuthRow = Pick< typeof copilotChats.$inferSelect, 'id' | 'userId' | 'workflowId' | 'workspaceId' | 'type' @@ -160,7 +192,11 @@ export async function getAccessibleCopilotChat( .where(and(eq(copilotChats.id, chatId), eq(copilotChats.userId, userId))) .limit(1) - return authorizeCopilotChatRow(chat, chatId, userId) + const authorized = await authorizeCopilotChatRow(chat, chatId, userId) + if (!authorized) return null + + const messages = await loadCopilotChatMessages(chatId) + return { ...authorized, messages } } /** @@ -181,7 +217,11 @@ export async function getAccessibleCopilotChatWithMessages( .where(and(eq(copilotChats.id, chatId), eq(copilotChats.userId, userId))) .limit(1) - return authorizeCopilotChatRow(chat, chatId, userId) + const authorized = await authorizeCopilotChatRow(chat, chatId, userId) + if (!authorized) return null + + const messages = await loadCopilotChatMessages(chatId) + return { ...authorized, messages } } /** @@ -261,7 +301,7 @@ export async function resolveOrCreateChat(params: { messages: [], lastSeenAt: now, }) - .returning(copilotChatDetailColumns) + .returning(copilotChatDetailReturningColumns) if (!newChat) { logger.warn('Failed to create new copilot chat row', { userId, workflowId, workspaceId }) From 2e7f4ecd08f3610ef372d3750d82d87b0c74f799 Mon Sep 17 00:00:00 2001 From: Waleed Latif Date: Fri, 29 May 2026 20:00:07 -0700 Subject: [PATCH 2/2] test(copilot): cover auth-deny on a found row skips the messages query MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address PR review: exercise the `if (!authorized) return null` contract — when the chat row exists but authorization fails, the getter returns null and never issues the copilot_messages read. Co-Authored-By: Claude Opus 4.8 --- apps/sim/lib/copilot/chat/lifecycle.test.ts | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/apps/sim/lib/copilot/chat/lifecycle.test.ts b/apps/sim/lib/copilot/chat/lifecycle.test.ts index 3b1a84e632..df1689cc68 100644 --- a/apps/sim/lib/copilot/chat/lifecycle.test.ts +++ b/apps/sim/lib/copilot/chat/lifecycle.test.ts @@ -89,6 +89,17 @@ describe('lifecycle copilot chat reads (cutover to copilot_messages)', () => { expect(dbChainMockFns.orderBy).not.toHaveBeenCalled() }) + it('returns null and does NOT query messages when the row is found but authorization fails', async () => { + // Row exists but belongs to a workflow the user cannot read. + dbChainMockFns.limit.mockResolvedValueOnce([{ ...chatRow, workflowId: 'wf-1' }]) + mockAuthorizeWorkflow.mockResolvedValueOnce({ allowed: false, workflow: null }) + + const result = await getAccessibleCopilotChatWithMessages(CHAT_ID, USER_ID) + + expect(result).toBeNull() + expect(dbChainMockFns.orderBy).not.toHaveBeenCalled() + }) + it('legacy getAccessibleCopilotChat also assembles messages from copilot_messages', async () => { dbChainMockFns.limit.mockResolvedValueOnce([ { ...chatRow, model: 'm', planArtifact: null, config: null },