From 370642e6cbc707cc92ff4815be0946d81d2570d1 Mon Sep 17 00:00:00 2001
From: Waleed Latif <walif6@gmail.com>
Date: Fri, 29 May 2026 19:52:57 -0700
Subject: [PATCH 1/2] perf(copilot): read chat transcripts from
 copilot_messages, not JSONB
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Flip user-facing chat reads from the legacy copilot_chats.messages JSONB
array (5.7GB, 99% TOAST) to the normalized copilot_messages table via a
new loadCopilotChatMessages helper ordered by seq NULLS LAST, created_at,
id — the verified canonical order. Both chat-detail getters
(getAccessibleCopilotChat, getAccessibleCopilotChatWithMessages) now drop
the messages column from their metadata select (no more whole-array
detoast on every load) and assemble the transcript from the table after
authorization. This cascades to the copilot + mothership GET endpoints
and to resolveOrCreateChat's conversationHistory (the LLM payload).

The normalize/effective-transcript pipeline is source-agnostic
(copilot_messages.content == a JSONB array element), so transcripts are
byte-identical. Dual-write and the JSONB column stay in place as the
internal-logic source and fallback; removing JSONB writes is a later step.

Prod integrity verified before cutover: 0 messages missing, 0 NULL-seq,
0 dup keys/seq, 0 orphans, order-parity vs JSONB = 0 mismatches.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 apps/sim/lib/copilot/chat/lifecycle.test.ts | 124 ++++++++++++++++++++
 apps/sim/lib/copilot/chat/lifecycle.ts      |  62 ++++++++--
 2 files changed, 175 insertions(+), 11 deletions(-)
 create mode 100644 apps/sim/lib/copilot/chat/lifecycle.test.ts

diff --git a/apps/sim/lib/copilot/chat/lifecycle.test.ts b/apps/sim/lib/copilot/chat/lifecycle.test.ts
new file mode 100644
index 0000000000..3b1a84e632
--- /dev/null
+++ b/apps/sim/lib/copilot/chat/lifecycle.test.ts
@@ -0,0 +1,124 @@
+/**
+ * @vitest-environment node
+ */
+import { dbChainMock, dbChainMockFns, resetDbChainMock } from '@sim/testing'
+import { beforeEach, describe, expect, it, vi } from 'vitest'
+
+vi.mock('@sim/db', () => dbChainMock)
+
+const { mockAuthorizeWorkflow, mockGetActiveWorkflow } = vi.hoisted(() => ({
+  mockAuthorizeWorkflow: vi.fn(),
+  mockGetActiveWorkflow: vi.fn(),
+}))
+
+vi.mock('@sim/workflow-authz', () => ({
+  authorizeWorkflowByWorkspacePermission: mockAuthorizeWorkflow,
+  getActiveWorkflowRecord: mockGetActiveWorkflow,
+}))
+
+vi.mock('@/lib/workspaces/permissions/utils', () => ({
+  assertActiveWorkspaceAccess: vi.fn(),
+  checkWorkspaceAccess: vi.fn(),
+}))
+
+import {
+  getAccessibleCopilotChat,
+  getAccessibleCopilotChatWithMessages,
+  resolveOrCreateChat,
+} from '@/lib/copilot/chat/lifecycle'
+
+const CHAT_ID = 'chat-1'
+const USER_ID = 'user-1'
+
+// A chat with no workflow/workspace skips the authz lookups and authorizes directly.
+const chatRow = {
+  id: CHAT_ID,
+  userId: USER_ID,
+  workflowId: null,
+  workspaceId: null,
+  type: 'copilot',
+  title: 'Test',
+  conversationId: null,
+  resources: [],
+  createdAt: new Date('2026-01-01T00:00:00.000Z'),
+  updatedAt: new Date('2026-01-01T00:00:00.000Z'),
+}
+
+const userMsg = { id: 'm-user', role: 'user', content: 'Hi', timestamp: '2026-01-01T00:00:00.000Z' }
+const asstMsg = {
+  id: 'm-asst',
+  role: 'assistant',
+  content: 'Hello',
+  timestamp: '2026-01-01T00:00:01.000Z',
+}
+
+describe('lifecycle copilot chat reads (cutover to copilot_messages)', () => {
+  beforeEach(() => {
+    vi.clearAllMocks()
+    resetDbChainMock()
+  })
+
+  it('getAccessibleCopilotChatWithMessages sources messages from copilot_messages in seq order', async () => {
+    // 1st query: chat metadata (select().from().where().limit())
+    dbChainMockFns.limit.mockResolvedValueOnce([chatRow])
+    // 2nd query: messages (select().from().where().orderBy())
+    dbChainMockFns.orderBy.mockResolvedValueOnce([{ content: userMsg }, { content: asstMsg }])
+
+    const result = await getAccessibleCopilotChatWithMessages(CHAT_ID, USER_ID)
+
+    expect(result).not.toBeNull()
+    expect(result?.messages).toEqual([userMsg, asstMsg])
+    expect(dbChainMockFns.orderBy).toHaveBeenCalledTimes(1)
+  })
+
+  it('returns an empty transcript for a chat with no messages', async () => {
+    dbChainMockFns.limit.mockResolvedValueOnce([chatRow])
+    dbChainMockFns.orderBy.mockResolvedValueOnce([])
+
+    const result = await getAccessibleCopilotChatWithMessages(CHAT_ID, USER_ID)
+
+    expect(result?.messages).toEqual([])
+  })
+
+  it('returns null and does NOT query messages when the chat is not found', async () => {
+    dbChainMockFns.limit.mockResolvedValueOnce([])
+
+    const result = await getAccessibleCopilotChatWithMessages(CHAT_ID, USER_ID)
+
+    expect(result).toBeNull()
+    expect(dbChainMockFns.orderBy).not.toHaveBeenCalled()
+  })
+
+  it('legacy getAccessibleCopilotChat also assembles messages from copilot_messages', async () => {
+    dbChainMockFns.limit.mockResolvedValueOnce([
+      { ...chatRow, model: 'm', planArtifact: null, config: null },
+    ])
+    dbChainMockFns.orderBy.mockResolvedValueOnce([{ content: userMsg }])
+
+    const result = await getAccessibleCopilotChat(CHAT_ID, USER_ID)
+
+    expect(result?.messages).toEqual([userMsg])
+  })
+
+  it('resolveOrCreateChat returns conversationHistory from the table for an existing chat', async () => {
+    dbChainMockFns.limit.mockResolvedValueOnce([chatRow])
+    dbChainMockFns.orderBy.mockResolvedValueOnce([{ content: userMsg }, { content: asstMsg }])
+
+    const result = await resolveOrCreateChat({ chatId: CHAT_ID, userId: USER_ID, model: 'm' })
+
+    expect(result.isNew).toBe(false)
+    expect(result.conversationHistory).toEqual([userMsg, asstMsg])
+  })
+
+  it('resolveOrCreateChat creates a new chat with an empty transcript', async () => {
+    // insert().values().returning() -> fresh chat with empty messages
+    dbChainMockFns.returning.mockResolvedValueOnce([{ ...chatRow, messages: [] }])
+
+    const result = await resolveOrCreateChat({ userId: USER_ID, model: 'm' })
+
+    expect(result.isNew).toBe(true)
+    expect(result.conversationHistory).toEqual([])
+    // a brand-new chat must not trigger a messages read
+    expect(dbChainMockFns.orderBy).not.toHaveBeenCalled()
+  })
+})
diff --git a/apps/sim/lib/copilot/chat/lifecycle.ts b/apps/sim/lib/copilot/chat/lifecycle.ts
index 0a9802d0f6..e86c127c10 100644
--- a/apps/sim/lib/copilot/chat/lifecycle.ts
+++ b/apps/sim/lib/copilot/chat/lifecycle.ts
@@ -1,11 +1,11 @@
 import { db } from '@sim/db'
-import { copilotChats } from '@sim/db/schema'
+import { copilotChats, copilotMessages } from '@sim/db/schema'
 import { createLogger } from '@sim/logger'
 import {
   authorizeWorkflowByWorkspacePermission,
   getActiveWorkflowRecord,
 } from '@sim/workflow-authz'
-import { and, eq } from 'drizzle-orm'
+import { and, asc, eq, isNull, sql } from 'drizzle-orm'
 import {
   assertActiveWorkspaceAccess,
   checkWorkspaceAccess,
@@ -35,22 +35,33 @@ const copilotChatAuthColumns = {
 } as const
 
 /**
- * Column set for chat-detail callers that need the conversation transcript but
- * not the copilot-only TOAST-able fields (`previewYaml`, `planArtifact`,
- * `config`) or unused metadata (`model`, `pinned`, `lastSeenAt`). Selecting
- * only these columns avoids the Postgres detoast cost on the dropped fields,
- * which dominates latency for chats with large message histories.
+ * Column set for chat-detail callers that need chat metadata. The conversation
+ * transcript is no longer selected from `copilot_chats.messages` (JSONB) —
+ * reads now source it from the normalized `copilot_messages` table via
+ * `loadCopilotChatMessages`, which avoids detoasting the large messages blob on
+ * every load. The copilot-only TOAST-able fields (`previewYaml`,
+ * `planArtifact`, `config`) and unused metadata (`model`, `pinned`,
+ * `lastSeenAt`) remain excluded.
  */
 const copilotChatDetailColumns = {
   ...copilotChatAuthColumns,
   title: copilotChats.title,
-  messages: copilotChats.messages,
   conversationId: copilotChats.conversationId,
   resources: copilotChats.resources,
   createdAt: copilotChats.createdAt,
   updatedAt: copilotChats.updatedAt,
 } as const
 
+/**
+ * Returning column set for newly-inserted chats. A fresh chat has no
+ * `copilot_messages` rows yet, so the transcript is the just-inserted empty
+ * JSONB array — return it directly rather than issuing a second query.
+ */
+const copilotChatDetailReturningColumns = {
+  ...copilotChatDetailColumns,
+  messages: copilotChats.messages,
+} as const
+
 /**
  * Column set for the legacy copilot chat detail endpoint. Extends
  * `copilotChatDetailColumns` with `model`, `planArtifact`, and `config` — the
@@ -64,6 +75,27 @@ const copilotChatLegacyDetailColumns = {
   config: copilotChats.config,
 } as const
 
+/**
+ * Load a chat's transcript from the normalized `copilot_messages` table in
+ * canonical order (`seq` first, then `created_at`/`id` as a deterministic
+ * tiebreak; `NULLS LAST` so any not-yet-sequenced row sorts after sequenced
+ * ones). Each row's `content` is the full message object — identical in shape
+ * to a legacy JSONB array element — so the downstream normalize/transcript
+ * pipeline is unchanged.
+ */
+async function loadCopilotChatMessages(chatId: string): Promise<Record<string, unknown>[]> {
+  const rows = await db
+    .select({ content: copilotMessages.content })
+    .from(copilotMessages)
+    .where(and(eq(copilotMessages.chatId, chatId), isNull(copilotMessages.deletedAt)))
+    .orderBy(
+      sql`${copilotMessages.seq} asc nulls last`,
+      asc(copilotMessages.createdAt),
+      asc(copilotMessages.id)
+    )
+  return rows.map((row) => row.content as Record<string, unknown>)
+}
+
 type CopilotChatAuthRow = Pick<
   typeof copilotChats.$inferSelect,
   'id' | 'userId' | 'workflowId' | 'workspaceId' | 'type'
@@ -160,7 +192,11 @@ export async function getAccessibleCopilotChat(
     .where(and(eq(copilotChats.id, chatId), eq(copilotChats.userId, userId)))
     .limit(1)
 
-  return authorizeCopilotChatRow(chat, chatId, userId)
+  const authorized = await authorizeCopilotChatRow(chat, chatId, userId)
+  if (!authorized) return null
+
+  const messages = await loadCopilotChatMessages(chatId)
+  return { ...authorized, messages }
 }
 
 /**
@@ -181,7 +217,11 @@ export async function getAccessibleCopilotChatWithMessages(
     .where(and(eq(copilotChats.id, chatId), eq(copilotChats.userId, userId)))
     .limit(1)
 
-  return authorizeCopilotChatRow(chat, chatId, userId)
+  const authorized = await authorizeCopilotChatRow(chat, chatId, userId)
+  if (!authorized) return null
+
+  const messages = await loadCopilotChatMessages(chatId)
+  return { ...authorized, messages }
 }
 
 /**
@@ -261,7 +301,7 @@ export async function resolveOrCreateChat(params: {
       messages: [],
       lastSeenAt: now,
     })
-    .returning(copilotChatDetailColumns)
+    .returning(copilotChatDetailReturningColumns)
 
   if (!newChat) {
     logger.warn('Failed to create new copilot chat row', { userId, workflowId, workspaceId })

From 2e7f4ecd08f3610ef372d3750d82d87b0c74f799 Mon Sep 17 00:00:00 2001
From: Waleed Latif <walif6@gmail.com>
Date: Fri, 29 May 2026 20:00:07 -0700
Subject: [PATCH 2/2] test(copilot): cover auth-deny on a found row skips the
 messages query
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Address PR review: exercise the `if (!authorized) return null` contract —
when the chat row exists but authorization fails, the getter returns null
and never issues the copilot_messages read.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 apps/sim/lib/copilot/chat/lifecycle.test.ts | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/apps/sim/lib/copilot/chat/lifecycle.test.ts b/apps/sim/lib/copilot/chat/lifecycle.test.ts
index 3b1a84e632..df1689cc68 100644
--- a/apps/sim/lib/copilot/chat/lifecycle.test.ts
+++ b/apps/sim/lib/copilot/chat/lifecycle.test.ts
@@ -89,6 +89,17 @@ describe('lifecycle copilot chat reads (cutover to copilot_messages)', () => {
     expect(dbChainMockFns.orderBy).not.toHaveBeenCalled()
   })
 
+  it('returns null and does NOT query messages when the row is found but authorization fails', async () => {
+    // Row exists but belongs to a workflow the user cannot read.
+    dbChainMockFns.limit.mockResolvedValueOnce([{ ...chatRow, workflowId: 'wf-1' }])
+    mockAuthorizeWorkflow.mockResolvedValueOnce({ allowed: false, workflow: null })
+
+    const result = await getAccessibleCopilotChatWithMessages(CHAT_ID, USER_ID)
+
+    expect(result).toBeNull()
+    expect(dbChainMockFns.orderBy).not.toHaveBeenCalled()
+  })
+
   it('legacy getAccessibleCopilotChat also assembles messages from copilot_messages', async () => {
     dbChainMockFns.limit.mockResolvedValueOnce([
       { ...chatRow, model: 'm', planArtifact: null, config: null },