Skip to content

Commit f2e6225

Browse files
fix(tables): allow url sandbox entries in the function-execute contract; key snapshot by column shape so schema edits invalidate it
1 parent b4aab21 commit f2e6225

3 files changed

Lines changed: 83 additions & 33 deletions

File tree

apps/sim/lib/api/contracts/hotspots.ts

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -162,11 +162,20 @@ export const functionExecuteContract = defineRouteContract({
162162
isCustomTool: z.boolean().optional().default(false),
163163
_sandboxFiles: z
164164
.array(
165-
z.object({
166-
path: z.string(),
167-
content: z.string(),
168-
encoding: z.literal('base64').optional(),
169-
})
165+
z.union([
166+
z.object({
167+
type: z.literal('content').optional(),
168+
path: z.string(),
169+
content: z.string(),
170+
encoding: z.literal('base64').optional(),
171+
}),
172+
// Mounted by reference: the sandbox fetches `url` itself (no bytes through the web tier).
173+
z.object({
174+
type: z.literal('url'),
175+
path: z.string(),
176+
url: z.string(),
177+
}),
178+
])
170179
)
171180
.optional(),
172181
}),

apps/sim/lib/table/snapshot-cache.test.ts

Lines changed: 44 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -28,11 +28,7 @@ vi.mock('@/lib/uploads/core/storage-service', () => ({
2828
deleteFile: mockDeleteFile,
2929
}))
3030

31-
import {
32-
getOrCreateTableSnapshot,
33-
SNAPSHOT_MAX_BYTES,
34-
TableSnapshotTooLargeError,
35-
} from '@/lib/table/snapshot-cache'
31+
import { getOrCreateTableSnapshot, TableSnapshotTooLargeError } from '@/lib/table/snapshot-cache'
3632

3733
const table = {
3834
id: 'tbl_1',
@@ -86,7 +82,11 @@ describe('getOrCreateTableSnapshot', () => {
8682

8783
const ref = await getOrCreateTableSnapshot(table, 'req')
8884

89-
expect(ref).toEqual({ key: 'table-snapshots/ws_1/tbl_1/v3.csv', size: 42, version: 3 })
85+
expect(ref).toEqual({
86+
key: expect.stringMatching(/^table-snapshots\/ws_1\/tbl_1\/v3-[0-9a-f]{12}\.csv$/),
87+
size: 42,
88+
version: 3,
89+
})
9090
expect(mockCreateMultipartUpload).not.toHaveBeenCalled()
9191
expect(mockSelectExportRowPage).not.toHaveBeenCalled()
9292
})
@@ -98,25 +98,50 @@ describe('getOrCreateTableSnapshot', () => {
9898
const ref = await getOrCreateTableSnapshot(table, 'req')
9999

100100
expect(mockCreateMultipartUpload).toHaveBeenCalledWith(
101-
expect.objectContaining({ key: 'table-snapshots/ws_1/tbl_1/v3.csv', context: 'execution' })
101+
expect.objectContaining({
102+
key: expect.stringMatching(/^table-snapshots\/ws_1\/tbl_1\/v3-[0-9a-f]{12}\.csv$/),
103+
context: 'execution',
104+
})
102105
)
103106
expect(lastHandle?.content).toBe('name\nAda\n')
104107
expect(ref).toEqual({
105-
key: 'table-snapshots/ws_1/tbl_1/v3.csv',
108+
key: expect.stringMatching(/^table-snapshots\/ws_1\/tbl_1\/v3-[0-9a-f]{12}\.csv$/),
106109
size: Buffer.byteLength('name\nAda\n'),
107110
version: 3,
108111
})
109112
// Best-effort prune of v2.
110113
expect(mockDeleteFile).toHaveBeenCalledWith(
111-
expect.objectContaining({ key: 'table-snapshots/ws_1/tbl_1/v2.csv', context: 'execution' })
114+
expect.objectContaining({
115+
key: expect.stringMatching(/^table-snapshots\/ws_1\/tbl_1\/v2-[0-9a-f]{12}\.csv$/),
116+
context: 'execution',
117+
})
112118
)
113119
})
114120

115121
it('keys the snapshot by tenant — the same table id in another workspace gets a different key', async () => {
116122
versions(1)
117123
mockHeadObject.mockResolvedValue({ size: 1 })
118124
const ref = await getOrCreateTableSnapshot({ ...table, workspaceId: 'ws_2' }, 'req')
119-
expect(ref.key).toBe('table-snapshots/ws_2/tbl_1/v1.csv')
125+
expect(ref.key).toMatch(/^table-snapshots\/ws_2\/tbl_1\/v1-[0-9a-f]{12}\.csv$/)
126+
})
127+
128+
it('changes the key when the column shape changes (schema edits invalidate the cache)', async () => {
129+
versions(7, 7)
130+
mockHeadObject.mockResolvedValue({ size: 1 })
131+
132+
const a = await getOrCreateTableSnapshot(table, 'req')
133+
const b = await getOrCreateTableSnapshot(
134+
{
135+
...table,
136+
schema: { columns: [{ id: 'col_name', name: 'renamed', type: 'string' }] },
137+
} as never,
138+
'req'
139+
)
140+
141+
// Same workspace/table/row-version, but a renamed column flips the shape hash → different key.
142+
expect(a.key).not.toBe(b.key)
143+
expect(a.key).toMatch(/\/v7-[0-9a-f]{12}\.csv$/)
144+
expect(b.key).toMatch(/\/v7-[0-9a-f]{12}\.csv$/)
120145
})
121146

122147
it('re-keys and rebuilds when rows_version advances mid-scan', async () => {
@@ -134,21 +159,25 @@ describe('getOrCreateTableSnapshot', () => {
134159
const ref = await getOrCreateTableSnapshot(table, 'req')
135160

136161
expect(ref.version).toBe(4)
137-
expect(ref.key).toBe('table-snapshots/ws_1/tbl_1/v4.csv')
162+
expect(ref.key).toMatch(/^table-snapshots\/ws_1\/tbl_1\/v4-[0-9a-f]{12}\.csv$/)
138163
expect(mockCreateMultipartUpload).toHaveBeenCalledTimes(2)
139164
// the stale v3 object is dropped
140165
expect(mockDeleteFile).toHaveBeenCalledWith(
141-
expect.objectContaining({ key: 'table-snapshots/ws_1/tbl_1/v3.csv' })
166+
expect.objectContaining({
167+
key: expect.stringMatching(/^table-snapshots\/ws_1\/tbl_1\/v3-[0-9a-f]{12}\.csv$/),
168+
})
142169
)
143170
})
144171

145172
it('aborts and throws when the CSV exceeds the size cap', async () => {
146173
versions(1)
147174
mockHeadObject.mockResolvedValue(null)
148175
mockSelectExportRowPage.mockReset()
149-
mockSelectExportRowPage.mockResolvedValueOnce([
150-
{ id: 'r1', data: { col_name: 'x'.repeat(SNAPSHOT_MAX_BYTES + 10) }, position: 0 },
151-
])
176+
// A full batch of wide rows on every page → the materialize loop keeps paging until the running
177+
// byte count crosses the cap, then aborts. Peak memory stays at one page (~MBs), not the cap.
178+
const wideRow = { id: 'r', data: { col_name: 'x'.repeat(1000) }, position: 0 }
179+
const fullPage = Array.from({ length: 10000 }, () => wideRow)
180+
mockSelectExportRowPage.mockResolvedValue(fullPage)
152181

153182
await expect(getOrCreateTableSnapshot(table, 'req')).rejects.toBeInstanceOf(
154183
TableSnapshotTooLargeError

apps/sim/lib/table/snapshot-cache.ts

Lines changed: 25 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
* contain — and be addressed by — its owning tenant.
1212
*/
1313

14+
import { createHash } from 'crypto'
1415
import { db } from '@sim/db'
1516
import { userTableDefinitions } from '@sim/db/schema'
1617
import { createLogger } from '@sim/logger'
@@ -52,19 +53,25 @@ export class TableSnapshotTooLargeError extends Error {
5253
}
5354

5455
/**
55-
* Storage key for a table's snapshot at a given version.
56-
*
57-
* `projectionHash` is a forward seam: a future column-subset / filtered mount appends
58-
* `-{projectionHash}` so projections cache independently of the full snapshot.
56+
* Fingerprint of the table's column shape (id + display name + order). `rows_version` only advances
57+
* on row mutations (the trigger fires on `user_table_rows`), so without this a schema edit — rename,
58+
* add, remove, or reorder a column — would change the CSV header/columns but keep the same key and
59+
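* serve a stale snapshot. Folding it into the key invalidates the cache on any of those shape changes (an edit that leaves column ids, names, and order untouched does not alter the fingerprint). This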
60+
* is also the seam for a future column-subset / filtered projection (mix it into the same hash).
5961
*/
62+
function schemaFingerprint(table: TableDefinition): string {
63+
const shape = table.schema.columns.map((c) => [getColumnId(c), c.name])
64+
return createHash('sha1').update(JSON.stringify(shape)).digest('hex').slice(0, 12)
65+
}
66+
67+
/** Storage key for a table's snapshot at a given row version + column shape. */
6068
function snapshotKey(
6169
workspaceId: string,
6270
tableId: string,
6371
version: number,
64-
projectionHash?: string
72+
shapeHash: string
6573
): string {
66-
const suffix = projectionHash ? `-${projectionHash}` : ''
67-
return `table-snapshots/${workspaceId}/${tableId}/v${version}${suffix}.csv`
74+
return `table-snapshots/${workspaceId}/${tableId}/v${version}-${shapeHash}.csv`
6875
}
6976

7077
async function readRowsVersion(tableId: string): Promise<number> {
@@ -122,10 +129,14 @@ async function materialize(table: TableDefinition, key: string): Promise<number>
122129
}
123130

124131
/** Best-effort removal of the immediately-prior row version's snapshot under the same column-shape hash (the common single-mutation case). */
125-
async function deletePreviousVersion(table: TableDefinition, version: number): Promise<void> {
132+
async function deletePreviousVersion(
133+
table: TableDefinition,
134+
version: number,
135+
shapeHash: string
136+
): Promise<void> {
126137
if (version <= 0) return
127138
await deleteFile({
128-
key: snapshotKey(table.workspaceId, table.id, version - 1),
139+
key: snapshotKey(table.workspaceId, table.id, version - 1, shapeHash),
129140
context: SNAPSHOT_STORAGE_CONTEXT,
130141
}).catch(() => {})
131142
}
@@ -142,8 +153,9 @@ export async function getOrCreateTableSnapshot(
142153
table: TableDefinition,
143154
requestId: string
144155
): Promise<TableSnapshotRef> {
156+
const shapeHash = schemaFingerprint(table)
145157
const version = await readRowsVersion(table.id)
146-
const key = snapshotKey(table.workspaceId, table.id, version)
158+
const key = snapshotKey(table.workspaceId, table.id, version, shapeHash)
147159

148160
const head = await headObject(key, SNAPSHOT_STORAGE_CONTEXT)
149161
if (head) {
@@ -163,14 +175,14 @@ export async function getOrCreateTableSnapshot(
163175
from: version,
164176
to: after,
165177
})
166-
const newKey = snapshotKey(table.workspaceId, table.id, after)
178+
const newKey = snapshotKey(table.workspaceId, table.id, after, shapeHash)
167179
const newHead = await headObject(newKey, SNAPSHOT_STORAGE_CONTEXT)
168180
const newSize = newHead ? newHead.size : await materialize(table, newKey)
169181
await deleteFile({ key, context: SNAPSHOT_STORAGE_CONTEXT }).catch(() => {})
170-
void deletePreviousVersion(table, after)
182+
void deletePreviousVersion(table, after, shapeHash)
171183
return { key: newKey, size: newSize, version: after }
172184
}
173185

174-
void deletePreviousVersion(table, version)
186+
void deletePreviousVersion(table, version, shapeHash)
175187
return { key, size, version }
176188
}

0 commit comments

Comments
 (0)