diff --git a/.claude/CLAUDE.md b/.claude/CLAUDE.md index f38097a..c2467bd 100644 --- a/.claude/CLAUDE.md +++ b/.claude/CLAUDE.md @@ -192,11 +192,17 @@ boxel sync . --prefer-newest # Keep newest version boxel sync . --delete # Sync deletions both ways boxel sync . --dry-run # Preview only -boxel push ./local # One-way push (local → remote) -boxel push ./local --delete # Push and remove orphaned remote files -boxel pull ./local # One-way pull (remote → local) +boxel push ./local # One-way push (local → remote) +boxel push ./local --delete # Push and remove orphaned remote files +boxel push ./local --batch # Atomic batch upload (10/batch default) +boxel push ./local --batch --batch-size 25 # Custom batch size +boxel pull ./local # One-way pull (remote → local) ``` +> **Pull writes a manifest:** After `boxel pull ./local` downloads files, it automatically writes `.boxel-sync.json` so `boxel sync .` works immediately against the fresh directory. No manual step needed between pull and first sync. + +> **`push --batch`:** `.gts` definitions upload individually in dependency order; `.json` instances batch through `/_atomic` in groups of N. Faster for bulk pushes (50+ files). Binary files (images, fonts) and plain-text files (`.md`, `.csv`, `.yaml`) always take the per-file POST path because `/_atomic` only accepts card and source resource types. 
+ **Failed download cleanup:** When `sync` encounters files that return 500 errors (broken/corrupted on server), it will prompt you to delete them: ``` ⚠️ 3 file(s) failed to download (server error): diff --git a/.gitignore b/.gitignore index 323e636..43937ca 100644 --- a/.gitignore +++ b/.gitignore @@ -11,6 +11,10 @@ dist/ # Synced workspaces (never commit workspace content) stack.cards/ boxel.ai/ +boxel-workspaces/ +down/ +down-*/ +up/ .boxel-sync.json .boxel-history/ @@ -41,3 +45,36 @@ npm-debug.log* # Worktrees .claude/worktrees +.gstack/ + +# Runtime locks +.claude/scheduled_tasks.lock + +# ─── Drift guards ────────────────────────────────────────────────────────── +# Content that historically leaked into this repo while working in Boxel +# workspaces from a CWD inside boxel-cli. These belong to boxel's realm-server, +# host, or are workspace content — never boxel-cli source. Fail closed: if +# any of these reappear, they stay untracked instead of silently committed. + +# Design docs about Boxel platform (not CLI) +/docs/yjs-*.md +/docs/realm-*.md +/docs/*collaboration*.md +/docs/boxel-package-*.md +/docs/catalog-*.md +/docs/card-field-*.md +/docs/cross-realm-*.md +/docs/llm-wiki/ + +# Data-generation scripts for realms (belong with the workspace they feed) +/scripts/northwind* +/scripts/generate-* +/scripts/fetch-northwind.mjs +/scripts/northwind-cache/ + +# JQXL engine tests (the engine lives in realm-server, not here) +/test/helpers/jqxl-* +/test/lib/jqxl-* + +# Misplaced Claude skills (add legit skills to .claude/commands/ by hand) +/.claude/commands/extract-theme.md diff --git a/AGENTS.md b/AGENTS.md index 152bd24..427af99 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -33,7 +33,7 @@ Trigger examples: - Working in a synced workspace (`.boxel-sync.json` present) ## Core Command Semantics -- `pull`: remote -> local +- `pull`: remote -> local (also writes `.boxel-sync.json` so `sync` works immediately) - `push`: local -> remote - `sync`: bidirectional conflict 
resolution - `track`: local file watching with auto-checkpoints (use `--push` for real-time server sync) @@ -112,11 +112,34 @@ boxel gather . -s /path/to/repo ## Batch Upload API The CLI supports batch uploads via the `/_atomic` endpoint: -- Used by `track --push` for efficient multi-file uploads +- Used by `track --push` and `push --batch` for efficient multi-file uploads - Sorts definitions (.gts) before instances (.json) for proper indexing - Fallback strategy: full batch → smaller batches → individual uploads - See `src/lib/batch-upload.ts` for implementation +### Content-type routing (since 1.0.1) +Before sending bytes anywhere, the uploader decides which path a file takes based on extension (see `src/lib/content-type.ts`): + +| File class | Examples | Path | Content-Type | Accept | +|---|---|---|---|---| +| Compilable source | `.gts`, `.ts`, `.tsx`, `.js`, `.jsx`, `.cjs`, `.mjs`, `.css`, `.scss`, `.less`, `.sass`, `.html` | `/_atomic` (type: `source`) | per-extension MIME | `application/vnd.card+source` | +| Card JSON | `.json` | `/_atomic` (type: `card`, fallback `source` on parse failure) | `application/json` | `application/vnd.card+source` | +| Plain text, non-source | `.md`, `.txt`, `.csv`, `.yaml`, `.xml` | per-file POST | per-extension MIME | `*/*` | +| Binary | `.png`, `.jpg`, `.woff`, `.pdf`, `.zip`, etc. | per-file POST | per-extension MIME or `application/octet-stream` | `*/*` | + +Rationale: `/_atomic` rejects anything its module compiler can't parse. Plain text and binary files need their raw bytes stored directly, which only the per-file POST endpoint does correctly. + +### Manifest shape (since 1.0.1) +All three sync commands agree on one `.boxel-sync.json` shape: +```ts +interface SyncManifest { + workspaceUrl: string; + lastSyncTime?: number; + files: Record<string, { localHash: string; remoteMtime: number }>; +} +``` +`push.ts` migrates the pre-1.0.1 format (`files[path] = hashString`) on read. New writes always use the object form. 
Mirror this shape if adding a new command that touches the manifest. + ## Notes for Agents Editing This Repo - Prefer minimal, targeted command changes in `src/commands/*.ts`. - Validate with local build/tests when feasible. diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..63eed71 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,31 @@ +# Changelog + +All notable changes to `boxel-cli`. Format loosely follows [Keep a Changelog](https://keepachangelog.com/en/1.1.0/); versions follow [SemVer](https://semver.org/spec/v2.0.0.html). + +## 1.0.1 — 2026-04-20 + +### New + +- `boxel push --batch [--batch-size N]` — atomic bulk upload. Definitions upload individually in dependency order (so FieldDefs land before CardDefs that contain them); instances batch through `/_atomic` in groups of N (default 10). Faster and quieter than per-file POST on pushes of 50+ files, and reduces UI re-indexing churn. +- `boxel pull ./local` writes `.boxel-sync.json` automatically after a fresh download. You can now run `boxel sync .` immediately against a just-pulled directory with no manual intermediate step. + +### Fixed + +- **Binary upload corruption.** Images, fonts, PDFs, and other non-text files were being routed through the `/_atomic` JSON endpoint with text encoding, corrupting the bytes. Binary files now take the per-file POST path with `application/octet-stream`. +- **Plain-text file rejection.** `.md`, `.csv`, `.yaml`, `.xml`, and `.txt` uploads were being rejected by the realm's module compiler as "invalid source". Plain-text files now take the per-file POST path with their true MIME type. +- **Manifest shape drift between push and pull.** `push` and `pull` had diverged on the shape of `.boxel-sync.json`. Mixed-command workflows (pull → push or pull → sync → push) could mark every file as changed on the next run. All three commands now use one canonical shape; `push` migrates the pre-1.0.1 bare-string format on read. 
+- **Partial-failure batch marks files as synced.** In `--batch` mode, the manifest was updated for every file in a batch whenever any file succeeded, even if some of them had failed. Failed uploads could be silently stranded without retry. The manifest now tracks only files that successfully uploaded; failures stay out and get retried on the next run. +- **`boxel --version` reported wrong number.** The CLI had a hardcoded version string that drifted from `package.json`. Version is now sourced from `package.json` at runtime. +- **`--batch-size` silently accepted garbage.** `--batch-size abc` or `--batch-size -5` used to flow through as `NaN` / negative and cause weird behavior downstream. Non-positive-integer input now fails fast with a clear error. + +### For contributors + +- New `src/lib/content-type.ts` — single source of truth mapping file extension → MIME type → upload-path decision. Any extension you add for atomic-compatibility should also go here. +- New drift-guards section in `.gitignore` — prevents Boxel platform docs, workspace dirs, and other content that commonly ends up at the repo root from leaking into commits. +- `AGENTS.md` now documents the content-type routing table (file class → path → headers) and the canonical manifest shape, so future additions to `batch-upload.ts` or any manifest-touching command have one reference. + +--- + +## 1.0.0 — 2026-02-13 + +Initial public release. Core sync, push, pull, watch, track, history, profiles, multi-realm config, realm repair, share/gather GitHub workflow, skill-based Claude Code integration. diff --git a/README.md b/README.md index 9bd09f6..d7ed1cc 100644 --- a/README.md +++ b/README.md @@ -218,11 +218,17 @@ boxel sync . --prefer-newest # Keep newest by timestamp boxel sync . --delete # Sync deletions both ways boxel sync . 
--dry-run # Preview only -boxel push ./local # One-way push (local → remote) -boxel push ./local --delete # Push and remove orphaned remote files -boxel pull ./local # One-way pull (remote → local) +boxel push ./local # One-way push (local → remote) +boxel push ./local --delete # Push and remove orphaned remote files +boxel push ./local --batch # Atomic batch upload (10 files per batch) +boxel push ./local --batch --batch-size 25 # Custom batch size +boxel pull ./local # One-way pull (remote → local) ``` +> **Note:** `boxel pull` writes `.boxel-sync.json` automatically after a fresh download, so you can run `boxel sync .` immediately against a freshly-pulled workspace with no extra setup. + +> **`--batch` mode** (push only): `.gts` definitions upload individually in dependency order, then `.json` instances batch through the server's `/_atomic` endpoint in groups of N. Meaningfully faster on big pushes (50+ files) and reduces UI flashing during server re-indexing. Binary files (images, fonts) and plain-text files (`.md`, `.csv`, `.yaml`) are routed to per-file POST regardless of mode, because `/_atomic` only accepts card and source resource types. + **Failed download cleanup:** When `sync` encounters files that return 500 errors (broken on server), it will prompt you to delete them: ``` ⚠️ 3 file(s) failed to download (server error): @@ -665,12 +671,19 @@ When you open this repo in Claude Code, it will guide you through setup and prov --- +## Release notes + +See [CHANGELOG.md](CHANGELOG.md) for per-version changes. + +--- + ## Contributing PRs welcome! 
Please ensure: - Code passes linting (`npm run lint`) - New features have documentation - Breaking changes are noted in PR description +- Add a bullet to `CHANGELOG.md` under the in-progress version (or start a new `## Unreleased` section if one doesn't exist) --- diff --git a/package.json b/package.json index 7bcf64a..670833d 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "boxel-cli", - "version": "1.0.0", + "version": "1.0.1", "description": "CLI for bidirectional sync between local directories and Boxel workspaces", "type": "module", "main": "dist/index.js", diff --git a/src/commands/check.ts b/src/commands/check.ts index 0877094..2201c6f 100644 --- a/src/commands/check.ts +++ b/src/commands/check.ts @@ -11,7 +11,7 @@ interface SyncManifest { files: Record; } -function computeFileHash(content: string): string { +function computeFileHash(content: string | Buffer): string { return crypto.createHash('md5').update(content).digest('hex'); } @@ -72,7 +72,7 @@ export async function checkCommand( const relativePath = path.relative(workspaceRoot, absolutePath).replace(/\\/g, '/'); // Read local file - const localContent = fs.readFileSync(absolutePath, 'utf-8'); + const localContent = fs.readFileSync(absolutePath); const localHash = computeFileHash(localContent); const localMtime = fs.statSync(absolutePath).mtimeMs; diff --git a/src/commands/pull.ts b/src/commands/pull.ts index 55d085c..65713a7 100644 --- a/src/commands/pull.ts +++ b/src/commands/pull.ts @@ -2,6 +2,18 @@ import { RealmSyncBase, validateMatrixEnvVars, type SyncOptions } from '../lib/r import { CheckpointManager, type CheckpointChange } from '../lib/checkpoint-manager.js'; import * as fs from 'fs'; import * as path from 'path'; +import * as crypto from 'crypto'; + +interface SyncManifest { + workspaceUrl: string; + lastSyncTime: number; + files: Record; +} + +function computeFileHash(filePath: string): string { + const content = fs.readFileSync(filePath); + return 
crypto.createHash('md5').update(content).digest('hex'); +} interface PullOptions extends SyncOptions { deleteLocal?: boolean; @@ -104,6 +116,29 @@ class RealmPuller extends RealmSyncBase { } } + // Create sync manifest so subsequent `boxel sync` knows files are in sync + if (!this.options.dryRun && downloadedFiles.length > 0) { + const remoteMtimes = await this.getRemoteMtimes(); + const manifest: SyncManifest = { + workspaceUrl: this.options.workspaceUrl, + lastSyncTime: Date.now(), + files: {}, + }; + + for (const relativePath of downloadedFiles) { + const localPath = path.join(this.options.localDir, relativePath); + if (fs.existsSync(localPath)) { + manifest.files[relativePath] = { + localHash: computeFileHash(localPath), + remoteMtime: remoteMtimes.get(relativePath) || Math.floor(Date.now() / 1000), + }; + } + } + + const manifestPath = path.join(this.options.localDir, '.boxel-sync.json'); + fs.writeFileSync(manifestPath, JSON.stringify(manifest, null, 2)); + } + // Create checkpoint for pulled files if (!this.options.dryRun && downloadedFiles.length > 0) { const checkpointManager = new CheckpointManager(this.options.localDir); diff --git a/src/commands/push.ts b/src/commands/push.ts index 4be6be2..c1d36e1 100644 --- a/src/commands/push.ts +++ b/src/commands/push.ts @@ -1,12 +1,36 @@ import { RealmSyncBase, validateMatrixEnvVars, isProtectedFile, type SyncOptions } from '../lib/realm-sync-base.js'; import { CheckpointManager, type CheckpointChange } from '../lib/checkpoint-manager.js'; +import { uploadWithBatching, type FileToUpload } from '../lib/batch-upload.js'; import * as fs from 'fs'; import * as path from 'path'; import * as crypto from 'crypto'; +interface SyncManifestFile { + localHash: string; // md5 of file bytes at last sync + remoteMtime: number; // remote mtime at last sync (seconds) +} + interface SyncManifest { workspaceUrl: string; - files: Record; // relativePath -> contentHash + lastSyncTime?: number; + files: Record; +} + +// Old push-only 
manifest format kept for migration. Matches the detector in sync.ts. +interface OldManifest { + workspaceUrl: string; + files: Record; +} + +function isOldManifest(m: unknown): m is OldManifest { + if (!m || typeof m !== 'object') return false; + const files = (m as { files?: unknown }).files; + if (!files || typeof files !== 'object') return false; + for (const v of Object.values(files as Record)) { + if (typeof v === 'string') return true; + if (v && typeof v === 'object') return false; + } + return false; } function computeFileHash(filePath: string): string { @@ -16,14 +40,24 @@ function computeFileHash(filePath: string): string { function loadManifest(localDir: string): SyncManifest | null { const manifestPath = path.join(localDir, '.boxel-sync.json'); - if (fs.existsSync(manifestPath)) { - try { - return JSON.parse(fs.readFileSync(manifestPath, 'utf8')); - } catch { - return null; + if (!fs.existsSync(manifestPath)) return null; + try { + const raw = JSON.parse(fs.readFileSync(manifestPath, 'utf8')); + if (isOldManifest(raw)) { + // Migrate: old { files: {path: hash} } → new { files: {path: {localHash, remoteMtime: 0}} } + const migrated: SyncManifest = { + workspaceUrl: raw.workspaceUrl, + files: {}, + }; + for (const [p, hash] of Object.entries(raw.files)) { + migrated.files[p] = { localHash: hash, remoteMtime: 0 }; + } + return migrated; } + return raw as SyncManifest; + } catch { + return null; } - return null; } function saveManifest(localDir: string, manifest: SyncManifest): void { @@ -34,6 +68,8 @@ function saveManifest(localDir: string, manifest: SyncManifest): void { interface PushOptions extends SyncOptions { deleteRemote?: boolean; force?: boolean; + batch?: boolean; + batchSize?: number; } class RealmPusher extends RealmSyncBase { @@ -102,14 +138,15 @@ class RealmPusher extends RealmSyncBase { continue; } const currentHash = computeFileHash(localPath); - const previousHash = manifest.files[relativePath]; + const previousEntry = 
manifest.files[relativePath]; + const previousHash = previousEntry?.localHash; if (previousHash !== currentHash) { filesToUpload.set(relativePath, localPath); } else { skipped++; - // Keep the hash in new manifest - newManifest.files[relativePath] = currentHash; + // Keep the entry in new manifest (preserve remoteMtime) + newManifest.files[relativePath] = previousEntry; } } @@ -120,6 +157,71 @@ class RealmPusher extends RealmSyncBase { if (filesToUpload.size === 0) { console.log('No files to upload - everything is up to date'); + } else if (this.pushOptions.batch) { + // Batch upload mode: .gts files individually (in dependency order), .json via /_atomic + const batchSize = this.pushOptions.batchSize ?? 10; + + // Determine operation type: 'add' for new files, 'update' for existing + const remoteFiles = await this.getRemoteFileList(); + const allFiles: FileToUpload[] = Array.from(filesToUpload.entries()).map(([relativePath, localPath]) => ({ + relativePath, + localPath, + operation: remoteFiles.has(relativePath) ? 
'update' as const : 'add' as const, + })); + + // Separate definitions from instances + const { sortDefinitionsFirst } = await import('../lib/batch-upload.js'); + const sorted = sortDefinitionsFirst(allFiles); + const definitions = sorted.filter(f => f.relativePath.endsWith('.gts')); + const instances = sorted.filter(f => !f.relativePath.endsWith('.gts')); + + // Upload .gts files individually in dependency order + if (definitions.length > 0) { + console.log(`Uploading ${definitions.length} definition(s) in dependency order...`); + for (const file of definitions) { + try { + await this.uploadFile(file.relativePath, file.localPath); + newManifest.files[file.relativePath] = { + localHash: computeFileHash(file.localPath), + remoteMtime: 0, // will be filled in by the remote-mtime refresh below + }; + } catch (error) { + this.hasError = true; + console.error(`Error uploading ${file.relativePath}:`, error); + } + } + } + + // Batch upload .json files via /_atomic + if (instances.length > 0) { + console.log(`Batch uploading ${instances.length} instance(s) (${batchSize} per batch)...`); + const jwt = await this.realmAuthClient.getJWT(); + const result = await uploadWithBatching(instances, this.options.workspaceUrl, jwt, { + batchSize, + definitionsFirst: false, // already separated + dryRun: this.options.dryRun, + }); + + // Mark only SUCCESSFUL files in the manifest. Failures stay out so the + // next run retries them. 
+ const failedPaths = new Set(result.errors.map(e => e.path)); + for (const file of instances) { + if (failedPaths.has(file.relativePath)) continue; + if (fs.existsSync(file.localPath)) { + newManifest.files[file.relativePath] = { + localHash: computeFileHash(file.localPath), + remoteMtime: 0, // refreshed below + }; + } + } + + if (result.failed > 0) { + this.hasError = true; + for (const err of result.errors) { + console.error(`Error uploading ${err.path}: ${err.error}`); + } + } + } } else { console.log(`Uploading ${filesToUpload.size} file(s)...`); @@ -127,7 +229,10 @@ class RealmPusher extends RealmSyncBase { try { await this.uploadFile(relativePath, localPath); // Add to manifest after successful upload - newManifest.files[relativePath] = computeFileHash(localPath); + newManifest.files[relativePath] = { + localHash: computeFileHash(localPath), + remoteMtime: 0, // refreshed below + }; } catch (error) { this.hasError = true; console.error(`Error uploading ${relativePath}:`, error); @@ -167,6 +272,29 @@ class RealmPusher extends RealmSyncBase { } } + // Refresh remote mtimes for every file we just put in the manifest so + // subsequent sync/pull operations don't mistakenly see remote-as-changed. + // Skip on dry-run (no network side effects). 
+ if (!this.options.dryRun && Object.keys(newManifest.files).length > 0) { + try { + const remoteMtimes = await this.getRemoteMtimes(); + for (const [relPath, entry] of Object.entries(newManifest.files)) { + const mtime = remoteMtimes.get(relPath); + if (typeof mtime === 'number') { + entry.remoteMtime = mtime; + } else if (entry.remoteMtime === 0) { + // Fall back to local time in seconds; imperfect but better than 0 + entry.remoteMtime = Math.floor(Date.now() / 1000); + } + } + } catch (err) { + // Non-fatal: manifest entries stay with remoteMtime:0 and sync will + // reconcile on next run + console.warn('Warning: could not refresh remote mtimes for manifest:', err); + } + newManifest.lastSyncTime = Date.now(); + } + // Save manifest for future incremental syncs if (!this.options.dryRun) { saveManifest(this.options.localDir, newManifest); @@ -194,6 +322,8 @@ export interface PushCommandOptions { delete?: boolean; dryRun?: boolean; force?: boolean; + batch?: boolean; + batchSize?: number; } export async function pushCommand( @@ -217,6 +347,8 @@ export async function pushCommand( deleteRemote: options.delete, dryRun: options.dryRun, force: options.force, + batch: options.batch, + batchSize: options.batchSize, }, matrixUrl, username, diff --git a/src/commands/status.ts b/src/commands/status.ts index 7dd233f..6b8f148 100644 --- a/src/commands/status.ts +++ b/src/commands/status.ts @@ -19,7 +19,7 @@ interface FileStatus { remoteMtime?: number; } -function computeFileHash(content: string): string { +function computeFileHash(content: string | Buffer): string { return crypto.createHash('md5').update(content).digest('hex'); } @@ -201,7 +201,7 @@ async function analyzeChanges( let localChanged = false; if (existsLocally) { - const content = fs.readFileSync(localPath, 'utf-8'); + const content = fs.readFileSync(localPath); const hash = computeFileHash(content); localChanged = hash !== manifestEntry.localHash; } @@ -305,7 +305,7 @@ async function statusSingle( let 
localChanged = false; if (existsLocally) { - const content = fs.readFileSync(localPath, 'utf-8'); + const content = fs.readFileSync(localPath); const hash = computeFileHash(content); localChanged = hash !== manifestEntry.localHash; } diff --git a/src/index.ts b/src/index.ts index 9061748..e580336 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,7 +1,17 @@ #!/usr/bin/env node import 'dotenv/config'; -import { Command } from 'commander'; +import { Command, InvalidArgumentError } from 'commander'; + +/** Parse a positive integer from a CLI flag; throw a friendly error otherwise. */ +function parsePositiveInt(raw: string, _prev: unknown): number { + const n = parseInt(raw, 10); + if (!Number.isFinite(n) || n < 1) { + throw new InvalidArgumentError(`expected a positive integer, got "${raw}"`); + } + return n; +} + import { pushCommand } from './commands/push.js'; import { pullCommand } from './commands/pull.js'; import { listCommand } from './commands/list.js'; @@ -26,13 +36,19 @@ import { repairRealmCommand, repairRealmsCommand } from './commands/repair.js'; import { consolidateWorkspacesCommand } from './commands/consolidate.js'; import { loadConfig } from './lib/realm-config.js'; import { warnIfLegacyWorkspacePaths } from './lib/workspace-paths.js'; +import { createRequire } from 'module'; + +// Read version from package.json so `boxel --version` stays in sync with the +// published package. Using require() avoids ESM JSON-import assertion syntax +// that varies across Node versions. 
+const pkg = createRequire(import.meta.url)('../package.json') as { version: string }; const program = new Command(); program .name('boxel') .description('CLI tools for syncing files between local directories and Boxel workspaces') - .version('1.0.0'); + .version(pkg.version); program.hook('preAction', (_thisCommand, actionCommand) => { const commandName = actionCommand.name(); @@ -65,7 +81,9 @@ program .option('--delete', 'Delete remote files that do not exist locally') .option('--dry-run', 'Show what would be done without making changes') .option('--force', 'Upload all files, even if unchanged') - .action(async (localDir: string, workspaceUrl: string, options: { delete?: boolean; dryRun?: boolean; force?: boolean }) => { + .option('--batch', 'Use atomic batch upload for faster bulk operations (10 files per batch)') + .option('--batch-size ', 'Files per batch when using --batch (default: 10)', parsePositiveInt) + .action(async (localDir: string, workspaceUrl: string, options: { delete?: boolean; dryRun?: boolean; force?: boolean; batch?: boolean; batchSize?: number }) => { await pushCommand(localDir, workspaceUrl, options); }); diff --git a/src/lib/batch-upload.ts b/src/lib/batch-upload.ts index 30e824c..4926392 100644 --- a/src/lib/batch-upload.ts +++ b/src/lib/batch-upload.ts @@ -7,6 +7,44 @@ import * as fs from 'fs'; import * as path from 'path'; +import { getContentType, isTextFile, readFileForUpload } from './content-type.js'; + +/** + * Binary files (images, fonts, archives, etc.) cannot be sent through the + * /_atomic JSON endpoint because their bytes don't survive UTF-8 encoding. + * Route them through individual POST uploads (which use octet-stream). 
+ */ +function isBinaryFile(file: FileToUpload): boolean { + return !isTextFile(getContentType(file.relativePath)); +} + +const ATOMIC_SOURCE_EXTENSIONS = new Set([ + '.gts', + '.ts', + '.tsx', + '.js', + '.jsx', + '.mjs', + '.cjs', + '.css', + '.scss', + '.less', + '.sass', + '.html', +]); + +/** + * The /_atomic endpoint only accepts 'card' and 'source' resource types. + * Plain text files (.md, .txt, .csv, .yaml, etc.) are neither cards nor + * compilable source modules — the realm's module compiler rejects them as + * invalid source. Route them through individual POST uploads so they are + * stored as raw files with their correct Content-Type. + */ +function isAtomicIncompatible(file: FileToUpload): boolean { + if (file.relativePath.endsWith('.json')) return false; + const ext = path.extname(file.relativePath).toLowerCase(); + return !ATOMIC_SOURCE_EXTENSIONS.has(ext); +} // ANSI color codes const FG_GREEN = '\x1b[32m'; @@ -19,7 +57,7 @@ const RESET = '\x1b[0m'; export interface FileToUpload { relativePath: string; localPath: string; - content?: string; + content?: string | Buffer; operation: 'add' | 'update'; } @@ -65,6 +103,23 @@ const DEFAULT_OPTIONS: BatchOptions = { verbose: false, }; +function getTextContent(file: FileToUpload): string { + if (typeof file.content === 'string') { + return file.content; + } + const content = fs.readFileSync(file.localPath, 'utf8'); + file.content = content; + return content; +} + +function getUploadPayload(file: FileToUpload): { content: string | Buffer; contentType: string } { + if (typeof file.content === 'string' || Buffer.isBuffer(file.content)) { + return { content: file.content, contentType: getContentType(file.relativePath) }; + } + + return readFileForUpload(file.relativePath, file.localPath); +} + // Verbose logging helper function verbose(opts: Partial, message: string, ...args: unknown[]): void { if (opts.verbose) { @@ -73,17 +128,121 @@ function verbose(opts: Partial, message: string, ...args: unknown[ } /** - * 
Sort files so definitions (.gts) come before instances (.json) + * Sort files so definitions (.gts) come before instances (.json), + * and within .gts files, sort by dependency order (least dependent first). + * + * Dependency detection: scans import statements in .gts files to determine + * which files import others. Files with no local imports come first (FieldDefs, + * base types), then files that import those, etc. */ export function sortDefinitionsFirst(files: FileToUpload[]): FileToUpload[] { - return [...files].sort((a, b) => { - const aIsDefinition = a.relativePath.endsWith('.gts'); - const bIsDefinition = b.relativePath.endsWith('.gts'); + const definitions = files.filter(f => f.relativePath.endsWith('.gts')); + const instances = files.filter(f => !f.relativePath.endsWith('.gts')); - if (aIsDefinition && !bIsDefinition) return -1; - if (!aIsDefinition && bIsDefinition) return 1; - return a.relativePath.localeCompare(b.relativePath); - }); + // Build dependency graph for .gts files + const depOrder = sortByDependency(definitions); + + // Definitions first (in dependency order), then instances alphabetically + return [ + ...depOrder, + ...instances.sort((a, b) => a.relativePath.localeCompare(b.relativePath)), + ]; +} + +/** + * Sort .gts files by dependency order using topological sort. + * Files that import nothing local come first; files that import others come later. + */ +function sortByDependency(files: FileToUpload[]): FileToUpload[] { + // Map filename (without extension) to file + const byName = new Map(); + for (const f of files) { + const name = path.basename(f.relativePath, '.gts'); + byName.set(name, f); + } + + // Parse imports to build adjacency list + const deps = new Map>(); + for (const f of files) { + const name = path.basename(f.relativePath, '.gts'); + const content = + typeof f.content === 'string' + ? f.content + : fs.existsSync(f.localPath) + ? 
fs.readFileSync(f.localPath, 'utf8') + : ''; + if (content) { + f.content = content; + } + + const localImports = new Set(); + // Match: import { X } from './name' or from './name.gts' + const importRegex = /from\s+['"]\.\/([^'"]+)['"]/g; + let match; + while ((match = importRegex.exec(content)) !== null) { + const imported = match[1].replace(/\.gts$/, ''); + if (byName.has(imported)) { + localImports.add(imported); + } + } + deps.set(name, localImports); + } + + // Topological sort (Kahn's algorithm) + const inDegree = new Map(); + for (const name of byName.keys()) inDegree.set(name, 0); + for (const [, depSet] of deps) { + for (const dep of depSet) { + inDegree.set(dep, (inDegree.get(dep) ?? 0) + 1); + } + } + + // Note: we want files with NO dependents (leaf nodes) first + // Actually, we want files that nothing depends ON first (no incoming edges + // in the "is imported by" graph), which means files that import nothing. + // Kahn's on the dependency graph: start with nodes that have no dependencies. + const inDeg = new Map(); + for (const name of byName.keys()) inDeg.set(name, 0); + for (const [name, depSet] of deps) { + inDeg.set(name, depSet.size); + } + + const queue: string[] = []; + for (const [name, deg] of inDeg) { + if (deg === 0) queue.push(name); + } + + const sorted: FileToUpload[] = []; + const visited = new Set(); + + while (queue.length > 0) { + queue.sort(); // deterministic order + const name = queue.shift()!; + if (visited.has(name)) continue; + visited.add(name); + + const file = byName.get(name); + if (file) sorted.push(file); + + // Find files that depend on this one and decrement their in-degree + for (const [other, depSet] of deps) { + if (depSet.has(name)) { + const newDeg = (inDeg.get(other) ?? 
1) - 1; + inDeg.set(other, newDeg); + if (newDeg === 0 && !visited.has(other)) { + queue.push(other); + } + } + } + } + + // Add any remaining files (circular deps) + for (const f of files) { + const name = path.basename(f.relativePath, '.gts'); + if (!visited.has(name)) sorted.push(f); + } + + return sorted; } /** @@ -99,9 +258,8 @@ export function createBatches( const maxPayloadBytes = options.maxPayloadKB * 1024; for (const file of files) { - const content = file.content || fs.readFileSync(file.localPath, 'utf8'); + const content = getTextContent(file); const fileSize = Buffer.byteLength(content, 'utf8'); - file.content = content; // Cache for later use // If single file exceeds max payload, give it its own batch if (fileSize > maxPayloadBytes) { @@ -146,7 +304,7 @@ export function buildAtomicRequest( realmUrl: string ): AtomicRequest { const operations: AtomicOperation[] = files.map(file => { - const content = file.content || fs.readFileSync(file.localPath, 'utf8'); + const content = getTextContent(file); const isCard = file.relativePath.endsWith('.json'); if (isCard) { @@ -171,7 +329,7 @@ export function buildAtomicRequest( op: file.operation, href: `${realmUrl}${file.relativePath}`, data: { - type: 'file', + type: 'source', attributes: { content: content, }, @@ -179,7 +337,9 @@ export function buildAtomicRequest( }; } } else { - // For source files (.gts, etc.), send as content + // For source code (.gts, .ts, .css, .html, etc.), send as source module. + // Non-source-code text files are filtered out of the atomic batch by + // isAtomicIncompatible() and uploaded via individual POST instead. 
return { op: file.operation, href: `${realmUrl}${file.relativePath}`, @@ -326,25 +486,32 @@ export async function uploadSingleFile( }; } - const content = file.content || fs.readFileSync(file.localPath, 'utf8'); + const { content, contentType } = getUploadPayload(file); const url = `${realmUrl}${file.relativePath}`; + // Accept: compilable source types expect 'application/vnd.card+source' back + // from the realm; binary + plain-text files want the raw bytes returned as-is. + const acceptHeader = isTextFile(contentType) && !isAtomicIncompatible(file) + ? 'application/vnd.card+source' + : '*/*'; + try { const response = await fetch(url, { method: 'POST', headers: { - 'Content-Type': 'text/plain;charset=UTF-8', + 'Content-Type': contentType, 'Authorization': jwt, - 'Accept': 'application/vnd.card+source', + 'Accept': acceptHeader, }, body: content, }); if (!response.ok) { + const body = await response.text().catch(() => ''); return { success: false, filesUploaded: 0, - errors: [{ path: file.relativePath, error: `HTTP ${response.status}` }], + errors: [{ path: file.relativePath, error: `HTTP ${response.status}: ${body.slice(0, 200)}` }], timeMs: Date.now() - startTime, }; } @@ -389,11 +556,21 @@ export async function uploadWithBatching( verbose(opts, `uploadWithBatching called with ${files.length} files`); verbose(opts, `Options: batchSize=${opts.batchSize}, definitionsFirst=${opts.definitionsFirst}, quiet=${opts.quiet}`); + // Split out files that cannot go through /_atomic: + // - binary files (bytes don't survive UTF-8 stringification) + // - plain text files like .md/.txt/.csv (not valid source modules, + // rejected by the realm's module compiler) + // Both kinds route through individual POST uploads with their correct + // Content-Type so the server stores them as raw files. 
+ const individualFiles = files.filter(f => isBinaryFile(f) || isAtomicIncompatible(f)); + const textFiles = files.filter(f => !isBinaryFile(f) && !isAtomicIncompatible(f)); + verbose(opts, `Split: ${textFiles.length} atomic-compatible, ${individualFiles.length} individual`); + // Sort definitions first if requested - let sortedFiles = opts.definitionsFirst ? sortDefinitionsFirst(files) : files; + let sortedFiles = opts.definitionsFirst ? sortDefinitionsFirst(textFiles) : textFiles; verbose(opts, `After sorting: ${sortedFiles.map(f => f.relativePath).join(', ')}`); - // Create batches + // Create batches (text only) const batches = createBatches(sortedFiles, opts); verbose(opts, `Created ${batches.length} batches`); @@ -404,11 +581,33 @@ export async function uploadWithBatching( if (!opts.quiet) { const totalSize = sortedFiles.reduce((sum, f) => { - const content = f.content || fs.readFileSync(f.localPath, 'utf8'); - f.content = content; + const content = getTextContent(f); return sum + Buffer.byteLength(content, 'utf8'); }, 0); - log(`\n${FG_CYAN}Uploading ${files.length} files in ${batches.length} batch(es)${RESET} ${DIM}(${Math.round(totalSize / 1024)}KB total)${RESET}`); + const individualNote = individualFiles.length > 0 + ? ` ${DIM}+ ${individualFiles.length} file(s) individually${RESET}` + : ''; + log(`\n${FG_CYAN}Uploading ${textFiles.length} files in ${batches.length} batch(es)${RESET} ${DIM}(${Math.round(totalSize / 1024)}KB total)${RESET}${individualNote}`); + } + + // Upload individual files first — binary files are typically referenced + // by cards (e.g. Product → image links), and plain text files (.md etc.) + // are not sources the realm indexes. + for (const file of individualFiles) { + const singleResult = await uploadSingleFile(file, realmUrl, jwt, opts); + if (singleResult.success) { + totalUploaded++; + if (!opts.quiet) { + const tag = isBinaryFile(file) ? 
'binary' : 'file'; + log(` ${FG_GREEN}✓${RESET} ${file.relativePath} ${DIM}(${tag}, ${singleResult.timeMs}ms)${RESET}`); + } + } else { + totalFailed++; + allErrors.push(...singleResult.errors); + if (!opts.quiet) { + log(` ${FG_RED}✗${RESET} ${file.relativePath}: ${singleResult.errors[0]?.error}`); + } + } } for (let i = 0; i < batches.length; i++) { diff --git a/src/lib/content-type.ts b/src/lib/content-type.ts new file mode 100644 index 0000000..9cce11a --- /dev/null +++ b/src/lib/content-type.ts @@ -0,0 +1,104 @@ +import * as fs from 'fs'; +import * as path from 'path'; + +const EXTENSION_MAP: Record = { + '.gts': 'application/vnd.card+source', + '.json': 'application/json', + '.html': 'text/html', + '.css': 'text/css', + '.js': 'application/javascript', + '.ts': 'application/typescript', + '.tsx': 'application/typescript', + '.jsx': 'application/javascript', + '.mjs': 'application/javascript', + '.cjs': 'application/javascript', + '.scss': 'text/x-scss', + '.less': 'text/x-less', + '.sass': 'text/x-sass', + '.png': 'image/png', + '.jpg': 'image/jpeg', + '.jpeg': 'image/jpeg', + '.gif': 'image/gif', + '.svg': 'image/svg+xml', + '.webp': 'image/webp', + '.ico': 'image/x-icon', + '.md': 'text/markdown', + '.txt': 'text/plain', + '.csv': 'text/csv', + '.xml': 'application/xml', + '.yaml': 'application/yaml', + '.yml': 'application/yaml', + '.woff': 'font/woff', + '.woff2': 'font/woff2', + '.ttf': 'font/ttf', + '.otf': 'font/otf', + '.pdf': 'application/pdf', + '.zip': 'application/zip', + '.wasm': 'application/wasm', +}; + +export function getContentType(filePath: string): string { + const ext = path.extname(filePath).toLowerCase(); + return EXTENSION_MAP[ext] || 'application/octet-stream'; +} + +export function isTextFile(contentType: string): boolean { + // Includes the text/x-* family so .scss/.less/.sass route through the + // text path and match ATOMIC_SOURCE_EXTENSIONS. 
+ return ( + contentType.startsWith('text/') || + contentType === 'application/json' || + contentType === 'application/javascript' || + contentType === 'application/typescript' || + contentType === 'application/xml' || + contentType === 'application/yaml' || + contentType === 'application/vnd.card+source' || + contentType === 'image/svg+xml' + ); +} + +function looksLikeUtf8Text(buffer: Buffer): boolean { + if (buffer.length === 0) { + return true; + } + + if (buffer.includes(0)) { + return false; + } + + return !buffer.toString('utf8').includes('\uFFFD'); +} + +export function readFileForUpload( + filePath: string, + localPath: string, +): { content: string | Buffer; contentType: string } { + const inferredContentType = getContentType(filePath); + + if (isTextFile(inferredContentType)) { + return { + content: fs.readFileSync(localPath, 'utf8'), + contentType: inferredContentType, + }; + } + + const buffer = fs.readFileSync(localPath); + + if ( + inferredContentType === 'application/octet-stream' && + looksLikeUtf8Text(buffer) + ) { + return { + content: buffer.toString('utf8'), + contentType: 'text/plain', + }; + } + + return { + content: buffer, + // Realm upload endpoints currently use application/octet-stream as the + // discriminator for binary request parsing, and infer the served content + // type from the stored file name on GET. 
+ contentType: 'application/octet-stream', + }; +} diff --git a/src/lib/realm-sync-base.ts b/src/lib/realm-sync-base.ts index eb6e49b..8ef1862 100644 --- a/src/lib/realm-sync-base.ts +++ b/src/lib/realm-sync-base.ts @@ -1,5 +1,6 @@ import { MatrixClient, passwordFromSeed } from './matrix-client.js'; import { RealmAuthClient } from './realm-auth-client.js'; +import { readFileForUpload } from './content-type.js'; import * as fs from 'fs'; import * as path from 'path'; import ignoreModule from 'ignore'; @@ -305,14 +306,14 @@ export abstract class RealmSyncBase { return; } - const content = fs.readFileSync(localPath, 'utf8'); + const { content, contentType } = readFileForUpload(relativePath, localPath); const url = this.buildFileUrl(relativePath); const jwt = await this.realmAuthClient.getJWT(); const response = await fetch(url, { method: 'POST', headers: { - 'Content-Type': 'text/plain;charset=UTF-8', + 'Content-Type': contentType, Authorization: jwt, Accept: SupportedMimeType.CardSource, }, diff --git a/test/lib/batch-upload.test.ts b/test/lib/batch-upload.test.ts index 1cf6bc0..20bfba1 100644 --- a/test/lib/batch-upload.test.ts +++ b/test/lib/batch-upload.test.ts @@ -218,7 +218,10 @@ describe('buildAtomicRequest', () => { expect(request['atomic:operations'][0].op).toBe('update'); }); - it('falls back to file type for invalid JSON', () => { + it('falls back to source type for invalid JSON', () => { + // The /_atomic endpoint only accepts 'card' and 'source' resource types. + // When a .json file can't be parsed as a card, we fall back to 'source' so + // the request still succeeds (rather than 'file', which isn't a valid type). 
const files = [ createFile('bad.json', 'not valid json {{'), ]; @@ -226,7 +229,7 @@ describe('buildAtomicRequest', () => { const request = buildAtomicRequest(files, 'https://realm.test/'); const op = request['atomic:operations'][0]; - expect(op.data.type).toBe('file'); + expect(op.data.type).toBe('source'); expect(op.data.attributes?.content).toBe('not valid json {{'); }); @@ -258,6 +261,74 @@ describe('buildAtomicRequest', () => { }); }); +describe('uploadSingleFile', () => { + let originalFetch: typeof fetch; + + beforeEach(() => { + originalFetch = globalThis.fetch; + }); + + afterEach(() => { + globalThis.fetch = originalFetch; + }); + + it('uploads jpg files as binary with octet-stream content type', async () => { + const localPath = path.join(tmpDir, 'image.jpg'); + const jpgBytes = Buffer.from([ + 0xff, 0xd8, 0xff, 0xe0, 0x00, 0x10, 0x4a, 0x46, + 0x49, 0x46, 0x00, 0x01, 0xff, 0xd9, + ]); + fs.writeFileSync(localPath, jpgBytes); + + let capturedBody: unknown; + let capturedHeaders: Headers | undefined; + + globalThis.fetch = vi.fn(async (_input: string | URL | Request, init?: RequestInit) => { + capturedBody = init?.body; + capturedHeaders = new Headers(init?.headers); + return new Response(null, { status: 204 }); + }) as typeof fetch; + + const result = await uploadSingleFile( + { relativePath: 'Product/images/image.jpg', localPath, operation: 'add' }, + 'https://realm.test/', + 'test-jwt', + ); + + expect(result.success).toBe(true); + expect(capturedHeaders?.get('Content-Type')).toBe('application/octet-stream'); + expect(capturedHeaders?.get('Accept')).toBe('*/*'); + expect(Buffer.isBuffer(capturedBody)).toBe(true); + expect(Buffer.from(capturedBody as Buffer)).toEqual(jpgBytes); + }); + + it('uploads csv files as text with text/csv content type', async () => { + const localPath = path.join(tmpDir, 'report.csv'); + const csv = 'name,count\nnorthwind,77\n'; + fs.writeFileSync(localPath, csv); + + let capturedBody: unknown; + let capturedHeaders: Headers | 
undefined; + + globalThis.fetch = vi.fn(async (_input: string | URL | Request, init?: RequestInit) => { + capturedBody = init?.body; + capturedHeaders = new Headers(init?.headers); + return new Response(null, { status: 204 }); + }) as typeof fetch; + + const result = await uploadSingleFile( + { relativePath: 'reports/report.csv', localPath, operation: 'add' }, + 'https://realm.test/', + 'test-jwt', + ); + + expect(result.success).toBe(true); + expect(capturedHeaders?.get('Content-Type')).toBe('text/csv'); + expect(typeof capturedBody).toBe('string'); + expect(capturedBody).toBe(csv); + }); +}); + describe('uploadBatch', () => { let originalFetch: typeof fetch;