Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions apps/api/src/app.ts
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ import { projectBuzzRoutes } from './routes/projects-buzz.js';
import { helpWantedRoutes } from './routes/projects-help-wanted.js';
import { projectMembershipRoutes } from './routes/projects-members.js';
import { previewRoutes } from './routes/preview.js';
import { attachmentRoutes } from './routes/attachments.js';
import { samlRoutes } from './routes/saml.js';
import { internalRoutes } from './routes/internal.js';

Expand Down Expand Up @@ -180,6 +181,7 @@ export async function buildApp(opts: BuildAppOptions = {}): Promise<FastifyInsta
await fastify.register(helpWantedRoutes);
await fastify.register(projectMembershipRoutes);
await fastify.register(previewRoutes);
await fastify.register(attachmentRoutes);
await fastify.register(samlRoutes);
await fastify.register(internalRoutes);

Expand Down
179 changes: 179 additions & 0 deletions apps/api/src/routes/attachments.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
/**
* Attachment serving.
*
* GET /api/attachments/* → stream the gitsheets attachment at the
* path captured by the wildcard segment
*
* Reads via `git cat-file blob HEAD:<key>` against the bare data clone,
* piping stdout directly into the Fastify reply for streaming. Per
* specs/behaviors/storage.md → "Attachments": "Web serves attachments
* via a streamed GET /api/attachments/<key> route with cache headers."
*
* Bypasses gitsheets' `Sheet.getAttachment()` API in favor of direct git
* plumbing because:
* 1. The attachment key IS the HEAD-tree path (per spec) — parsing it
* back into (sheet, record, name) and re-resolving via the sheet API
* is redundant.
* 2. Standing Sheet handles cache `dataTree` at openStore time
* (documented in storage.md → "Direct gitsheets reads after a
* transact"); plumbing reads from current HEAD on every request,
* so attachment updates are visible immediately without a
* Store.swapPublic().
*/
import type { FastifyInstance, FastifyRequest } from 'fastify';
import { spawn } from 'node:child_process';

import { ApiNotFoundError, ApiValidationError } from '../lib/errors.js';

/**
 * Minimum-viable extension → Content-Type table. The set we care about
 * today is avatars + buzz images + the occasional PDF; anything else is
 * served as application/octet-stream (clients that need to render know
 * what they asked for).
 *
 * Keys are lowercase extensions without the leading dot.
 */
const MIME_BY_EXT: Record<string, string> = {
  jpg: 'image/jpeg',
  jpeg: 'image/jpeg',
  png: 'image/png',
  gif: 'image/gif',
  webp: 'image/webp',
  avif: 'image/avif',
  svg: 'image/svg+xml',
  pdf: 'application/pdf',
};

/**
 * Infer the Content-Type for an attachment key from its file extension.
 *
 * @param key - Attachment key (a `/`-separated path).
 * @returns The MIME type registered for the text after the last `.`
 *   (matched case-insensitively), or `application/octet-stream` when the
 *   key has no dot, ends with a dot, or the extension is not in the table.
 */
function inferContentType(key: string): string {
  const fallback = 'application/octet-stream';

  const dot = key.lastIndexOf('.');
  if (dot < 0 || dot === key.length - 1) return fallback;

  const ext = key.slice(dot + 1).toLowerCase();
  // The extension comes straight from the request URL. A plain property
  // read would also resolve inherited Object.prototype members
  // (`constructor`, `toString`, `__proto__`, …) and hand back a function or
  // object instead of a MIME string, so only own entries are accepted.
  if (!Object.prototype.hasOwnProperty.call(MIME_BY_EXT, ext)) return fallback;
  return MIME_BY_EXT[ext] ?? fallback;
}

/**
 * Shape-check the attachment key captured by the route's wildcard segment.
 *
 * The key is later passed to git as a single argv entry (never through a
 * shell), so this is defense in depth: malformed keys are turned away early
 * with a specific validation error instead of surfacing as an opaque git
 * failure further down.
 *
 * Checks run in this order, and the first failure wins:
 *   1. the key must be non-empty;
 *   2. it must not begin with `/`;
 *   3. it must not contain ASCII control characters (0x00–0x1f, 0x7f);
 *   4. no `/`-separated segment may be empty, `.` or `..`.
 *
 * @param raw - The wildcard capture exactly as received.
 * @returns `raw`, unchanged, when every check passes.
 * @throws ApiValidationError describing the first rule that was violated.
 */
function validateKey(raw: string): string {
  if (raw === '') {
    throw new ApiValidationError('attachment key is required', { key: 'required' });
  }

  if (raw.charAt(0) === '/') {
    throw new ApiValidationError('attachment key must not start with /', { key: 'no_leading_slash' });
  }

  // Control characters (null byte included) are matched on purpose — this is
  // input validation — hence the lint suppression for the explicit range.
  // eslint-disable-next-line no-control-regex
  const hasControlChar = /[\x00-\x1f\x7f]/.test(raw);
  if (hasControlChar) {
    throw new ApiValidationError('attachment key contains control characters', {
      key: 'invalid_chars',
    });
  }

  // An empty segment covers both `//` and a trailing `/`.
  const hasBadSegment = raw
    .split('/')
    .some((part) => part === '' || part === '.' || part === '..');
  if (hasBadSegment) {
    throw new ApiValidationError('attachment key contains an invalid segment', {
      key: 'invalid_path',
    });
  }

  return raw;
}

/**
 * Registers `GET /api/attachments/*`, which streams the blob stored at the
 * wildcard-captured path in the data repository's HEAD tree.
 *
 * The blob is read with `git cat-file blob HEAD:<key>` and piped to the raw
 * HTTP response. The status code is decided by whichever happens first:
 *   - git emits stdout        → 200, stream the blob;
 *   - git exits 0, no stdout  → 200 with an empty body (zero-byte blob);
 *   - git exits non-zero      → 404 when stderr says the path does not
 *                               resolve to a blob, otherwise a generic error;
 *   - git cannot be started   → generic error.
 *
 * @param fastify - Instance to register on; `config.CFP_DATA_REPO_PATH` is
 *   used as the working directory for git.
 * @throws ApiValidationError (from the handler) for malformed keys.
 * @throws ApiNotFoundError (from the handler) when the key is not a blob in HEAD.
 */
export async function attachmentRoutes(fastify: FastifyInstance): Promise<void> {
  const repoPath = fastify.config.CFP_DATA_REPO_PATH;

  fastify.get(
    '/api/attachments/*',
    {
      schema: {
        tags: ['attachments'],
        summary: 'Serve a gitsheets attachment by its on-record key',
        // Wildcard params get folded into params['*']; document the response
        // shape but skip strict param validation (the route does its own).
        response: {
          200: { type: 'string', description: 'Binary blob; streamed as the response body.' },
        },
      },
    },
    async (request: FastifyRequest, reply) => {
      const raw = (request.params as Record<string, string>)['*'] ?? '';
      const key = validateKey(raw);

      // `git cat-file blob HEAD:<path>` writes the blob to stdout and exits
      // 0 on success, non-zero (with a "fatal:" message on stderr) if the
      // path doesn't resolve in HEAD. LC_ALL=C pins the message language
      // because the 404 detection below matches on stderr text.
      const child = spawn('git', ['cat-file', 'blob', `HEAD:${key}`], {
        cwd: repoPath,
        env: { ...process.env, LC_ALL: 'C' },
        stdio: ['ignore', 'pipe', 'pipe'],
      });

      const stderrChunks: Buffer[] = [];
      child.stderr.on('data', (chunk: Buffer) => stderrChunks.push(chunk));

      // Wait for whichever comes first: a stdout chunk, process close, or a
      // spawn error. Only the first `resolve` call has any effect.
      const first = await new Promise<
        | { kind: 'data'; chunk: Buffer }
        | { kind: 'close'; code: number }
        | { kind: 'error'; error: Error }
      >((resolve) => {
        // This listener is left attached for the child's whole lifetime: an
        // 'error' event with no listener (e.g. the git binary is missing)
        // is rethrown by the emitter and would take the process down.
        child.on('error', (error: Error) => resolve({ kind: 'error', error }));

        const onData = (chunk: Buffer): void => {
          child.stdout.off('data', onData);
          // Park the stream so nothing can be emitted (and dropped) while no
          // consumer is attached; `pipe()` below resumes it.
          child.stdout.pause();
          resolve({ kind: 'data', chunk });
        };
        child.stdout.on('data', onData);

        // 'close' fires after the stdio streams have closed, so stderr has
        // been fully collected by the time this resolves.
        child.once('close', (code) => resolve({ kind: 'close', code: code ?? -1 }));
      });

      if (first.kind === 'error') {
        throw new Error(`git cat-file could not be started: ${first.error.message}`);
      }

      if (first.kind === 'close' && first.code !== 0) {
        const stderr = Buffer.concat(stderrChunks).toString('utf8').trim();
        // Messages that mean "this key is not a blob in HEAD":
        //   fatal: path '<key>' does not exist in 'HEAD'
        //   fatal: path '<key>' exists on disk, but not in 'HEAD'
        //   fatal: Not a valid object name HEAD:<key>
        //   fatal: git cat-file HEAD:<key>: bad file      (key names a tree)
        // Any other failure (e.g. "not a git repository") is a server-side
        // problem and must not be disguised as a 404.
        if (
          /does not exist in|exists on disk, but not in|not a valid object name|bad file/i.test(
            stderr,
          )
        ) {
          throw new ApiNotFoundError(`attachment not found: ${key}`);
        }
        throw new Error(`git cat-file failed (exit ${first.code}): ${stderr || 'no stderr'}`);
      }

      // Success → take over the raw response. `reply.hijack()` tells
      // Fastify "I'll send the response myself", so headers are written
      // directly on reply.raw. Headers that earlier hooks queued on the
      // Fastify reply are carried over explicitly, since Fastify will no
      // longer flush them. Keys are lowercase so that ours replace, rather
      // than duplicate, any same-named header from that set.
      reply.hijack();
      const res = reply.raw;
      res.writeHead(200, {
        ...reply.getHeaders(),
        'content-type': inferContentType(key),
        'cache-control': 'public, max-age=3600',
        // Blobs may be user-supplied; stop browsers from second-guessing
        // the declared type.
        'x-content-type-options': 'nosniff',
      });

      if (first.kind === 'close') {
        // Exit code 0 with no stdout: the blob exists and is zero bytes long.
        res.end();
        return;
      }

      // If the client goes away mid-transfer, stop git instead of leaving it
      // blocked on a pipe nobody is reading.
      const stopChild = (): void => {
        child.stdout.destroy();
        child.kill();
      };
      res.once('close', stopChild);

      res.write(first.chunk);
      // `end: false` — the response is finished below, once git's exit
      // status is known.
      child.stdout.pipe(res, { end: false });

      // Keep the handler pending until git is done (all stdout has been
      // handed to the response by the time 'close' fires).
      const exitCode = await new Promise<number>((resolve) => {
        child.once('close', (code) => resolve(code ?? -1));
      });
      res.off('close', stopChild);

      if (exitCode === 0) {
        res.end();
      } else {
        // The status line is already on the wire, so the only honest signal
        // left for "the body is incomplete" is to break the connection
        // rather than terminate the response cleanly.
        res.destroy();
      }
    },
  );
}
156 changes: 156 additions & 0 deletions apps/api/tests/attachments.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
/**
* Tests for GET /api/attachments/* (wildcard attachment key) — implements
* specs/behaviors/storage.md → "Attachments".
*
* Seeds binary blobs at the path the attachment key points to, then
* exercises the route via Fastify inject. Path-traversal + missing-key
* cases verify the validator and the git-cat-file failure translation.
*/
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import type { FastifyInstance } from 'fastify';
import { buildApp } from '../src/app.js';
import { createFullDataRepo, createPrivateStorageDir } from './helpers/test-full-repo.js';
import { seedRawBlob } from './helpers/seed-fixtures.js';

// Per-test fixtures. `dataRepo` and `privateStore` are re-created in
// `beforeEach` and removed in `afterEach`; `app` is assigned by each test
// once its blobs are seeded and is closed (then reset) in `afterEach`.
let dataRepo: { path: string; cleanup: () => Promise<void> };
let privateStore: { path: string; cleanup: () => Promise<void> };
let app: FastifyInstance | undefined;

/**
 * Build a Fastify app wired to the current test's data repo and private
 * storage directory, with logging disabled.
 *
 * Must be called after `beforeEach` has populated `dataRepo` and
 * `privateStore`.
 */
async function bootApp(): Promise<FastifyInstance> {
  const options: Parameters<typeof buildApp>[0] = {
    serverOptions: { logger: false },
    overrideEnv: {
      CFP_DATA_REPO_PATH: dataRepo.path,
      STORAGE_BACKEND: 'filesystem',
      CFP_PRIVATE_STORAGE_PATH: privateStore.path,
      CFP_JWT_SIGNING_KEY: 'test-jwt-signing-key-at-least-32-chars!!',
      NODE_ENV: 'test',
    },
  };
  return buildApp(options);
}

// Smallest PNG-shaped fixture: signature, a 1×1 8-bit greyscale IHDR chunk,
// and IEND. There is no IDAT chunk, so it will not decode as an image — it
// only has to survive git cat-file and the route byte-for-byte.
const TINY_PNG = Buffer.concat([
  Buffer.from('89504e470d0a1a0a', 'hex'), // PNG signature
  Buffer.from('0000000d49484452', 'hex'), // IHDR: data length (13) + chunk type
  Buffer.from('00000001000000010800000000', 'hex'), // IHDR: width, height, depth, colour, compression, filter, interlace
  Buffer.from('3b7e9b55', 'hex'), // IHDR: CRC
  Buffer.from('0000000049454e44ae426082', 'hex'), // IEND: length (0) + chunk type + CRC
]);

// Fresh bare data repo + private storage directory for every test.
beforeEach(async () => {
  dataRepo = await createFullDataRepo();
  privateStore = await createPrivateStorageDir();
});

// Tear down in reverse order of creation. Each step sits in a `finally` so
// that a failure while closing the app (or removing the first directory)
// cannot leave the remaining temp directories behind; the first error
// still propagates and fails the test.
afterEach(async () => {
  try {
    if (app) {
      await app.close();
    }
  } finally {
    app = undefined;
    try {
      await dataRepo.cleanup();
    } finally {
      await privateStore.cleanup();
    }
  }
});

describe('GET /api/attachments/*', () => {
  /** Send a GET to `url` through Fastify's in-process injector. */
  const get = (server: FastifyInstance, url: string) => server.inject({ method: 'GET', url });

  it('serves a seeded avatar by key with image/png Content-Type', async () => {
    await seedRawBlob(dataRepo.path, 'people/chris/avatar.png', TINY_PNG, 'seed avatar for chris');
    app = await bootApp();

    const response = await get(app, '/api/attachments/people/chris/avatar.png');

    expect(response.statusCode).toBe(200);
    expect(response.headers['content-type']).toBe('image/png');
    expect(response.headers['cache-control']).toContain('max-age=3600');
    expect(Buffer.from(response.rawPayload).equals(TINY_PNG)).toBe(true);
  });

  it('infers Content-Type from each known extension', async () => {
    const expectations: Array<{ path: string; type: string }> = [
      { path: 'people/a/avatar.jpg', type: 'image/jpeg' },
      { path: 'people/b/avatar.jpeg', type: 'image/jpeg' },
      { path: 'people/c/avatar.webp', type: 'image/webp' },
      { path: 'people/d/avatar.gif', type: 'image/gif' },
      { path: 'people/e/avatar.svg', type: 'image/svg+xml' },
      { path: 'people/f/doc.pdf', type: 'application/pdf' },
      { path: 'people/g/unknown.xyz', type: 'application/octet-stream' },
    ];

    // Seed one at a time: every seed is its own clone → commit → push.
    const placeholder = Buffer.from([0x00, 0x01]);
    for (const expectation of expectations) {
      await seedRawBlob(dataRepo.path, expectation.path, placeholder, `seed ${expectation.path}`);
    }
    app = await bootApp();

    for (const { path, type } of expectations) {
      const response = await get(app, `/api/attachments/${path}`);
      expect(response.statusCode, `${path} status`).toBe(200);
      expect(response.headers['content-type'], `${path} content-type`).toBe(type);
    }
  });

  it('returns 404 for a key not in HEAD', async () => {
    app = await bootApp();

    const response = await get(app, '/api/attachments/people/nobody/avatar.png');

    expect(response.statusCode).toBe(404);
    const payload = response.json();
    expect(payload.success).toBe(false);
    expect(payload.error?.code).toBe('not_found');
  });

  it('does not serve files outside the data repo via URL-based traversal', async () => {
    // `..` segments are expected to be collapsed in the URL before the
    // handler runs (so `/api/attachments/../etc/passwd` no longer matches
    // this route), and the key validator rejects `..` segments as a second
    // line of defense. Whichever layer answers, the contract asserted here
    // is only that traversal never produces a 200.
    app = await bootApp();
    const traversalUrls = ['/api/attachments/../etc/passwd', '/api/attachments/people/../../foo'];

    for (const url of traversalUrls) {
      const response = await get(app, url);
      expect(response.statusCode, url).not.toBe(200);
    }
  });

  it('rejects keys with embedded null bytes with 422', async () => {
    app = await bootApp();

    // %00 decodes to a null byte. If it reaches the route at all, the
    // validator's control-character check must turn it into a 422.
    const response = await get(app, '/api/attachments/people/chris/avatar%00.png');

    expect(response.statusCode).toBe(422);
  });

  it('serves binary content byte-identical (no transcoding)', async () => {
    // Every byte value 0–255 once, to prove the streaming path is byte-clean.
    const everyByte = Buffer.from(Array.from({ length: 256 }, (_, value) => value));
    await seedRawBlob(dataRepo.path, 'people/binary-test/data.bin', everyByte, 'seed binary test');
    app = await bootApp();

    const response = await get(app, '/api/attachments/people/binary-test/data.bin');

    expect(response.statusCode).toBe(200);
    expect(Buffer.from(response.rawPayload).equals(everyByte)).toBe(true);
  });
});
31 changes: 31 additions & 0 deletions apps/api/tests/helpers/seed-fixtures.ts
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,37 @@ export async function seedRawToml(
}
}

/**
 * Binary counterpart of `seedRawToml`: commits `bytes` at `relPath` in the
 * bare repository (attachments, images, …).
 *
 * Works through a throwaway clone: clone the bare repo into a temp
 * directory, configure a test identity (signing off, hooks pointed at
 * /dev/null), write the file, then add → commit → push to `origin main`.
 * The temp clone is removed whether or not any step fails.
 *
 * @param bareRepoPath - Path of the bare repository to seed.
 * @param relPath - Repository-relative path of the file to create.
 * @param bytes - Exact file contents.
 * @param commitMessage - Message for the seeding commit.
 */
export async function seedRawBlob(
  bareRepoPath: string,
  relPath: string,
  bytes: Buffer,
  commitMessage: string,
): Promise<void> {
  const worktree = await mkdtemp(join(tmpdir(), 'cfp-seed-wt-'));
  try {
    await execAsync('git', ['clone', bareRepoPath, worktree]);

    // Local-only config so the commit below never depends on the host's
    // identity, signing setup, or hooks.
    const localConfig: Array<[name: string, value: string]> = [
      ['user.email', 'test@cfp.test'],
      ['user.name', 'cfp test'],
      ['commit.gpgsign', 'false'],
      ['core.hooksPath', '/dev/null'],
    ];
    for (const [name, value] of localConfig) {
      await execAsync('git', ['config', name, value], { cwd: worktree });
    }

    const target = join(worktree, relPath);
    await mkdir(dirname(target), { recursive: true });
    await writeFile(target, bytes);

    const publishSteps: string[][] = [
      ['add', relPath],
      ['commit', '-m', commitMessage],
      ['push', 'origin', 'main'],
    ];
    for (const gitArgs of publishSteps) {
      await execAsync('git', gitArgs, { cwd: worktree });
    }
  } finally {
    await rm(worktree, { recursive: true, force: true, maxRetries: 5, retryDelay: 100 });
  }
}

// Fixed ISO-8601 UTC timestamps. NOTE(review): presumably shared by the
// fixture builders further down this file (not visible here) so seeded
// records get deterministic dates — confirm against their usage.
const NOW = '2026-05-01T00:00:00Z';
const NOW2 = '2026-05-10T00:00:00Z';

Expand Down
Loading