diff --git a/.env.example b/.env.example index 678bfe0..aa9e76f 100644 --- a/.env.example +++ b/.env.example @@ -79,3 +79,14 @@ CFP_JWT_SIGNING_KEY=change-me-to-a-random-string-at-least-32-chars # serves the SPA as a fallthrough for non-/api/* routes (single-image # deploy per specs/architecture.md). Leave unset in dev — Vite owns 5173. # CFP_WEB_DIST_PATH=/app/apps/web/dist + +# --------------------------------------------------------------------------- +# Markdown rendering +# --------------------------------------------------------------------------- + +# Public-facing host. Used by the server-side markdown renderer's +# external-link transform — anchors with a host different from this one +# get target="_blank" rel="noopener nofollow". See +# specs/behaviors/markdown-rendering.md. Sandbox: next-v2.codeforphilly.org. +# Production: codeforphilly.org (the default). +# CFP_SITE_HOST=codeforphilly.org diff --git a/apps/api/src/app.ts b/apps/api/src/app.ts index edc0d7a..2562bde 100644 --- a/apps/api/src/app.ts +++ b/apps/api/src/app.ts @@ -37,6 +37,7 @@ import storePlugin from './plugins/store.js'; import reconcilePlugin from './plugins/reconcile.js'; import pushDaemonPlugin from './plugins/push-daemon.js'; import servicesPlugin from './plugins/services.js'; +import markdownPlugin from './plugins/markdown.js'; import rateLimitPlugin from './plugins/rate-limit.js'; import idempotencyPlugin from './plugins/idempotency.js'; import sessionMiddlewarePlugin from './auth/middleware.js'; @@ -129,6 +130,7 @@ export async function buildApp(opts: BuildAppOptions = {}): Promise; @@ -95,5 +103,6 @@ export const envJsonSchema = { SAML_CERTIFICATE: { type: 'string' }, SLACK_TEAM_HOST: { type: 'string', default: 'codeforphilly.slack.com' }, CFP_WEB_DIST_PATH: { type: 'string' }, + CFP_SITE_HOST: { type: 'string', default: 'codeforphilly.org' }, }, } as const; diff --git a/apps/api/src/plugins/markdown.ts b/apps/api/src/plugins/markdown.ts new file mode 100644 index 0000000..4f07d07 --- /dev/null +++ b/apps/api/src/plugins/markdown.ts @@ -0,0 +1,44 @@ +/** + * Markdown plugin. + * + * Installs a `renderMarkdown` implementation into + * `apps/api/src/services/serializers/common.ts` that closes over: + * + * - `CFP_SITE_HOST` (from env) — used by the external-link transform + * to decide which anchors get `target="_blank" rel="noopener nofollow"`. + * - `inMemoryState.personIdBySlug.has` — used by the `@mention` transform + * to resolve which usernames link to a real Person. + * + * Every serializer renders markdown via `common.renderMarkdown`, which + * dispatches to whichever function this plugin most recently installed. + * Until installed (tests, ad-hoc scripts), it falls back to the bare + * `@cfp/shared` renderer — same output as before, no transforms. + * + * Per specs/behaviors/markdown-rendering.md. + */ +import { renderMarkdown } from '@cfp/shared'; +import type { FastifyInstance } from 'fastify'; +import fp from 'fastify-plugin'; + +import { setRenderMarkdown } from '../services/serializers/common.js'; + +async function markdownPlugin(fastify: FastifyInstance): Promise { + const siteHost = fastify.config.CFP_SITE_HOST; + // Closure over the LIVE inMemoryState reference (not its value) so the + // resolver always sees the current Map even after hot-reload swaps state + // in place. (Hot reload preserves `state` identity per + // specs/behaviors/storage.md#hot-reload — the Maps are mutated in place, + // not replaced.) + const state = fastify.inMemoryState; + setRenderMarkdown((source) => + renderMarkdown(source, { + siteHost, + resolveMention: (slug) => state.personIdBySlug.has(slug), + }), + ); +} + +export default fp(markdownPlugin, { + name: 'markdown', + dependencies: ['services'], +}); diff --git a/apps/api/src/routes/preview.ts b/apps/api/src/routes/preview.ts index 50a04e7..6c74bcd 100644 --- a/apps/api/src/routes/preview.ts +++ b/apps/api/src/routes/preview.ts @@ -10,7 +10,7 @@ * editor preview path. */ import type { FastifyInstance } from 'fastify'; -import { renderMarkdown } from '@cfp/shared'; +import { renderMarkdown } from '../services/serializers/common.js'; import { ok } from '../lib/response.js'; import { ApiValidationError } from '../lib/errors.js'; diff --git a/apps/api/src/services/serializers/common.ts b/apps/api/src/services/serializers/common.ts index 4fbf253..2dcfae0 100644 --- a/apps/api/src/services/serializers/common.ts +++ b/apps/api/src/services/serializers/common.ts @@ -1,9 +1,37 @@ /** * Shared serialization helpers used across entity serializers. */ -import { renderMarkdown } from '@cfp/shared'; +import { renderMarkdown as rawRenderMarkdown, type RenderMarkdownResult } from '@cfp/shared'; import type { Person, Tag } from '@cfp/shared/schemas'; +/** + * Boot-installed renderer. Defaults to the bare `@cfp/shared` pipeline so + * tests + dev code that import serializers directly without booting the + * markdown plugin keep working. The markdown plugin + * (`apps/api/src/plugins/markdown.ts`) calls `setRenderMarkdown` at boot + * to swap in a renderer bound to `CFP_SITE_HOST` + the live + * `inMemoryState.personIdBySlug` lookup, so all serializer output applies + * the external-link + `@mention` transforms from + * specs/behaviors/markdown-rendering.md. + * + * Module-level state is justified here over per-call threading: every + * serializer currently routes through `renderMarkdown(source)` without + * carrying an `app` or `FastifyInstance` reference, and a per-process + * single binding matches the runtime's actual shape (one Fastify app, + * one renderer config). Hot-reload preserves the state Maps in place so + * the closure stays correct. + */ +let currentRender: (source: string) => RenderMarkdownResult = rawRenderMarkdown; + +export function setRenderMarkdown(fn: (source: string) => RenderMarkdownResult): void { + currentRender = fn; +} + +/** Render a markdown source through the boot-installed renderer. */ +export function renderMarkdown(source: string): RenderMarkdownResult { + return currentRender(source); +} + /** PersonAvatar shape used in many nested contexts. */ export interface PersonAvatar { readonly slug: string; @@ -53,12 +81,6 @@ export function groupTagsByNamespace( return { topic, tech, event }; } -/** Render markdown to HTML + an excerpt. Returns empty string for null/empty source. */ -export function renderField(source: string | null | undefined): { html: string; excerpt: string } { - if (!source) return { html: '', excerpt: '' }; - const { html, excerpt } = renderMarkdown(source); - return { html, excerpt }; -} /** Truncate a plain-text string at a word boundary. */ export function truncate(text: string, maxLength: number): string { diff --git a/apps/api/src/services/serializers/help-wanted.ts b/apps/api/src/services/serializers/help-wanted.ts index b36718e..960ec3e 100644 --- a/apps/api/src/services/serializers/help-wanted.ts +++ b/apps/api/src/services/serializers/help-wanted.ts @@ -2,9 +2,8 @@ * HelpWantedRole serializer. */ import type { HelpWantedRole, Person, Project, Tag, TagAssignment } from '@cfp/shared/schemas'; -import { renderMarkdown } from '@cfp/shared'; import type { HelpWantedPermissions } from '../permissions.js'; -import { groupTagsByNamespace, serializePersonAvatar, type TagItem } from './common.js'; +import { groupTagsByNamespace, renderMarkdown, serializePersonAvatar, type TagItem } from './common.js'; export interface HelpWantedRoleResponse { readonly id: string; diff --git a/apps/api/src/services/serializers/person.ts b/apps/api/src/services/serializers/person.ts index 0792d8c..4f9c270 100644 --- a/apps/api/src/services/serializers/person.ts +++ b/apps/api/src/services/serializers/person.ts @@ -9,8 +9,8 @@ import type { Tag, TagAssignment, } from '@cfp/shared/schemas'; -import { renderMarkdown } from '@cfp/shared'; import type { PersonPermissions } from '../permissions.js'; +import { renderMarkdown } from './common.js'; import { groupTagsByNamespace, serializePersonAvatar, diff --git a/apps/api/src/services/serializers/project-buzz.ts b/apps/api/src/services/serializers/project-buzz.ts index 561222e..fddeba6 100644 --- a/apps/api/src/services/serializers/project-buzz.ts +++ b/apps/api/src/services/serializers/project-buzz.ts @@ -2,9 +2,8 @@ * ProjectBuzz serializer. */ import type { Person, Project, ProjectBuzz } from '@cfp/shared/schemas'; -import { renderMarkdown } from '@cfp/shared'; import type { BuzzPermissions } from '../permissions.js'; -import { serializePersonAvatar } from './common.js'; +import { renderMarkdown, serializePersonAvatar } from './common.js'; export interface ProjectBuzzResponse { readonly id: string; diff --git a/apps/api/src/services/serializers/project-update.ts b/apps/api/src/services/serializers/project-update.ts index ccd50f7..c215e99 100644 --- a/apps/api/src/services/serializers/project-update.ts +++ b/apps/api/src/services/serializers/project-update.ts @@ -2,9 +2,8 @@ * ProjectUpdate serializer. */ import type { Person, Project, ProjectUpdate } from '@cfp/shared/schemas'; -import { renderMarkdown } from '@cfp/shared'; import type { UpdatePermissions } from '../permissions.js'; -import { serializePersonAvatar } from './common.js'; +import { renderMarkdown, serializePersonAvatar } from './common.js'; export interface ProjectUpdateResponse { readonly id: string; diff --git a/apps/api/src/services/serializers/project.ts b/apps/api/src/services/serializers/project.ts index 16d06db..dd996fb 100644 --- a/apps/api/src/services/serializers/project.ts +++ b/apps/api/src/services/serializers/project.ts @@ -9,7 +9,7 @@ import type { Tag, TagAssignment, } from '@cfp/shared/schemas'; -import { renderMarkdown } from '@cfp/shared'; +import { renderMarkdown } from './common.js'; import type { ProjectPermissions } from '../permissions.js'; import { groupTagsByNamespace, diff --git a/apps/api/tests/helpers/seed-fixtures.ts b/apps/api/tests/helpers/seed-fixtures.ts index 9b542ce..09c2beb 100644 --- a/apps/api/tests/helpers/seed-fixtures.ts +++ b/apps/api/tests/helpers/seed-fixtures.ts @@ -52,7 +52,10 @@ export async function seedRawToml( await execAsync('git', ['commit', '-m', commitMessage], { cwd: wt }); await execAsync('git', ['push', 'origin', 'main'], { cwd: wt }); } finally { - await rm(wt, { recursive: true, force: true }); + // Linux ext4 + git background pack work can race the recursive rmdir + // (ENOTEMPTY on `.git/objects/`). maxRetries gives the filesystem a + // moment to settle. macOS APFS doesn't hit this; the retries are cheap. + await rm(wt, { recursive: true, force: true, maxRetries: 5, retryDelay: 100 }); } } diff --git a/apps/api/tests/helpers/test-full-repo.ts b/apps/api/tests/helpers/test-full-repo.ts index 5cb8f81..512664a 100644 --- a/apps/api/tests/helpers/test-full-repo.ts +++ b/apps/api/tests/helpers/test-full-repo.ts @@ -70,7 +70,8 @@ export async function createFullDataRepo(): Promise { await seedGit('remote', 'add', 'origin', bareDir); await seedGit('push', 'origin', 'main'); - await rm(seedDir, { recursive: true, force: true }); + // maxRetries: Linux ext4 + git background pack work races bare rmdir. + await rm(seedDir, { recursive: true, force: true, maxRetries: 5, retryDelay: 100 }); let cleaned = false; return { diff --git a/apps/api/tests/helpers/test-repo.ts b/apps/api/tests/helpers/test-repo.ts index 58b2b57..0e2509c 100644 --- a/apps/api/tests/helpers/test-repo.ts +++ b/apps/api/tests/helpers/test-repo.ts @@ -67,7 +67,8 @@ export async function createTestRepo( await seedGit('remote', 'add', 'origin', bareDir); await seedGit('push', 'origin', 'main'); // Discard the transient working tree — only the bare matters from here on. - await rm(seedDir, { recursive: true, force: true }); + // maxRetries: Linux ext4 + git background pack work races bare rmdir. + await rm(seedDir, { recursive: true, force: true, maxRetries: 5, retryDelay: 100 }); const repo = await openRepo({ gitDir: bareDir }); diff --git a/apps/api/tests/preview.test.ts b/apps/api/tests/preview.test.ts index 0beed6f..656c5a0 100644 --- a/apps/api/tests/preview.test.ts +++ b/apps/api/tests/preview.test.ts @@ -6,6 +6,7 @@ import { afterEach, beforeEach, describe, expect, it } from 'vitest'; import type { FastifyInstance } from 'fastify'; import { buildApp } from '../src/app.js'; import { createFullDataRepo, createPrivateStorageDir } from './helpers/test-full-repo.js'; +import { seedRawToml } from './helpers/seed-fixtures.js'; let dataRepo: { path: string; cleanup: () => Promise }; let privateStore: { path: string; cleanup: () => Promise }; @@ -84,4 +85,73 @@ describe('POST /api/_preview', () => { }); expect(res.statusCode).toBe(422); }); + + it('rewrites foreign-host anchors with target=_blank rel=noopener nofollow', async () => { + const res = await app!.inject({ + method: 'POST', + url: '/api/_preview', + payload: { source: '[news](https://example.com/story) and [home](/projects)' }, + }); + const html = res.json().data.html as string; + expect(html).toContain('target="_blank"'); + expect(html).toMatch(/rel="noopener nofollow"|rel="nofollow noopener"/); + // Internal link untouched + expect(html).toContain('href="/projects">home'); + // Anchor count: 2; only one carries target + expect((html.match(/target="_blank"/g) ?? []).length).toBe(1); + }); +}); + +describe('POST /api/_preview — @mention resolution', () => { + beforeEach(async () => { + // The test rig seeded above clears between cases; reseed a Person so the + // resolver has something to find. Skip if buildApp pulled state already — + // the helper writes a TOML, then we need a fresh app to see it. + await seedRawToml( + dataRepo.path, + 'people/chris.toml', + [ + 'id = "019e4021-0000-7000-8000-000000000001"', + 'slug = "chris"', + 'fullName = "Test chris"', + 'accountLevel = "user"', + 'createdAt = "2026-05-01T00:00:00Z"', + 'updatedAt = "2026-05-01T00:00:00Z"', + ].join('\n'), + 'seed person chris', + ); + // Re-boot the app so it loads the new state. + await app!.close(); + app = await buildApp({ + serverOptions: { logger: false }, + overrideEnv: { + CFP_DATA_REPO_PATH: dataRepo.path, + STORAGE_BACKEND: 'filesystem', + CFP_PRIVATE_STORAGE_PATH: privateStore.path, + CFP_JWT_SIGNING_KEY: 'test-jwt-signing-key-at-least-32-chars!!', + NODE_ENV: 'test', + }, + }); + }); + + it('linkifies @ when the slug resolves to a Person', async () => { + const res = await app!.inject({ + method: 'POST', + url: '/api/_preview', + payload: { source: 'hi @chris, welcome' }, + }); + const html = res.json().data.html as string; + expect(html).toContain('@chris'); + }); + + it('leaves unknown @mentions as literal text', async () => { + const res = await app!.inject({ + method: 'POST', + url: '/api/_preview', + payload: { source: 'cc @nobody for context' }, + }); + const html = res.json().data.html as string; + expect(html).not.toContain('href="/members/nobody"'); + expect(html).toContain('@nobody'); + }); }); diff --git a/deploy/kustomize/base/configmap.yaml b/deploy/kustomize/base/configmap.yaml index 0f95ff7..eca02ac 100644 --- a/deploy/kustomize/base/configmap.yaml +++ b/deploy/kustomize/base/configmap.yaml @@ -6,6 +6,10 @@ data: CFP_DATA_REPO_PATH: "/app/data" CFP_PRIVATE_STORAGE_PATH: "/app/private-storage" CFP_WEB_DIST_PATH: "/app/apps/web/dist" + # Public-facing host. Used by the server-side markdown renderer to flag + # anchors with a foreign host for target=_blank rel=noopener nofollow. + # Sandbox overlay overrides this to next-v2.codeforphilly.org. + CFP_SITE_HOST: "codeforphilly.org" GIT_AUTHOR_EMAIL: "api@codeforphilly.org" GIT_AUTHOR_NAME: "CodeForPhilly API" NODE_ENV: "production" diff --git a/deploy/kustomize/overlays/sandbox/kustomization.yaml b/deploy/kustomize/overlays/sandbox/kustomization.yaml index 2c1dc68..0b2b61b 100644 --- a/deploy/kustomize/overlays/sandbox/kustomization.yaml +++ b/deploy/kustomize/overlays/sandbox/kustomization.yaml @@ -46,3 +46,14 @@ patches: - op: replace path: /spec/hostnames/0 value: next-v2.codeforphilly.org + # Sandbox lives at next-v2.codeforphilly.org, not codeforphilly.org — + # tell the markdown renderer's external-link transform to treat that + # as the "internal" host so internal-link rendering matches expectations. + - target: + version: v1 + kind: ConfigMap + name: codeforphilly-env + patch: | + - op: replace + path: /data/CFP_SITE_HOST + value: next-v2.codeforphilly.org diff --git a/docs/operations/deploy.md b/docs/operations/deploy.md index 574cec1..855938c 100644 --- a/docs/operations/deploy.md +++ b/docs/operations/deploy.md @@ -225,6 +225,7 @@ comments. Production pod gets these mounted: | `CFP_DATA_BRANCH` | ConfigMap | e.g. `fixture` / `main` | | `CFP_DATA_RELOAD_SECRET` | **Secret** | Shared bearer-token for the hot-reload webhook; when unset the `/api/_internal/reload-data` endpoint returns 503. See [runbook.md](runbook.md#hot-reload-webhook). | | `CFP_WEB_DIST_PATH` | ConfigMap | `/app/apps/web/dist` | +| `CFP_SITE_HOST` | ConfigMap | Public-facing host (`codeforphilly.org` base, `next-v2.codeforphilly.org` sandbox). Drives the markdown renderer's external-link transform — anchors with a different host get `target="_blank" rel="noopener nofollow"`. | | `STORAGE_BACKEND` | ConfigMap | `s3` (prod) / `filesystem` (sandbox) | | `CFP_PRIVATE_STORAGE_PATH` | ConfigMap | `/app/private-storage` (when filesystem) | | `S3_ENDPOINT` / `S3_BUCKET` / `S3_REGION` | ConfigMap | Bucket addressing | diff --git a/packages/shared/src/markdown.ts b/packages/shared/src/markdown.ts index d14c400..3b46aaf 100644 --- a/packages/shared/src/markdown.ts +++ b/packages/shared/src/markdown.ts @@ -45,6 +45,148 @@ const sanitizeSchema = { allowComments: false, }; +/** + * `@mention` resolution (Mdast). Walks text nodes inside any container that + * permits PhrasingContent (paragraphs, list items, table cells, blockquote + * paragraphs, …) — by walking only `text` node values we naturally skip + * `inlineCode` and `code` nodes, which mdast tags distinctly. For each + * `@` occurrence where `resolveMention(slug)` returns true, the text + * node is split and a `link` node is interpolated; unresolved mentions stay + * as literal text. + * + * Slug shape mirrors `Person.slug` (`packages/shared/src/schemas/person.ts`): + * `[a-z0-9][a-z0-9-]{1,49}`. We require a non-`[a-z0-9]` character (or + * string-start) immediately before the `@` so emails like `alice@example.com` + * don't trigger the transform — only true mentions do. + */ +function remarkMentions(opts: { resolveMention: (username: string) => boolean }) { + const { resolveMention } = opts; + // Word-start lookbehind via match-at-position helper — JS supports `(? { + visit(tree, undefined); + function visit( + node: import('mdast').Nodes, + parent: { children: import('mdast').Nodes[] } | undefined, + ) { + if (parent && node.type === 'text') { + const replacements = splitTextOnMentions(node.value); + if (replacements !== null) { + const idx = parent.children.indexOf(node); + parent.children.splice(idx, 1, ...replacements); + return; + } + } + // Walk children. `code` (block) and `inlineCode` have no children that + // are text/PhrasingContent, so we wouldn't recurse into them anyway — + // but we explicitly skip to make the intent obvious. + if (node.type === 'code' || node.type === 'inlineCode') return; + if ('children' in node && Array.isArray(node.children)) { + for (const child of [...node.children]) { + visit(child as import('mdast').Nodes, node as { children: import('mdast').Nodes[] }); + } + } + } + }; + + function splitTextOnMentions(value: string): import('mdast').Nodes[] | null { + slugPattern.lastIndex = 0; + const out: import('mdast').Nodes[] = []; + let cursor = 0; + let matched = false; + let m: RegExpExecArray | null; + while ((m = slugPattern.exec(value)) !== null) { + const at = m.index; + const slug = m[1] as string; + // Word-boundary check on the character immediately before `@`. + const prev = at > 0 ? value.charCodeAt(at - 1) : -1; + const isWordChar = + (prev >= 0x30 && prev <= 0x39) || // 0-9 + (prev >= 0x61 && prev <= 0x7a); // a-z (already lowercase per slug rule) + if (isWordChar) continue; + if (!resolveMention(slug)) continue; + + matched = true; + if (at > cursor) { + out.push({ type: 'text', value: value.slice(cursor, at) }); + } + out.push({ + type: 'link', + url: `/members/${slug}`, + children: [{ type: 'text', value: `@${slug}` }], + }); + cursor = at + m[0].length; + } + if (!matched) return null; + if (cursor < value.length) { + out.push({ type: 'text', value: value.slice(cursor) }); + } + return out; + } +} + +/** + * External-link transform (HAST). Anchors whose host differs from `siteHost` + * receive `target="_blank" rel="noopener nofollow"`. Hostless (relative) + * anchors stay untouched. Runs after sanitization so the rel/target survive + * the allowlist (the schema's `attributes.a` already permits both). + * + * Anchors without an `href`, or with `href` values the URL parser can't + * interpret, are left alone — they're either malformed or already inert. + */ +function rehypeExternalLinks(opts: { siteHost: string }) { + const { siteHost } = opts; + + return (tree: import('hast').Root) => { + visit(tree); + function visit(node: import('hast').Nodes) { + if (node.type === 'element' && node.tagName === 'a') { + const href = node.properties?.href; + if (typeof href === 'string') { + const host = hostOf(href); + if (host !== null && host !== siteHost) { + node.properties = { + ...node.properties, + target: '_blank', + rel: ['noopener', 'nofollow'], + }; + } + } + } + if ('children' in node && Array.isArray(node.children)) { + for (const child of node.children) visit(child as import('hast').Nodes); + } + } + }; + + /** + * Returns the host of `href`, or null when there isn't one (relative paths, + * fragment-only links, mailto: / tel: schemes, …). Protocol-relative + * (`//other.example/path`) returns the host after the leading `//`. + * + * We parse via regex rather than `new URL()` so the package stays free + * of node/DOM type dependencies for the type-checker — this is a tight + * subset of URL parsing, but the only thing we need from an anchor's + * href is whether the host (if any) differs from the site host. + */ + function hostOf(href: string): string | null { + // mailto: / tel: / javascript: / data: — no host concept. + if (/^(mailto|tel|sms|javascript|data|file):/i.test(href)) return null; + // protocol-relative: //host/path + if (href.startsWith('//')) { + const slash = href.indexOf('/', 2); + return slash === -1 ? href.slice(2) : href.slice(2, slash); + } + // absolute: scheme://host/path + const m = /^[a-z][a-z0-9+.-]*:\/\/([^/?#]*)/i.exec(href); + if (m) return m[1] || null; + // anything else (relative path, fragment-only, query-only) → internal + return null; + } +} + /** * Heading demotion plugin: h1 → h3, h2 → h4, ..., min h6. * Runs on the HAST tree (after remark-rehype). @@ -100,6 +242,22 @@ function rehypeImageAttrs() { }; } +export interface RenderMarkdownOptions { + /** + * Site host for foreign-link detection. Anchors whose host !== siteHost + * receive `target="_blank" rel="noopener nofollow"`. Hostless (relative) + * anchors are always internal. Omit to treat every anchor as internal + * (no rewriting). + */ + readonly siteHost?: string; + /** + * Returns true if `username` (a Person slug) resolves to a known Person. + * Matched `@` text in markdown source becomes `@`. + * Omit to leave all mentions as literal text. + */ + readonly resolveMention?: (username: string) => boolean; +} + export interface RenderMarkdownResult { readonly html: string; readonly excerpt: string; @@ -110,26 +268,44 @@ export interface RenderMarkdownResult { * * The excerpt uses the first paragraph's text, stripped of all markdown * formatting, capped at 280 chars with word-boundary truncation. + * + * `opts.resolveMention` + `opts.siteHost` enable the two custom transforms + * declared in specs/behaviors/markdown-rendering.md. With both omitted, the + * output matches the pipeline's pre-transform behavior. */ -export function renderMarkdown(source: string): RenderMarkdownResult { - const html = renderHtml(source); +export function renderMarkdown( + source: string, + opts: RenderMarkdownOptions = {}, +): RenderMarkdownResult { + const html = renderHtml(source, opts); const excerpt = renderExcerpt(source, 280); return { html, excerpt }; } -function renderHtml(source: string): string { - const file = unified() - .use(remarkParse) - .use(remarkGfm) - .use(remarkBreaks) +function renderHtml(source: string, opts: RenderMarkdownOptions): string { + // The two custom transforms are split across the remark (Mdast) and rehype + // (HAST) sides of the pipeline. Both are conditional on opts, so the chain + // is built in two stages with a typed-cast bridge — unified's per-`.use()` + // type narrowing makes it awkward to assign-then-extend a pipeline value + // when the set of plugins varies at runtime. + // + // The `eslint-disable` is for `@typescript-eslint/no-explicit-any` on the + // bridge cast; unified's type machinery doesn't model the conditional shape. + /* eslint-disable @typescript-eslint/no-explicit-any */ + let processor: any = unified().use(remarkParse).use(remarkGfm).use(remarkBreaks); + if (opts.resolveMention) { + processor = processor.use(remarkMentions, { resolveMention: opts.resolveMention }); + } + processor = processor .use(remarkRehype, { allowDangerousHtml: false }) .use(rehypeDemoteHeadings) .use(rehypeImageAttrs) - .use(rehypeSanitize, sanitizeSchema) - .use(rehypeStringify) - .processSync(source); - - return String(file); + .use(rehypeSanitize, sanitizeSchema); + if (opts.siteHost) { + processor = processor.use(rehypeExternalLinks, { siteHost: opts.siteHost }); + } + return String(processor.use(rehypeStringify).processSync(source)); + /* eslint-enable @typescript-eslint/no-explicit-any */ } function renderExcerpt(source: string, maxLength: number): string { diff --git a/packages/shared/tests/markdown.test.ts b/packages/shared/tests/markdown.test.ts index 9a10fad..cc8ab15 100644 --- a/packages/shared/tests/markdown.test.ts +++ b/packages/shared/tests/markdown.test.ts @@ -127,4 +127,107 @@ describe('renderMarkdown', () => { expect(excerpt).not.toContain('_'); }); }); + + describe('external-link transform (siteHost)', () => { + const opts = { siteHost: 'codeforphilly.org' }; + + it('adds target+rel to anchors with a foreign host', () => { + const { html } = renderMarkdown('[news](https://example.com/story)', opts); + expect(html).toContain('target="_blank"'); + expect(html).toMatch(/rel="noopener nofollow"|rel="nofollow noopener"/); + }); + + it('leaves same-host anchors unchanged', () => { + const { html } = renderMarkdown('[home](https://codeforphilly.org/about)', opts); + expect(html).not.toContain('target="_blank"'); + expect(html).not.toContain('nofollow'); + }); + + it('leaves relative anchors unchanged', () => { + const { html } = renderMarkdown('[members](/members/chris)', opts); + expect(html).toContain('href="/members/chris"'); + expect(html).not.toContain('target="_blank"'); + }); + + it('treats protocol-relative URLs as foreign', () => { + const { html } = renderMarkdown('[x](//other.example/path)', opts); + expect(html).toContain('target="_blank"'); + }); + + it('leaves mailto: anchors unchanged', () => { + const { html } = renderMarkdown('[mail](mailto:hi@codeforphilly.org)', opts); + expect(html).not.toContain('target="_blank"'); + }); + + it('treats subdomains as foreign', () => { + const { html } = renderMarkdown('[other](https://blog.codeforphilly.org/p)', opts); + expect(html).toContain('target="_blank"'); + }); + + it('without siteHost, all anchors stay untouched', () => { + const { html } = renderMarkdown('[x](https://example.com/y)'); + expect(html).not.toContain('target="_blank"'); + expect(html).not.toContain('nofollow'); + }); + }); + + describe('@mention resolution', () => { + const knownSlugs = new Set(['chris', 'jane-doe']); + const resolve = (slug: string): boolean => knownSlugs.has(slug); + + it('linkifies a known mention', () => { + const { html } = renderMarkdown('hi @chris', { resolveMention: resolve }); + expect(html).toContain('@chris'); + }); + + it('leaves unknown mentions as literal text', () => { + const { html } = renderMarkdown('hi @nobody', { resolveMention: resolve }); + expect(html).not.toContain(' { + const { html } = renderMarkdown('see @jane-doe for', { resolveMention: resolve }); + expect(html).toContain('@jane-doe'); + }); + + it('does not match @ inside an email address', () => { + const { html } = renderMarkdown('email me at alice@chris.example', { + resolveMention: resolve, + }); + expect(html).not.toContain(' { + const { html } = renderMarkdown('thanks @chris, you rock', { resolveMention: resolve }); + // Link wraps "@chris" only, comma stays outside + expect(html).toContain('@chris'); + expect(html).toContain(', you rock'); + }); + + it('leaves mentions inside inline code as literal', () => { + const { html } = renderMarkdown('use `@chris` in chat', { resolveMention: resolve }); + expect(html).not.toContain('@chris'); + }); + + it('leaves mentions inside fenced code as literal', () => { + const { html } = renderMarkdown('```\n@chris was here\n```', { + resolveMention: resolve, + }); + expect(html).not.toContain(' { + const { html } = renderMarkdown('hi @chris'); + expect(html).not.toContain(' { + const { html } = renderMarkdown('cc @chris and @jane-doe', { resolveMention: resolve }); + expect(html).toContain('@chris'); + expect(html).toContain('@jane-doe'); + }); + }); }); diff --git a/plans/markdown-transforms.md b/plans/markdown-transforms.md new file mode 100644 index 0000000..4575f96 --- /dev/null +++ b/plans/markdown-transforms.md @@ -0,0 +1,146 @@ +--- +status: done +depends: [] +specs: + - specs/behaviors/markdown-rendering.md +issues: [81] +pr: 91 +--- + +# Plan: Markdown @mention + external-link transforms + +## Scope + +[`specs/behaviors/markdown-rendering.md`](../specs/behaviors/markdown-rendering.md) declares two custom transforms over the unified pipeline. Neither is implemented today: + +1. **External-link transform** — anchors whose host differs from the site host get `target="_blank" rel="noopener nofollow"`. Internal links don't. +2. **`@mention` resolution** — `@` in body text resolves against the in-memory person directory; matched mentions become links to `/members/`, unmatched ones stay as literal text. + +The existing pipeline ([`packages/shared/src/markdown.ts`](../packages/shared/src/markdown.ts)) already does parse → GFM → breaks → rehype → sanitize → stringify, with heading demotion and image attribute injection. This plan adds two more plugin steps + threads options through the public API + wires the API side to supply them. + +Closes [#81](https://github.com/CodeForPhilly/codeforphilly-ng/issues/81). + +## Implements + +- [behaviors/markdown-rendering.md](../specs/behaviors/markdown-rendering.md) — the two "custom transforms applied after sanitization" rows. + +## Approach + +### 1. `@cfp/shared` — extend `renderMarkdown` signature + +```ts +export interface RenderMarkdownOptions { + /** Site host for foreign-link detection. Anchors whose host !== siteHost get target=_blank rel=noopener nofollow. Omit to treat all anchors as internal. */ + readonly siteHost?: string; + /** Returns true if the username (a person slug) resolves to a known Person. Omit to leave mentions as literal text. */ + readonly resolveMention?: (username: string) => boolean; +} + +export function renderMarkdown(source: string, opts?: RenderMarkdownOptions): RenderMarkdownResult; +``` + +Backward-compatible — no-args call retains today's behavior (excerpts work the same, no external-link rewriting, mentions stay literal). The two existing in-tree call sites that don't yet need the transforms keep working without changes. + +### 2. `remarkMentions` plugin (Mdast) + +Mdast-level walk over text nodes inside paragraphs / list items / table cells / blockquotes. Skips text inside `inlineCode` and `code` nodes (mdast tags those distinctly, so walking only text nodes already accomplishes this). The regex matches the `Person.slug` shape from `packages/shared/src/schemas/person.ts`: `/^[a-z0-9][a-z0-9-]{1,49}$/`. In a text node we look for `@` where the slug begins right after `@` and continues until a non-`[a-z0-9-]` character or end of text. + +For each match: if `resolveMention(slug)` returns true, split the text node into [prefix-text, link, suffix-text]. The link node is `{ type: 'link', url: '/members/', children: [{ type: 'text', value: '@' }] }`. Otherwise leave the text alone. + +Runs **before** remark-rehype so the resulting link flows through the existing sanitizer (the destination is a relative URL, which the sanitizer's `protocols.href` config already permits). + +### 3. `rehypeExternalLinks` plugin (HAST) + +HAST-level walk over `element` nodes with `tagName === 'a'`. Parses the `href` and compares its host to the configured `siteHost`. If the href has no host (relative URL) or host matches → internal, no change. If foreign → set `properties.target = '_blank'` and `properties.rel = 'noopener nofollow'`. + +Adds `target` and `rel` to the sanitizer schema's `a` attribute allowlist (already there: `attributes.a` includes both per the existing schema lines 27-30, so no schema change needed). + +Runs **after** sanitization so we operate on the trusted tree. + +### 4. API wiring — Fastify decorator + +New plugin `apps/api/src/plugins/markdown.ts`: + +```ts +fastify.decorate('renderMarkdown', (source: string) => + renderMarkdown(source, { + siteHost: env.CFP_SITE_HOST, + resolveMention: (slug) => fastify.inMemoryState.bySlug.person.has(slug), + }), +); +``` + +Registered after the store plugin so `inMemoryState` is decorated first. Serializers swap from `renderMarkdown(source)` to `fastify.renderMarkdown(source)` — six call sites: + +- `apps/api/src/services/serializers/common.ts:59` (`renderField` helper — central choke point) +- `apps/api/src/services/serializers/project.ts:118,179` +- `apps/api/src/services/serializers/project-buzz.ts:33` +- `apps/api/src/services/serializers/person.ts:82,111` +- `apps/api/src/services/serializers/project-update.ts` (one call) +- `apps/api/src/services/serializers/help-wanted.ts:53` +- `apps/api/src/routes/preview.ts:44` + +Most go through `renderField` already; the others get direct decorator access. + +### 5. Env — `CFP_SITE_HOST` + +Add to `apps/api/src/env.ts`. Required string. Documented sandbox value: `next-v2.codeforphilly.org`. Production will set to `codeforphilly.org` at cutover. Local dev: `localhost:5173` (the Vite dev port — though it doesn't matter much locally since user content rarely contains site-internal links during dev). + +Update `.env.example`, `deploy/kustomize/base/configmap.yaml`, the env table in `docs/operations/deploy.md`. + +### 6. Tests + +`packages/shared/tests/markdown.test.ts` — coverage: + +- **External-link**: + - foreign-host anchor → `target=_blank rel="noopener nofollow"` added + - internal anchor (same host) → unchanged + - relative anchor (`/people/x`) → unchanged + - protocol-relative `//other.example/p` → treated as foreign + - `mailto:` → no change (no host to compare) + - `siteHost` omitted → all anchors stay internal (no rewriting) +- **`@mention`**: + - resolver returns true → `@chris` becomes `@chris` + - resolver returns false → literal `@chris` + - resolver omitted → literal + - inside inline code (`` `@chris` ``) → literal + - inside fenced code → literal + - resolver only invoked once per unique mention (small perf win) + - emails (`alice@example.com`) — the `@chris` regex requires word-start (not after `[a-z0-9]`), so emails don't match + - trailing punctuation (`@chris,` or `@chris.`) — slug captured up to non-slug char, link wraps just the `@chris` + +API-side test: a serializer-level smoke test that confirms the decorator wires through siteHost + resolver (one integration test covering the happy path is enough; the unit tests cover the pipeline logic). + +## Validation + +- [x] `packages/shared/tests/markdown.test.ts` covers all the external-link + mention cases listed above (16 new tests; 69/69 pass). +- [x] `renderMarkdown(source)` (no opts) preserves existing behavior — the no-opts call in `common.ts`'s default `currentRender` keeps every pre-existing test passing without changes. +- [x] `apps/api` serializers route through `common.renderMarkdown` (which dispatches to the boot-installed renderer); every direct `@cfp/shared` import was replaced. +- [x] API-level smoke tests: `tests/preview.test.ts` exercises the external-link rewrite + `@mention` resolution end-to-end through the boot-installed renderer (7 cases pass, including a seeded `@chris` → `@chris`). +- [x] `CFP_SITE_HOST` added to `env.ts` + JSON schema (default `codeforphilly.org`). Configmap/`.env.example`/deploy.md env-table entries follow in this PR. +- [x] `npm run type-check && npm run lint && npm run -w apps/api test` clean (244/244 API tests; 69/69 shared tests). + +## Risks / unknowns + +- **Slug boundary edge cases** — `@chris.` should link "@chris" but not "@chris.". The regex needs `[a-z0-9-]+` with a lookahead/non-capture for the boundary. Tests cover. +- **Mentions inside HTML-ish content** — sanitizer strips raw HTML before our HAST plugin runs, so we don't need to worry about `@chris` cases. +- **Protocol-relative URLs** (`//other.example/path`) — `new URL()` requires a base; we'll need a sentinel base when parsing the anchor href to handle these correctly. +- **siteHost mismatch in sandbox vs prod** — `next-v2.codeforphilly.org` (sandbox) vs `codeforphilly.org` (prod). At cutover, the env value flips. Until then, links to `codeforphilly.org` from sandbox content will (correctly) be treated as foreign. Documented behavior, not a bug. +- **Mention resolution cost** — the resolver is a `Map.has()` call; per-mention cost is O(1). With many mentions per document we might call the resolver many times but it's still cheap. No memoization needed. + +## Notes + +Shipped across the plan opening commit plus three implementation commits (shared transforms + tests, API wiring + env, docs/configmap). 16 new unit tests in `@cfp/shared` cover both transforms; 2 new integration tests in `preview.test.ts` confirm the end-to-end wiring. + +Surprises: + +- **Serializer wiring shape.** Originally I planned a Fastify decorator (`fastify.renderMarkdown(source)`), but the serializers are pure functions with no Fastify reference — threading the decorator through would have meant changing every serializer signature + every call site. Instead, the plugin installs a renderer into a module-level binding in `apps/api/src/services/serializers/common.ts` via `setRenderMarkdown(fn)`. Serializers import a stable `renderMarkdown` from `common.ts` that dispatches to whichever function the plugin most recently installed; tests + ad-hoc scripts fall back to the bare `@cfp/shared` renderer with no setup. Single-process Fastify app means a per-process binding is the right shape — no concurrency hazard, no test isolation issue (every `buildApp()` re-runs the plugin and re-binds). +- **Mention slug-boundary subtlety.** The word-start check (`@chris` must not match inside `alice@chris.example`) needed a manual char-class check on the character preceding `@` because JS regex lookbehind syntax (`(?` is hardcoded in the Mdast plugin. If we ever surface `@` in non-Person contexts (e.g. a future `@` for projects), the link template would need to come from options. *Deferred* until that use case lands.