From 13b690b11d47e2de645bc0bf0ff337521002c903 Mon Sep 17 00:00:00 2001 From: Felix Weinberger Date: Tue, 16 Jun 2026 11:31:38 +0000 Subject: [PATCH 1/7] test(conformance): serve the 2026-07-28 path from the conformance fixture Route modern-classified requests (per-request _meta envelope, server/discover) through a createMcpHandler entry backed by the same fixture server definition; legacy-classified traffic stays on the existing stateful 2025 session path unchanged. Teach the everything client to read MCP_CONFORMANCE_PROTOCOL_VERSION, negotiate the modern era with versionNegotiation on 2026-07-28 runs, and handle the request-metadata scenario. Expected-failures burn-down (entries now passing, removed): - request-metadata (client): the server/discover negotiation probe satisfies the SEP-2575 header/_meta/retry-on--32004 checks - caching (server): 2026-era list/read results now carry ttlMs/cacheScope - http-custom-header-server-validation (server): every check is SKIPPED because the fixture registers no x-mcp-header-annotated tool; the disputed SEP-2243 error-code cells stay covered by the http-header-validation entry Comments on the remaining draft-suite entries updated to name what actually blocks them now that the 2026-07-28 path is served (multi-round-trip requests, the disputed error-code cells, error-id echo, removed-method handling). 
--- test/conformance/expected-failures.yaml | 21 +++--- test/conformance/src/everythingClient.ts | 91 ++++++++++++++++++++++++ test/conformance/src/everythingServer.ts | 36 +++++++++- 3 files changed, 135 insertions(+), 13 deletions(-) diff --git a/test/conformance/expected-failures.yaml b/test/conformance/expected-failures.yaml index 486df89058..be2d4cd020 100644 --- a/test/conformance/expected-failures.yaml +++ b/test/conformance/expected-failures.yaml @@ -22,9 +22,6 @@ client: # --- Draft-spec scenarios (in `--suite draft`, also part of `--suite all`) --- - # SEP-2575 (request metadata / _meta envelope): client does not populate the - # _meta envelope or the MCP-Protocol-Version header semantics yet. - - request-metadata # SEP-2322 (multi-round-trip requests): client does not echo requestState / # handle IncompleteResult yet. - sep-2322-client-request-state @@ -59,12 +56,16 @@ client: server: # --- Draft-spec scenarios (in `--suite draft`; the default `active` suite is green) --- - # SEP-2575 (stateless HTTP / _meta envelope): server has no stateless mode, - # _meta-derived capabilities, error-code mappings, or server/discover yet. + # SEP-2575 (stateless HTTP / _meta envelope): the fixture serves the + # 2026-07-28 path, but the scenario still fails on (a) error responses not + # echoing the request JSON-RPC id, (b) the envelope/header error-code cells + # that are parameterized pending conformance #336 (see header note), and + # (c) removed-method handling for `initialize` (404/-32601 expected, + # 400/-32004 served). - server-stateless - # SEP-2322 (multi-round-trip requests / IncompleteResult): not implemented; - # most scenarios currently fail early with "Session ID required" because the - # fixture only runs in stateful mode. + # SEP-2322 (multi-round-trip requests / IncompleteResult): not implemented + # in the SDK, so the fixture does not register the scenarios' diagnostic + # test_input_required_result_* tools. 
- input-required-result-basic-elicitation - input-required-result-basic-sampling - input-required-result-basic-list-roots @@ -75,15 +76,11 @@ server: - input-required-result-result-type - input-required-result-tampered-state - input-required-result-capability-check - # SEP-2549 (caching): no ttlMs/cacheScope support; scenario also hits the - # stateful-mode "Session ID required" error. - - caching # SEP-2243 (HTTP header standardization): the reject cells the SDK does # answer now use -32001 (HeaderMismatch), but missing-header enforcement # (Mcp-Method, Mcp-Name) and the Mcp-Name cross-check are not implemented, # so those reject cells are still accepted with 200. - http-header-validation - - http-custom-header-server-validation # WARNING-only entries: these scenarios emit no FAILURE checks, only SHOULD-level # WARNINGs, but the expected-failures evaluator counts WARNINGs as failures. # SEP-2164: server returns -32002 without the requested URI in error.data. diff --git a/test/conformance/src/everythingClient.ts b/test/conformance/src/everythingClient.ts index 05103eb26d..e58f5558c3 100644 --- a/test/conformance/src/everythingClient.ts +++ b/test/conformance/src/everythingClient.ts @@ -14,9 +14,12 @@ import { Client, + CLIENT_CAPABILITIES_META_KEY, + CLIENT_INFO_META_KEY, ClientCredentialsProvider, CrossAppAccessProvider, PrivateKeyJwtProvider, + PROTOCOL_VERSION_META_KEY, requestJwtAuthorizationGrant, StreamableHTTPClientTransport } from '@modelcontextprotocol/client'; @@ -96,6 +99,38 @@ function registerScenarios(names: string[], handler: ScenarioHandler): void { } } +// ============================================================================ +// 2026-07-28 (modern era) helpers +// ============================================================================ + +/** + * Spec versions whose wire lifecycle is the 2026-07-28 per-request envelope + * (no `initialize` handshake). 
The conformance runner passes the resolved + * spec version of the current scenario run via the + * MCP_CONFORMANCE_PROTOCOL_VERSION environment variable; when it names a + * modern version, version-spanning scenarios (e.g. tools_call) must speak the + * modern lifecycle instead of the 2025 stateful one. + */ +const MODERN_SPEC_VERSIONS = new Set(['2026-07-28']); + +function isModernConformanceRun(): boolean { + const version = process.env.MCP_CONFORMANCE_PROTOCOL_VERSION; + return version !== undefined && MODERN_SPEC_VERSIONS.has(version); +} + +/** + * The per-request `_meta` envelope every 2026-era request carries on the wire. + * Automatic envelope emission is not implemented in the client yet (it is a + * client-side follow-up), so modern-era requests attach it explicitly. + */ +function modernEnvelope(clientInfo: { name: string; version: string }, capabilities: object, protocolVersion: string | undefined) { + return { + [PROTOCOL_VERSION_META_KEY]: protocolVersion ?? '2026-07-28', + [CLIENT_INFO_META_KEY]: clientInfo, + [CLIENT_CAPABILITIES_META_KEY]: capabilities + }; +} + // ============================================================================ // Basic scenarios (initialize, tools_call) // ============================================================================ @@ -117,6 +152,10 @@ async function runBasicClient(serverUrl: string): Promise<void> { // tools_call scenario needs to actually call a tool async function runToolsCallClient(serverUrl: string): Promise<void> { + if (isModernConformanceRun()) { + return runToolsCallModernClient(serverUrl); + } + const client = new Client({ name: 'test-client', version: '1.0.0' }, { capabilities: {} }); const transport = new StreamableHTTPClientTransport(new URL(serverUrl)); @@ -141,8 +180,60 @@ async function runToolsCallClient(serverUrl: string): Promise<void> { logger.debug('Connection closed successfully'); } +// tools_call under a 2026-07-28 run: negotiate the modern era via +// server/discover (versionNegotiation), then 
drive the same tool flow with +// the per-request _meta envelope attached to every request. +async function runToolsCallModernClient(serverUrl: string): Promise<void> { + const clientInfo = { name: 'test-client', version: '1.0.0' }; + const client = new Client(clientInfo, { capabilities: {}, versionNegotiation: { mode: 'auto' } }); + + const transport = new StreamableHTTPClientTransport(new URL(serverUrl)); + + await client.connect(transport); + logger.debug('Negotiated protocol version:', client.getNegotiatedProtocolVersion()); + + const envelope = modernEnvelope(clientInfo, {}, client.getNegotiatedProtocolVersion()); + const tools = await client.request({ method: 'tools/list', params: { _meta: envelope } }); + logger.debug('Successfully listed tools'); + + // Call the add_numbers tool + const addTool = tools.tools.find(t => t.name === 'add_numbers'); + if (addTool) { + const result = await client.request({ + method: 'tools/call', + params: { name: 'add_numbers', arguments: { a: 5, b: 3 }, _meta: envelope } + }); + logger.debug('Tool call result:', JSON.stringify(result, null, 2)); + } + + await client.close(); + logger.debug('Connection closed successfully'); +} + +// request-metadata scenario (SEP-2575): every request must carry the +// MCP-Protocol-Version header and the per-request _meta envelope, and the +// client must retry with a supported version when its first choice is +// rejected with -32004. The version-negotiation probe (server/discover plus +// the corrective continuation) is exactly that mechanism. 
+async function runRequestMetadataClient(serverUrl: string): Promise<void> { + const clientInfo = { name: 'test-client', version: '1.0.0' }; + const client = new Client(clientInfo, { + capabilities: { roots: { listChanged: true }, sampling: {}, elicitation: {} }, + versionNegotiation: { mode: 'auto' } + }); + + const transport = new StreamableHTTPClientTransport(new URL(serverUrl)); + + await client.connect(transport); + logger.debug('Negotiated protocol version:', client.getNegotiatedProtocolVersion()); + + await client.close(); + logger.debug('Connection closed successfully'); +} + registerScenario('initialize', runBasicClient); registerScenario('tools_call', runToolsCallClient); +registerScenario('request-metadata', runRequestMetadataClient); // ============================================================================ // Auth scenarios - well-behaved client diff --git a/test/conformance/src/everythingServer.ts b/test/conformance/src/everythingServer.ts index 387054f0b1..16c7d49be1 100644 --- a/test/conformance/src/everythingServer.ts +++ b/test/conformance/src/everythingServer.ts @@ -12,7 +12,7 @@ import { randomUUID } from 'node:crypto'; import { localhostHostValidation } from '@modelcontextprotocol/express'; import { NodeStreamableHTTPServerTransport } from '@modelcontextprotocol/node'; import type { CallToolResult, EventId, EventStore, GetPromptResult, ReadResourceResult, StreamId } from '@modelcontextprotocol/server'; -import { isInitializeRequest, McpServer, ResourceTemplate } from '@modelcontextprotocol/server'; +import { classifyInboundRequest, createMcpHandler, isInitializeRequest, McpServer, ResourceTemplate } from '@modelcontextprotocol/server'; import cors from 'cors'; import type { Request, Response } from 'express'; import express from 'express'; @@ -872,6 +872,23 @@ function createMcpServer() { return mcpServer; } +// ===== 2026-07-28 (MODERN ERA) SERVING ===== + +// Modern-era traffic — requests claiming the per-request `_meta` envelope +// mechanism 
(SEP-2575), including `server/discover` and malformed variants of +// the claim — is served through `createMcpHandler`, backed by the same +// `createMcpServer()` fixture definition the 2025 sessions use. Legacy traffic +// never reaches this handler (see the routing in the POST handler below), so +// the 2025 stateful session path is unchanged. +const modernHandler = createMcpHandler(() => createMcpServer(), { + onerror: error => console.error('Modern-era MCP handler error:', error) +}); + +/** Normalize a possibly-repeated HTTP header to its first value. */ +function headerValue(value: string | string[] | undefined): string | undefined { + return Array.isArray(value) ? value[0] : value; +} + // ===== EXPRESS APP ===== const app = express(); @@ -894,6 +911,23 @@ app.post('/mcp', async (req: Request, res: Response) => { const sessionId = req.headers['mcp-session-id'] as string | undefined; try { + // 2026-07-28 (modern era) traffic: anything claiming the per-request + // envelope mechanism — including malformed claims, which must get the + // modern validation-ladder errors rather than the 2025 session errors — + // is served by the createMcpHandler entry. Legacy-classified requests + // (initialize, no-claim traffic, batches, posted responses) fall + // through to the stateful 2025 session path below, untouched. 
+ const inbound = classifyInboundRequest({ + httpMethod: req.method, + protocolVersionHeader: headerValue(req.headers['mcp-protocol-version']), + mcpMethodHeader: headerValue(req.headers['mcp-method']), + body: req.body + }); + if (inbound.kind !== 'legacy') { + await modernHandler.node(req, res, req.body); + return; + } + let transport: NodeStreamableHTTPServerTransport; if (sessionId && transports[sessionId]) { From c033609d97efa4e332e4a80949991011aba82546 Mon Sep 17 00:00:00 2001 From: Felix Weinberger Date: Tue, 16 Jun 2026 14:42:41 +0000 Subject: [PATCH 2/7] test(conformance): refresh the expected-failures notes against the merged serving stack Re-ran the burn-down on the integration branch tip. The error-id-echo cells and the enveloped-initialize removed-method cell in server-stateless now pass (error responses echo the request JSON-RPC id; an initialize carrying a valid 2026 envelope is answered 404/-32601), so the entry's note no longer lists them as blockers. No entries are removed: server-stateless is still held by the disputed envelope/header error-code cells pending conformance #336, and every other entry still fails for the reason already recorded. 
--- test/conformance/expected-failures.yaml | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/test/conformance/expected-failures.yaml b/test/conformance/expected-failures.yaml index be2d4cd020..603f52a927 100644 --- a/test/conformance/expected-failures.yaml +++ b/test/conformance/expected-failures.yaml @@ -57,11 +57,13 @@ client: server: # --- Draft-spec scenarios (in `--suite draft`; the default `active` suite is green) --- # SEP-2575 (stateless HTTP / _meta envelope): the fixture serves the - # 2026-07-28 path, but the scenario still fails on (a) error responses not - # echoing the request JSON-RPC id, (b) the envelope/header error-code cells - # that are parameterized pending conformance #336 (see header note), and - # (c) removed-method handling for `initialize` (404/-32601 expected, - # 400/-32004 served). + # 2026-07-28 path; error responses now echo the request JSON-RPC id and + # `initialize` gets removed-method handling (404/-32601), so the only + # remaining failures are the envelope/header error-code cells that are + # parameterized pending conformance #336 (see header note): missing _meta + # and missing protocolVersion expect -32602 and the header/body mismatch + # cell expects -32001 HeaderMismatch, while the SDK emits the provisional + # -32004. - server-stateless # SEP-2322 (multi-round-trip requests / IncompleteResult): not implemented # in the SDK, so the fixture does not register the scenarios' diagnostic From 2a7c65feb6416b1d37be00ad35c787498a071e0d Mon Sep 17 00:00:00 2001 From: Felix Weinberger Date: Tue, 16 Jun 2026 15:44:08 +0000 Subject: [PATCH 3/7] test(conformance): gate the carried-forward suites at spec version 2026-07-28 The existing legs never pass --spec-version, so carry-forward scenarios were only exercised at their default 2025 version. 
Add one leg per direction that runs --suite all --spec-version 2026-07-28 against its own expected-failures file (the shared name-keyed baseline cannot express version-split outcomes), and wire both legs into the conformance workflow. - test:conformance:server:2026 - 54 passed / 21 failed checks; baseline expected-failures.2026-07-28.yaml (16 scenarios: the same failures as the draft-suite leg plus json-schema-2020-12, which fails identically at 2025). - test:conformance:client:2026 - 206 passed / 37 failed checks; baseline expected-failures.client.2026-07-28.yaml (26 scenarios: tools_call blocked by the referee mocks omitting resultType (fixed upstream, unblocks at the next published conformance release), the SEP-837 application_type check that only fires on draft-version runs, the auth scope-escalation scenarios cut short by the 2026 connection lifecycle, and the scenarios already baselined at 2025). Both legs fail on unexpected failures and stale baseline entries, same as the existing legs. The referee stays the published 0.2.0-alpha.3 pin. 
--- .github/workflows/conformance.yml | 2 + .../expected-failures.2026-07-28.yaml | 67 +++++++++++++++++ .../expected-failures.client.2026-07-28.yaml | 75 +++++++++++++++++++ test/conformance/package.json | 2 + 4 files changed, 146 insertions(+) create mode 100644 test/conformance/expected-failures.2026-07-28.yaml create mode 100644 test/conformance/expected-failures.client.2026-07-28.yaml diff --git a/.github/workflows/conformance.yml b/.github/workflows/conformance.yml index 049b1e8fa0..dd01a74c10 100644 --- a/.github/workflows/conformance.yml +++ b/.github/workflows/conformance.yml @@ -30,6 +30,7 @@ jobs: - run: pnpm install - run: pnpm run build:all - run: pnpm run test:conformance:client:all + - run: pnpm --filter @modelcontextprotocol/test-conformance run test:conformance:client:2026 server-conformance: runs-on: ubuntu-latest @@ -48,3 +49,4 @@ jobs: - run: pnpm run build:all - run: pnpm run test:conformance:server - run: pnpm run test:conformance:server:draft + - run: pnpm --filter @modelcontextprotocol/test-conformance run test:conformance:server:2026 diff --git a/test/conformance/expected-failures.2026-07-28.yaml b/test/conformance/expected-failures.2026-07-28.yaml new file mode 100644 index 0000000000..7c8a48a759 --- /dev/null +++ b/test/conformance/expected-failures.2026-07-28.yaml @@ -0,0 +1,67 @@ +# Expected failures for the carried-forward x 2026-07-28 server leg +# (`test:conformance:server:2026` = `--suite all --spec-version 2026-07-28`). +# +# This baseline is separate from expected-failures.yaml because entries are +# keyed by scenario name only: a scenario that passes in one leg and fails in +# another cannot be expressed in a shared file (the passing leg would flag the +# entry as stale). This file is owned exclusively by the server 2026 leg and +# burns down independently of the 2025 legs. +# +# Baseline established against the published @modelcontextprotocol/conformance +# release pinned in package.json (0.2.0-alpha.3). 
Newer conformance releases +# are adopted by deliberately bumping the package.json pin and reconciling +# this file in the same change. +# +# NOTE: the draft error-code assignments exercised by the SEP-2243/SEP-2575 +# scenarios (-32001 HeaderMismatch, -32602, -32004) are still under discussion +# upstream (pending conformance #336). Those cells are treated as +# parameterized, not settled: the entries below record today's referee +# behavior and are re-derived when a #336-containing referee is pinned. +# +# Entries are grouped by what unblocks them. As each gap closes the +# corresponding scenarios start passing and MUST be removed from this list +# (the runner fails on stale entries), so the baseline burns down per +# milestone. + +server: + # --- Carried-forward scenarios (also run by the 2025 legs) --- + # Pre-existing fixture/baseline bug: the fixture tool's schema is a plain + # Zod object with none of the JSON Schema 2020-12 keywords the scenario + # checks; it fails identically at 2025 in `--suite all` (not a 2026-path + # regression). + - json-schema-2020-12 + # SEP-2164: server returns -32002 without the requested URI in error.data + # (WARNING-only; the expected-failures evaluator counts WARNINGs as + # failures). Same failure as in the 2025 baseline. + - sep-2164-resource-not-found + + # --- Draft scenarios (same failures and reasons as the `--suite draft` leg) --- + # SEP-2575 (stateless HTTP / _meta envelope): only the envelope/header + # error-code cells fail - missing _meta and missing protocolVersion expect + # -32602 and the header/body mismatch cell expects -32001 HeaderMismatch, + # while the SDK emits the provisional -32004 (parameterized pending + # conformance #336, see header note). + - server-stateless + # SEP-2243 (HTTP header standardization): some header cross-check cells + # (Mcp-Name mismatch, case/whitespace handling) are not rejected yet, and + # the rejection error code is the disputed cell family. 
+ # (Error-code cells parameterized pending conformance #336 - see header note.) + - http-header-validation + # SEP-2322 (multi-round-trip requests / IncompleteResult): not implemented + # in the SDK, so the fixture does not register the scenarios' diagnostic + # test_input_required_result_* tools. + - input-required-result-basic-elicitation + - input-required-result-basic-sampling + - input-required-result-basic-list-roots + - input-required-result-request-state + - input-required-result-multiple-input-requests + - input-required-result-multi-round + - input-required-result-non-tool-request + - input-required-result-result-type + - input-required-result-tampered-state + - input-required-result-capability-check + # SEP-2322 SHOULD-level behaviours (re-request missing inputResponses, + # ignore unrecognized inputResponses keys): WARNING-only, but the + # expected-failures evaluator counts WARNINGs as failures. + - input-required-result-missing-input-response + - input-required-result-ignore-extra-params diff --git a/test/conformance/expected-failures.client.2026-07-28.yaml b/test/conformance/expected-failures.client.2026-07-28.yaml new file mode 100644 index 0000000000..08b33be3ce --- /dev/null +++ b/test/conformance/expected-failures.client.2026-07-28.yaml @@ -0,0 +1,75 @@ +# Expected failures for the carried-forward x 2026-07-28 client leg +# (`test:conformance:client:2026` = `--suite all --spec-version 2026-07-28`). +# +# Separate from expected-failures.yaml for the same reason as +# expected-failures.2026-07-28.yaml: entries are keyed by scenario name only, +# so version-split outcomes (pass at the scenario's default version, fail when +# forced to 2026-07-28, or vice versa) cannot live in a shared file. This file +# is owned exclusively by the client 2026 leg and burns down independently. +# +# Baseline established against the published @modelcontextprotocol/conformance +# release pinned in package.json (0.2.0-alpha.3). 
Newer conformance releases +# are adopted by deliberately bumping the package.json pin and reconciling +# this file in the same change. +# +# Entries are grouped by what unblocks them. As each gap closes the +# corresponding scenarios start passing and MUST be removed from this list +# (the runner fails on stale entries), so the baseline burns down per +# milestone. + +client: + # --- Blocked by the referee's mock servers --- + # The alpha.3 mock servers omit resultType from results and the SDK 2026 + # decode rejects them ("servers implementing protocol revision 2026-07-28 + # MUST include it"), so the client errors on tools/list before the tool + # call under test. The mock fix is already merged upstream in the + # conformance repo; this entry unblocks at the next published conformance + # release + pin bump. + - tools_call + + # --- SEP-837 (application_type during DCR) --- + # The sep-837-application-type-present check only fires on draft-version + # runs; the client omits application_type during Dynamic Client + # Registration, so every auth scenario that reaches DCR fails it on this + # leg (the same scenarios pass at their default version in the 2025 legs). + - auth/metadata-default + - auth/metadata-var1 + - auth/metadata-var2 + - auth/metadata-var3 + - auth/scope-from-www-authenticate + - auth/scope-from-scopes-supported + - auth/scope-omitted-when-undefined + - auth/token-endpoint-auth-basic + - auth/token-endpoint-auth-post + - auth/token-endpoint-auth-none + - auth/offline-access-not-supported + + # --- Auth scenarios cut short by the 2026 connection lifecycle --- + # The fixture's auth flow drives the 2025 stateful lifecycle; the + # 2026-mode mock rejects the MCP POST (-32001, missing + # MCP-Protocol-Version header) before the scope-escalation behaviour these + # scenarios measure, so no authorization requests are observed. Unblocks + # when the auth fixture flow speaks the 2026 per-request lifecycle. 
+ - auth/scope-step-up + - auth/scope-retry-limit + + # --- Same gaps as the 2025 baseline (fail identically when forced to 2026-07-28) --- + # SEP-2322 (multi-round-trip requests): client does not echo requestState / + # handle IncompleteResult yet. + - sep-2322-client-request-state + # SEP-2243 (HTTP standardization): no fixture handler / client header support yet. + - http-custom-headers + - http-invalid-tool-headers + # SEP-2106 (JSON Schema $ref handling): no fixture handler for the scenario yet. + - json-schema-ref-no-deref + # SEP-2468 (authorization response iss parameter): not implemented in the client. + - auth/iss-supported + - auth/iss-not-advertised + - auth/iss-supported-missing + - auth/iss-wrong-issuer + - auth/iss-unexpected + - auth/iss-normalized + - auth/metadata-issuer-mismatch + # SEP-2352 (authorization server migration): client does not re-register + # when PRM authorization_servers changes. + - auth/authorization-server-migration diff --git a/test/conformance/package.json b/test/conformance/package.json index 7a1154b8ed..de7d282e93 100644 --- a/test/conformance/package.json +++ b/test/conformance/package.json @@ -30,10 +30,12 @@ "client": "tsx scripts/cli.ts client", "test:conformance:client": "conformance client --command 'node --import tsx ./src/everythingClient.ts' --suite core --expected-failures ./expected-failures.yaml", "test:conformance:client:all": "conformance client --command 'node --import tsx ./src/everythingClient.ts' --suite all --expected-failures ./expected-failures.yaml", + "test:conformance:client:2026": "conformance client --command 'node --import tsx ./src/everythingClient.ts' --suite all --spec-version 2026-07-28 --expected-failures ./expected-failures.client.2026-07-28.yaml", "test:conformance:client:run": "node --import tsx ./src/everythingClient.ts", "test:conformance:server": "scripts/run-server-conformance.sh --expected-failures ./expected-failures.yaml", "test:conformance:server:draft": 
"scripts/run-server-conformance.sh --suite draft --expected-failures ./expected-failures.yaml", "test:conformance:server:all": "scripts/run-server-conformance.sh --suite all --expected-failures ./expected-failures.yaml", + "test:conformance:server:2026": "scripts/run-server-conformance.sh --suite all --spec-version 2026-07-28 --expected-failures ./expected-failures.2026-07-28.yaml", "test:conformance:server:run": "node --import tsx ./src/everythingServer.ts", "test:conformance:all": "pnpm run test:conformance:client:all && pnpm run test:conformance:server:all" }, From af7e23f919797eb0a0016355a813058ae433c1bb Mon Sep 17 00:00:00 2001 From: Felix Weinberger Date: Tue, 16 Jun 2026 17:33:08 +0000 Subject: [PATCH 4/7] test(conformance): consolidate the 2026-07-28 baselines into one shared file The 2025 legs share a single expected-failures.yaml with separate client: and server: sections; mirror that shape for the 2026-07-28 carried-forward legs. Merge expected-failures.client.2026-07-28.yaml into expected-failures.2026-07-28.yaml (client: section added, entries and reasons unchanged), delete the client-specific file, and point test:conformance:client:2026 at the consolidated file. No entry changes. --- .../expected-failures.2026-07-28.yaml | 84 +++++++++++++++---- .../expected-failures.client.2026-07-28.yaml | 75 ----------------- test/conformance/package.json | 2 +- 3 files changed, 70 insertions(+), 91 deletions(-) delete mode 100644 test/conformance/expected-failures.client.2026-07-28.yaml diff --git a/test/conformance/expected-failures.2026-07-28.yaml b/test/conformance/expected-failures.2026-07-28.yaml index 7c8a48a759..2f2a02dd8a 100644 --- a/test/conformance/expected-failures.2026-07-28.yaml +++ b/test/conformance/expected-failures.2026-07-28.yaml @@ -1,28 +1,82 @@ -# Expected failures for the carried-forward x 2026-07-28 server leg -# (`test:conformance:server:2026` = `--suite all --spec-version 2026-07-28`). 
+# Expected failures for the carried-forward x 2026-07-28 legs +# (`test:conformance:client:2026` and `test:conformance:server:2026`, both +# `--suite all --spec-version 2026-07-28`). # # This baseline is separate from expected-failures.yaml because entries are -# keyed by scenario name only: a scenario that passes in one leg and fails in -# another cannot be expressed in a shared file (the passing leg would flag the -# entry as stale). This file is owned exclusively by the server 2026 leg and -# burns down independently of the 2025 legs. +# keyed by scenario name only: a scenario that passes at its default version +# in the 2025 legs but fails when forced to 2026-07-28 (or vice versa) cannot +# be expressed in a shared file (the passing leg would flag the entry as +# stale). Like expected-failures.yaml, this single file covers both +# directions: the client 2026 leg reads the `client:` section and the server +# 2026 leg reads the `server:` section. Both burn down independently of the +# 2025 legs. # # Baseline established against the published @modelcontextprotocol/conformance -# release pinned in package.json (0.2.0-alpha.3). Newer conformance releases -# are adopted by deliberately bumping the package.json pin and reconciling -# this file in the same change. -# -# NOTE: the draft error-code assignments exercised by the SEP-2243/SEP-2575 -# scenarios (-32001 HeaderMismatch, -32602, -32004) are still under discussion -# upstream (pending conformance #336). Those cells are treated as -# parameterized, not settled: the entries below record today's referee -# behavior and are re-derived when a #336-containing referee is pinned. +# release pinned in package.json. Newer conformance releases are adopted by +# deliberately bumping the pin and reconciling this file in the same change. # # Entries are grouped by what unblocks them. 
As each gap closes the # corresponding scenarios start passing and MUST be removed from this list # (the runner fails on stale entries), so the baseline burns down per # milestone. +client: + # --- Blocked by the referee's mock servers --- + # The alpha.3 mock servers omit resultType from results and the SDK 2026 + # decode rejects them ("servers implementing protocol revision 2026-07-28 + # MUST include it"), so the client errors on tools/list before the tool + # call under test. The mock fix is already merged upstream in the + # conformance repo; this entry unblocks at the next published conformance + # release + pin bump. + - tools_call + + # --- SEP-837 (application_type during DCR) --- + # The sep-837-application-type-present check only fires on draft-version + # runs; the client omits application_type during Dynamic Client + # Registration, so every auth scenario that reaches DCR fails it on this + # leg (the same scenarios pass at their default version in the 2025 legs). + - auth/metadata-default + - auth/metadata-var1 + - auth/metadata-var2 + - auth/metadata-var3 + - auth/scope-from-www-authenticate + - auth/scope-from-scopes-supported + - auth/scope-omitted-when-undefined + - auth/token-endpoint-auth-basic + - auth/token-endpoint-auth-post + - auth/token-endpoint-auth-none + - auth/offline-access-not-supported + + # --- Auth scenarios cut short by the 2026 connection lifecycle --- + # The fixture's auth flow drives the 2025 stateful lifecycle; the + # 2026-mode mock rejects the MCP POST (-32001, missing + # MCP-Protocol-Version header) before the scope-escalation behaviour these + # scenarios measure, so no authorization requests are observed. Unblocks + # when the auth fixture flow speaks the 2026 per-request lifecycle. 
+ - auth/scope-step-up + - auth/scope-retry-limit + + # --- Same gaps as the 2025 baseline (fail identically when forced to 2026-07-28) --- + # SEP-2322 (multi-round-trip requests): client does not echo requestState / + # handle IncompleteResult yet. + - sep-2322-client-request-state + # SEP-2243 (HTTP standardization): no fixture handler / client header support yet. + - http-custom-headers + - http-invalid-tool-headers + # SEP-2106 (JSON Schema $ref handling): no fixture handler for the scenario yet. + - json-schema-ref-no-deref + # SEP-2468 (authorization response iss parameter): not implemented in the client. + - auth/iss-supported + - auth/iss-not-advertised + - auth/iss-supported-missing + - auth/iss-wrong-issuer + - auth/iss-unexpected + - auth/iss-normalized + - auth/metadata-issuer-mismatch + # SEP-2352 (authorization server migration): client does not re-register + # when PRM authorization_servers changes. + - auth/authorization-server-migration + server: # --- Carried-forward scenarios (also run by the 2025 legs) --- # Pre-existing fixture/baseline bug: the fixture tool's schema is a plain diff --git a/test/conformance/expected-failures.client.2026-07-28.yaml b/test/conformance/expected-failures.client.2026-07-28.yaml deleted file mode 100644 index 08b33be3ce..0000000000 --- a/test/conformance/expected-failures.client.2026-07-28.yaml +++ /dev/null @@ -1,75 +0,0 @@ -# Expected failures for the carried-forward x 2026-07-28 client leg -# (`test:conformance:client:2026` = `--suite all --spec-version 2026-07-28`). -# -# Separate from expected-failures.yaml for the same reason as -# expected-failures.2026-07-28.yaml: entries are keyed by scenario name only, -# so version-split outcomes (pass at the scenario's default version, fail when -# forced to 2026-07-28, or vice versa) cannot live in a shared file. This file -# is owned exclusively by the client 2026 leg and burns down independently. 
-# -# Baseline established against the published @modelcontextprotocol/conformance -# release pinned in package.json (0.2.0-alpha.3). Newer conformance releases -# are adopted by deliberately bumping the package.json pin and reconciling -# this file in the same change. -# -# Entries are grouped by what unblocks them. As each gap closes the -# corresponding scenarios start passing and MUST be removed from this list -# (the runner fails on stale entries), so the baseline burns down per -# milestone. - -client: - # --- Blocked by the referee's mock servers --- - # The alpha.3 mock servers omit resultType from results and the SDK 2026 - # decode rejects them ("servers implementing protocol revision 2026-07-28 - # MUST include it"), so the client errors on tools/list before the tool - # call under test. The mock fix is already merged upstream in the - # conformance repo; this entry unblocks at the next published conformance - # release + pin bump. - - tools_call - - # --- SEP-837 (application_type during DCR) --- - # The sep-837-application-type-present check only fires on draft-version - # runs; the client omits application_type during Dynamic Client - # Registration, so every auth scenario that reaches DCR fails it on this - # leg (the same scenarios pass at their default version in the 2025 legs). 
- - auth/metadata-default - - auth/metadata-var1 - - auth/metadata-var2 - - auth/metadata-var3 - - auth/scope-from-www-authenticate - - auth/scope-from-scopes-supported - - auth/scope-omitted-when-undefined - - auth/token-endpoint-auth-basic - - auth/token-endpoint-auth-post - - auth/token-endpoint-auth-none - - auth/offline-access-not-supported - - # --- Auth scenarios cut short by the 2026 connection lifecycle --- - # The fixture's auth flow drives the 2025 stateful lifecycle; the - # 2026-mode mock rejects the MCP POST (-32001, missing - # MCP-Protocol-Version header) before the scope-escalation behaviour these - # scenarios measure, so no authorization requests are observed. Unblocks - # when the auth fixture flow speaks the 2026 per-request lifecycle. - - auth/scope-step-up - - auth/scope-retry-limit - - # --- Same gaps as the 2025 baseline (fail identically when forced to 2026-07-28) --- - # SEP-2322 (multi-round-trip requests): client does not echo requestState / - # handle IncompleteResult yet. - - sep-2322-client-request-state - # SEP-2243 (HTTP standardization): no fixture handler / client header support yet. - - http-custom-headers - - http-invalid-tool-headers - # SEP-2106 (JSON Schema $ref handling): no fixture handler for the scenario yet. - - json-schema-ref-no-deref - # SEP-2468 (authorization response iss parameter): not implemented in the client. - - auth/iss-supported - - auth/iss-not-advertised - - auth/iss-supported-missing - - auth/iss-wrong-issuer - - auth/iss-unexpected - - auth/iss-normalized - - auth/metadata-issuer-mismatch - # SEP-2352 (authorization server migration): client does not re-register - # when PRM authorization_servers changes. 
- - auth/authorization-server-migration diff --git a/test/conformance/package.json b/test/conformance/package.json index de7d282e93..9a2f67e0fc 100644 --- a/test/conformance/package.json +++ b/test/conformance/package.json @@ -30,7 +30,7 @@ "client": "tsx scripts/cli.ts client", "test:conformance:client": "conformance client --command 'node --import tsx ./src/everythingClient.ts' --suite core --expected-failures ./expected-failures.yaml", "test:conformance:client:all": "conformance client --command 'node --import tsx ./src/everythingClient.ts' --suite all --expected-failures ./expected-failures.yaml", - "test:conformance:client:2026": "conformance client --command 'node --import tsx ./src/everythingClient.ts' --suite all --spec-version 2026-07-28 --expected-failures ./expected-failures.client.2026-07-28.yaml", + "test:conformance:client:2026": "conformance client --command 'node --import tsx ./src/everythingClient.ts' --suite all --spec-version 2026-07-28 --expected-failures ./expected-failures.2026-07-28.yaml", "test:conformance:client:run": "node --import tsx ./src/everythingClient.ts", "test:conformance:server": "scripts/run-server-conformance.sh --expected-failures ./expected-failures.yaml", "test:conformance:server:draft": "scripts/run-server-conformance.sh --suite draft --expected-failures ./expected-failures.yaml", From 6f569492ba8751298f13545b546697e0945ac263 Mon Sep 17 00:00:00 2001 From: Felix Weinberger Date: Tue, 16 Jun 2026 17:38:15 +0000 Subject: [PATCH 5/7] test(conformance): bump @modelcontextprotocol/conformance to 0.2.0-alpha.4 0.2.0-alpha.4 makes the runner's mock servers include resultType in results, which the SDK's 2026-07-28 client decode requires; this unblocks the carried-forward client scenarios at the 2026 spec version. Lockfile change is scoped to the conformance package. 
--- pnpm-lock.yaml | 10 +++++----- test/conformance/package.json | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 9ffd38d3dd..483ebc939c 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -1099,8 +1099,8 @@ importers: specifier: workspace:^ version: link:../../packages/client '@modelcontextprotocol/conformance': - specifier: 0.2.0-alpha.3 - version: 0.2.0-alpha.3(@cfworker/json-schema@4.1.1) + specifier: 0.2.0-alpha.4 + version: 0.2.0-alpha.4(@cfworker/json-schema@4.1.1) '@modelcontextprotocol/core': specifier: workspace:^ version: link:../../packages/core @@ -2111,8 +2111,8 @@ packages: '@manypkg/get-packages@1.1.3': resolution: {integrity: sha512-fo+QhuU3qE/2TQMQmbVMqaQ6EWbMhi4ABWP+O4AM1NqPBuy0OrApV5LO6BrrgnhtAHS2NH6RrVk9OL181tTi8A==} - '@modelcontextprotocol/conformance@0.2.0-alpha.3': - resolution: {integrity: sha512-YjdEKaKWswkJtRl0G3RmZCfljkAct3je834sqGHgasGeU2eUp7sb+6sJL0uNEaAY3XXWYumN/mjr6aPZbnbJMA==} + '@modelcontextprotocol/conformance@0.2.0-alpha.4': + resolution: {integrity: sha512-WAz/Q+Fmr2XFcytLkmbNAJvUi0vCciNLQbjkHnaUUSyPcqQZEVNfsLECZWhN8hRS8oGpGDl9OLR9yBtzyGIY2Q==} hasBin: true '@modelcontextprotocol/sdk@1.29.0': @@ -6001,7 +6001,7 @@ snapshots: globby: 11.1.0 read-yaml-file: 1.1.0 - '@modelcontextprotocol/conformance@0.2.0-alpha.3(@cfworker/json-schema@4.1.1)': + '@modelcontextprotocol/conformance@0.2.0-alpha.4(@cfworker/json-schema@4.1.1)': dependencies: '@modelcontextprotocol/sdk': 1.29.0(@cfworker/json-schema@4.1.1)(zod@4.3.6) '@octokit/rest': 22.0.1 diff --git a/test/conformance/package.json b/test/conformance/package.json index 9a2f67e0fc..96becacab1 100644 --- a/test/conformance/package.json +++ b/test/conformance/package.json @@ -40,7 +40,7 @@ "test:conformance:all": "pnpm run test:conformance:client:all && pnpm run test:conformance:server:all" }, "devDependencies": { - "@modelcontextprotocol/conformance": "0.2.0-alpha.3", + "@modelcontextprotocol/conformance": "0.2.0-alpha.4", 
"@modelcontextprotocol/client": "workspace:^", "@modelcontextprotocol/server": "workspace:^", "@modelcontextprotocol/core": "workspace:^", From 916dfcae8f6cb74f5b3dcfa13cbb91bf9bf6f21b Mon Sep 17 00:00:00 2001 From: Felix Weinberger Date: Tue, 16 Jun 2026 17:51:58 +0000 Subject: [PATCH 6/7] test(conformance): burn down baselines now passing on 0.2.0-alpha.4 With the rejection codes aligned to the referee (-32001 for header/body mismatches, -32602 for a missing _meta envelope or protocolVersion key) and the fixture serving the 2026-07-28 path, server-stateless passes fully (21/21 checks) on the draft and 2026 server legs, so its entry leaves both baselines. The 0.2.0-alpha.4 mock servers now include resultType in results, which the SDK 2026 client decode requires, so tools_call passes on the 2026 client leg and leaves that baseline. Also reconcile the shared baseline's header with the new pin (drop the references to the previous release and to auth scenarios that the published release now ships) and restate the http-header-validation reason in the 2026 baseline in terms of the settled codes: the cells still failing are the missing-header and Mcp-Name cross-check ones, not the error-code cells. --- .../expected-failures.2026-07-28.yaml | 23 ++++--------------- test/conformance/expected-failures.yaml | 17 ++------------ 2 files changed, 6 insertions(+), 34 deletions(-) diff --git a/test/conformance/expected-failures.2026-07-28.yaml b/test/conformance/expected-failures.2026-07-28.yaml index 2f2a02dd8a..21792ec3a3 100644 --- a/test/conformance/expected-failures.2026-07-28.yaml +++ b/test/conformance/expected-failures.2026-07-28.yaml @@ -21,15 +21,6 @@ # milestone. client: - # --- Blocked by the referee's mock servers --- - # The alpha.3 mock servers omit resultType from results and the SDK 2026 - # decode rejects them ("servers implementing protocol revision 2026-07-28 - # MUST include it"), so the client errors on tools/list before the tool - # call under test. 
The mock fix is already merged upstream in the - # conformance repo; this entry unblocks at the next published conformance - # release + pin bump. - - tools_call - # --- SEP-837 (application_type during DCR) --- # The sep-837-application-type-present check only fires on draft-version # runs; the client omits application_type during Dynamic Client @@ -90,16 +81,10 @@ server: - sep-2164-resource-not-found # --- Draft scenarios (same failures and reasons as the `--suite draft` leg) --- - # SEP-2575 (stateless HTTP / _meta envelope): only the envelope/header - # error-code cells fail - missing _meta and missing protocolVersion expect - # -32602 and the header/body mismatch cell expects -32001 HeaderMismatch, - # while the SDK emits the provisional -32004 (parameterized pending - # conformance #336, see header note). - - server-stateless - # SEP-2243 (HTTP header standardization): some header cross-check cells - # (Mcp-Name mismatch, case/whitespace handling) are not rejected yet, and - # the rejection error code is the disputed cell family. - # (Error-code cells parameterized pending conformance #336 - see header note.) + # SEP-2243 (HTTP header standardization): the reject cells the SDK does + # answer now use -32001 (HeaderMismatch), but missing-header enforcement + # (Mcp-Method, Mcp-Name) and the Mcp-Name cross-check are not implemented, + # so those reject cells are still accepted with 200. - http-header-validation # SEP-2322 (multi-round-trip requests / IncompleteResult): not implemented # in the SDK, so the fixture does not register the scenarios' diagnostic diff --git a/test/conformance/expected-failures.yaml b/test/conformance/expected-failures.yaml index 603f52a927..b22573d3f8 100644 --- a/test/conformance/expected-failures.yaml +++ b/test/conformance/expected-failures.yaml @@ -2,13 +2,9 @@ # CI exits 0 if only these fail, exits 1 on unexpected failures or stale entries. 
# # Baseline established against the published @modelcontextprotocol/conformance -# release pinned in package.json (0.2.0-alpha.3). Newer conformance releases +# release pinned in package.json (0.2.0-alpha.4). Newer conformance releases # are adopted by deliberately bumping the package.json pin and reconciling -# this file in the same change. 0.2.0-alpha.3 fixes the draft wire version -# (2026-07-28). Several auth scenarios in this baseline (auth/iss-*, -# auth/authorization-server-migration, auth/enterprise-managed-authorization) -# are still not shipped in the published release — the runner reports them -# unknown/failed; their entries below cover them either way. +# this file in the same change. # # NOTE: the SDK's modern-path rejection codes are aligned with what this # referee asserts: header/body mismatches answer -32001 (HeaderMismatch) and a @@ -56,15 +52,6 @@ client: server: # --- Draft-spec scenarios (in `--suite draft`; the default `active` suite is green) --- - # SEP-2575 (stateless HTTP / _meta envelope): the fixture serves the - # 2026-07-28 path; error responses now echo the request JSON-RPC id and - # `initialize` gets removed-method handling (404/-32601), so the only - # remaining failures are the envelope/header error-code cells that are - # parameterized pending conformance #336 (see header note): missing _meta - # and missing protocolVersion expect -32602 and the header/body mismatch - # cell expects -32001 HeaderMismatch, while the SDK emits the provisional - # -32004. - - server-stateless # SEP-2322 (multi-round-trip requests / IncompleteResult): not implemented # in the SDK, so the fixture does not register the scenarios' diagnostic # test_input_required_result_* tools. 
From 7aec2d446daec32a604d9117432e07935eefbd77 Mon Sep 17 00:00:00 2001 From: Felix Weinberger Date: Tue, 16 Jun 2026 17:53:31 +0000 Subject: [PATCH 7/7] test(client): update probe-classifier test name for the settled -32001 assignment The -32001 ladder cell is no longer pending an upstream error-code decision: it is the spec-assigned HeaderMismatch code. The probe classifier still never treats it as modern evidence because deployed servers overload it for session-not-found responses. Wording only; assertions unchanged. --- packages/client/test/client/probeClassifier.test.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/client/test/client/probeClassifier.test.ts b/packages/client/test/client/probeClassifier.test.ts index 5318d30b5e..f442f65fb0 100644 --- a/packages/client/test/client/probeClassifier.test.ts +++ b/packages/client/test/client/probeClassifier.test.ts @@ -234,7 +234,7 @@ describe('row: plain-text/unparseable 400, code 0, empty body, 406, any unrecogn }); describe('row: -32001 / -32003 are NEVER probe-recognized → fall into unrecognized → legacy', () => { - test('-32001 (session-404 overload on deployed servers; ladder cell underived pending conformance #336)', () => { + test('-32001 (session-404 overload on deployed servers; the spec-assigned HeaderMismatch code is still never probe evidence)', () => { expect(classify({ kind: 'rpc-error', code: -32_001, message: 'Session not found' })).toEqual({ kind: 'legacy' }); expect(classify({ kind: 'http-error', status: 404, body: httpErrorBody(-32_001, 'Session not found') })).toEqual({ kind: 'legacy'