LucaCappelletti94 · LucaCappelletti94 · Jun 9, 2026 · Jun 9, 2026 · Jun 9, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,14 @@
 # Changelog
 
+## June 2026: robustness badges
+
+- Each parser page gains a Robustness section with six per-parser badges mined from the parser's own source and behavior, so a chooser can weigh crash-safety alongside speed and coverage.
+- Static panic discipline: a new `featurescan` crate parses each parser's library source with `syn` and counts panic-inducing constructs (`panic!`, `unreachable!`, `unimplemented!`, `todo!`, `unwrap`, `expect`, indexing), excluding tests, benches, and test-helper files. It also reads the crate's own lint policy, so a parser that bans those constructs by design is shown as banned. The counts are a code-smell proxy, not a crash proof.
+- Empirical panic rate: grading now tells a caught panic apart from an honest error, so each parser page reports how often the parser actually panics on the real corpus rather than returning an error. qusql-parse is the only parser that panics on real input (a fraction of a percent), and turso_parser's many static `unreachable!` macros never fire, which is exactly why the static and empirical signals are shown side by side.
+- Recursion depth: a child-process probe measures how deeply each parser nests input before it either rejects with a clean recursion-limit error or overflows the stack and aborts the process. Among the pure-Rust parsers only sqlparser-rs (limit 48) and sqlite3-parser (no call recursion) are depth-guarded, while polyglot-sql overflows at depth 232.
+- Unsafe surface (count plus whether the crate forbids unsafe), direct dependency count, and whether the AST derives serde round out the badges.
+- The feature scan and depth probe run as part of `cargo regen`, and their committed JSON snapshots are baked into the site at build time, so the wasm build stays free of network access.
+
 ## June 2026: real engines, batch axis, and the time machine
 
 - Validity is now graded against the real database engines (PostgreSQL, SQLite, MySQL, ClickHouse, DuckDB, SQL Server), run once locally in Docker via testcontainers by the `oracle` crate, with the labels committed under `oracle/labels` so grading and CI need no Docker. Library oracles are gone.

diff --git a/Cargo.toml b/Cargo.toml
@@ -1,5 +1,5 @@
 [workspace]
-members = [".", "viz", "web", "membench", "oracle", "timemachine"]
+members = [".", "viz", "web", "membench", "oracle", "timemachine", "featurescan"]
 default-members = ["."]
 resolver = "2"
 

diff --git a/benches/batch_parsing.rs b/benches/batch_parsing.rs
@@ -14,7 +14,7 @@
 //! parses in that dialect), so the two numbers are directly comparable.
 //!
 //! Only parsers with a multi-statement entry point take part (see
-//! `BenchParser::can_batch`); `databend-common-ast` parses one statement per
+//! `BenchParser::can_batch`). `databend-common-ast` parses one statement per
 //! call and is simply skipped here.
 //!
 //! Output (under `target/batch_dist/`), self-contained for now (not yet wired
@@ -157,7 +157,7 @@ fn main() {
         return;
     }
 
-    // Acceptance checks are panic-guarded; suppress the default panic message so
+    // Acceptance checks are panic-guarded. Suppress the default panic message so
     // a caught panic does not spam stderr.
     std::panic::set_hook(Box::new(|_| {}));
 

diff --git a/featurescan/Cargo.toml b/featurescan/Cargo.toml
@@ -0,0 +1,41 @@
+[package]
+name = "featurescan"
+version = "0.1.0"
+edition = "2021"
+description = "Static source-feature scan of the benchmarked SQL parsers (panic discipline, unsafe, lint policy)"
+license = "MIT"
+publish = false
+default-run = "featurescan"
+
+# Offline analysis crate. It locates each benchmarked parser's source on disk via
+# `cargo metadata` (git checkouts and registry crates alike), parses the library
+# `src/` with `syn`, and records panic-inducing constructs, unsafe usage, and the
+# crate's own lint policy. The committed result (featurescan/data/featurescan.json)
+# is baked into the web metadata, so the wasm build stays free of network access.
+# Not a default workspace member, so `cargo build` never pulls these deps.
+
+[dependencies]
+cargo_metadata = "0.19"
+syn = { version = "2", features = ["full", "visit"] }
+# span-locations gives line numbers on spans, so test-region lines can be
+# subtracted from the LOC used for per-KLOC density.
+proc-macro2 = { version = "1", features = ["span-locations"] }
+quote = "1"
+walkdir = "2"
+toml = "0.8"
+serde_json = "1"
+# Shared schema types (FeatureScan/DepthScan), so the committed JSON the web bakes
+# in has a single source of truth.
+viz = { path = "../viz" }
+# The depth probe runs the real parsers, so it needs the main crate. The static
+# scan binary does not use it, but a workspace member with one heavy dep is fine
+# since featurescan is never part of the default build.
+sql_ast_benchmark = { path = ".." }
+
+[[bin]]
+name = "featurescan"
+path = "src/main.rs"
+
+[[bin]]
+name = "featurescan-depth"
+path = "src/depth.rs"
diff --git a/featurescan/data/depth.json b/featurescan/data/depth.json
@@ -0,0 +1,97 @@
+{
+  "note": "Recursion-depth probe (nested parens, 8 MiB worker stack, ceiling 50000). Each (parser, depth) trial runs in a child process. A clean exit means the depth was handled, a signal kill means a stack overflow. `crash_depth` is the smallest overflowing depth (null = never crashed up to the ceiling). `limit_depth` is the smallest depth the parser rejects instead of accepting (its graceful recursion limit, null = accepts up to the boundary). `guarded` = it never crashes up to the ceiling, either because it rejects deep input cleanly (`limit_depth` set) or because it handles every probed depth without overflowing (`limit_depth` null). Regenerate with `cargo run -p featurescan --bin featurescan-depth`.",
+  "stack_bytes": 8388608,
+  "ceil": 50000,
+  "parsers": [
+    {
+      "parser": "sqlparser-rs",
+      "dialect": "postgresql",
+      "guarded": true,
+      "shape_rejected": false,
+      "limit_depth": 48,
+      "crash_depth": null,
+      "ceil": 50000
+    },
+    {
+      "parser": "pg_query.rs",
+      "dialect": "postgresql",
+      "guarded": true,
+      "shape_rejected": false,
+      "limit_depth": 9994,
+      "crash_depth": null,
+      "ceil": 50000
+    },
+    {
+      "parser": "pg_query (summary)",
+      "dialect": "postgresql",
+      "guarded": true,
+      "shape_rejected": false,
+      "limit_depth": 9994,
+      "crash_depth": null,
+      "ceil": 50000
+    },
+    {
+      "parser": "qusql-parse",
+      "dialect": "postgresql",
+      "guarded": false,
+      "shape_rejected": false,
+      "limit_depth": null,
+      "crash_depth": 8578,
+      "ceil": 50000
+    },
+    {
+      "parser": "polyglot-sql",
+      "dialect": "postgresql",
+      "guarded": false,
+      "shape_rejected": false,
+      "limit_depth": null,
+      "crash_depth": 232,
+      "ceil": 50000
+    },
+    {
+      "parser": "databend-common-ast",
+      "dialect": "postgresql",
+      "guarded": false,
+      "shape_rejected": false,
+      "limit_depth": null,
+      "crash_depth": 421,
+      "ceil": 50000
+    },
+    {
+      "parser": "orql",
+      "dialect": "oracle",
+      "guarded": false,
+      "shape_rejected": true,
+      "limit_depth": null,
+      "crash_depth": 5184,
+      "ceil": 50000
+    },
+    {
+      "parser": "sqlglot-rust",
+      "dialect": "postgresql",
+      "guarded": false,
+      "shape_rejected": false,
+      "limit_depth": null,
+      "crash_depth": 548,
+      "ceil": 50000
+    },
+    {
+      "parser": "sqlite3-parser",
+      "dialect": "sqlite",
+      "guarded": true,
+      "shape_rejected": false,
+      "limit_depth": null,
+      "crash_depth": null,
+      "ceil": 50000
+    },
+    {
+      "parser": "turso_parser",
+      "dialect": "sqlite",
+      "guarded": false,
+      "shape_rejected": false,
+      "limit_depth": null,
+      "crash_depth": 2199,
+      "ceil": 50000
+    }
+  ]
+}