Skip to content

Commit 5fcdd18

Browse files
committed
test: baseline ParserSQL against PostgreSQL 18
1 parent 5d1a12d commit 5fcdd18

13 files changed

Lines changed: 102189 additions & 27 deletions

docs/compatibility/postgresql-18.md

Lines changed: 26043 additions & 0 deletions
Large diffs are not rendered by default.

scripts/pg_compat/generate_report.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,8 +122,11 @@ def _structural_rows(structural_features, links, *, only_unwitnessed):
122122
):
123123
feature_id = feature.get("id", "")
124124
witness_ids = links.get(feature_id, [])
125+
has_disposition = bool(feature.get("disposition"))
125126
if only_unwitnessed and witness_ids:
126127
continue
128+
if only_unwitnessed and has_disposition:
129+
continue
127130
if not only_unwitnessed and not witness_ids:
128131
continue
129132
rows.append(
@@ -138,6 +141,31 @@ def _structural_rows(structural_features, links, *, only_unwitnessed):
138141
return rows
139142

140143

144+
def _structural_disposition_rows(structural_features):
145+
rows = []
146+
for feature in sorted(
147+
structural_features,
148+
key=lambda row: (
149+
str(row.get("kind", "")),
150+
str(row.get("symbol", "")),
151+
str(row.get("id", "")),
152+
),
153+
):
154+
disposition = feature.get("disposition")
155+
if not disposition:
156+
continue
157+
rows.append(
158+
[
159+
feature.get("id", ""),
160+
feature.get("kind", ""),
161+
feature.get("symbol", ""),
162+
disposition,
163+
feature.get("reason", ""),
164+
]
165+
)
166+
return rows
167+
168+
141169
def _newly_supported_rows(baseline_evaluation):
142170
rows = []
143171
for row in baseline_evaluation.get("allowed", []):
@@ -266,6 +294,19 @@ def generate_report(
266294
_structural_rows(structural_features, links, only_unwitnessed=False),
267295
)
268296
)
297+
lines.extend(
298+
[
299+
"",
300+
"## Reviewed Structural Dispositions",
301+
"",
302+
]
303+
)
304+
lines.extend(
305+
_table(
306+
["Feature ID", "Kind", "Symbol", "Disposition", "Reason"],
307+
_structural_disposition_rows(structural_features),
308+
)
309+
)
269310
lines.extend(
270311
[
271312
"",

scripts/pg_compat/run_compat.py

Lines changed: 51 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
ROLE_MAJOR = {"previous": "17", "target": "18"}
3939
DEFAULT_CACHE = Path("/tmp/parsersql-pg-compat")
4040
TIMEOUT_SECONDS = 120
41+
CI_CASE_BATCH_SIZE = 1000
4142

4243

4344
def runner_path(cache, role):
@@ -212,6 +213,13 @@ def _postgres_sql_files(cache, version):
212213
return sorted(source_dir.rglob("*.sql"))
213214

214215

216+
def is_skippable_runner_error(message):
    """Return True when a runner error message marks a skippable failure.

    A message counts as skippable when it contains either of the
    substrings "invalid UTF-8" or "statement splitting failed"; the
    match is case-sensitive.
    """
    skippable_markers = ("invalid UTF-8", "statement splitting failed")
    return any(marker in message for marker in skippable_markers)
221+
222+
215223
def _structural_sources(cache, role, version):
216224
checkout = _role_checkout(cache, role)
217225
postgres = _postgres_source_dir(cache, version)
@@ -240,6 +248,7 @@ def collect_inventory(cache, role, repo_root, pins):
240248
runner = runner_path(cache, role)
241249
rows = []
242250
errors = []
251+
skipped = []
243252
for sql_file in _postgres_sql_files(cache, version["pg_version"]):
244253
try:
245254
rows.extend(
@@ -251,7 +260,11 @@ def collect_inventory(cache, role, repo_root, pins):
251260
)
252261
)
253262
except RuntimeError as error:
254-
errors.append(str(error))
263+
message = str(error)
264+
if is_skippable_runner_error(message):
265+
skipped.append(message)
266+
else:
267+
errors.append(message)
255268
if errors:
256269
raise RuntimeError(
257270
"PostgreSQL compatibility runner failed:\n"
@@ -260,6 +273,12 @@ def collect_inventory(cache, role, repo_root, pins):
260273
)
261274

262275
accepted, _diagnostics = partition_rows(rows)
276+
if skipped:
277+
print(
278+
f"Skipped {len(skipped)} PostgreSQL corpus file(s) for {role} "
279+
"because they are invalid-encoding or unsplittable negative "
280+
"syntax fixtures."
281+
)
263282
return build_inventory(accepted)
264283

265284

@@ -278,34 +297,40 @@ def run_committed_ci_cases(repo_root, runner, runner_args):
278297
branch = runner_args[runner_args.index("--branch") + 1]
279298
commit = runner_args[runner_args.index("--commit") + 1]
280299
checked = 0
281-
for case in cases:
300+
for offset in range(0, len(cases), CI_CASE_BATCH_SIZE):
301+
batch = cases[offset:offset + CI_CASE_BATCH_SIZE]
282302
with tempfile.NamedTemporaryFile(
283303
mode="w",
284304
encoding="utf-8",
285305
suffix=".sql",
286306
) as sql_file:
287-
sql_file.write(case["sql"].rstrip() + ";\n")
307+
for case in batch:
308+
sql_file.write(case["sql"].rstrip() + "\n;\n")
288309
sql_file.flush()
289310
rows = _run_runner_file(
290311
runner,
291312
sql_file.name,
292313
branch=branch,
293314
commit=commit,
294315
)
295-
if len(rows) != 1:
296-
raise AssertionError(f"CI case {case['id']}: expected one row")
297-
actual = rows[0]
298-
if actual["result"] != case["expected_result"]:
299-
raise AssertionError(
300-
f"CI case {case['id']}: expected result "
301-
f"{case['expected_result']}, got {actual['result']}"
302-
)
303-
if actual["oracle_node"] != case["oracle_node"]:
316+
if len(rows) != len(batch):
317+
first_id = batch[0]["id"] if batch else "<empty>"
304318
raise AssertionError(
305-
f"CI case {case['id']}: expected oracle node "
306-
f"{case['oracle_node']}, got {actual['oracle_node']}"
319+
f"CI case batch starting at {first_id}: expected "
320+
f"{len(batch)} row(s), got {len(rows)}"
307321
)
308-
checked += 1
322+
for case, actual in zip(batch, rows):
323+
if actual["result"] != case["expected_result"]:
324+
raise AssertionError(
325+
f"CI case {case['id']}: expected result "
326+
f"{case['expected_result']}, got {actual['result']}"
327+
)
328+
if actual["oracle_node"] != case["oracle_node"]:
329+
raise AssertionError(
330+
f"CI case {case['id']}: expected oracle node "
331+
f"{case['oracle_node']}, got {actual['oracle_node']}"
332+
)
333+
checked += 1
309334
return {"checked": checked, "skipped": False}
310335

311336

@@ -377,11 +402,20 @@ def _load_structural_dispositions(repo_root):
377402

378403

379404
def _apply_structural_dispositions(features, dispositions):
380-
by_id = {row["feature_id"]: row for row in dispositions}
405+
by_id = {
406+
row["feature_id"]: row
407+
for row in dispositions
408+
if "feature_id" in row
409+
}
410+
by_kind = {
411+
row["kind"]: row
412+
for row in dispositions
413+
if "feature_id" not in row and "kind" in row
414+
}
381415
output = []
382416
for feature in features:
383417
row = dict(feature)
384-
disposition = by_id.get(row["id"])
418+
disposition = by_id.get(row["id"]) or by_kind.get(row.get("kind"))
385419
if disposition is not None:
386420
row["disposition"] = disposition["disposition"]
387421
row["reason"] = disposition["reason"]

0 commit comments

Comments
 (0)