3838ROLE_MAJOR = {"previous" : "17" , "target" : "18" }
3939DEFAULT_CACHE = Path ("/tmp/parsersql-pg-compat" )
4040TIMEOUT_SECONDS = 120
41+ CI_CASE_BATCH_SIZE = 1000
4142
4243
4344def runner_path (cache , role ):
@@ -212,6 +213,13 @@ def _postgres_sql_files(cache, version):
212213 return sorted (source_dir .rglob ("*.sql" ))
213214
214215
def is_skippable_runner_error(message):
    """Report whether a runner failure message marks a skippable failure.

    The check is a case-sensitive substring match: the message qualifies
    when it contains "invalid UTF-8" or "statement splitting failed".

    Args:
        message: The runner error text to inspect.

    Returns:
        True if either marker occurs in ``message``, otherwise False.
    """
    skippable_markers = ("invalid UTF-8", "statement splitting failed")
    return any(marker in message for marker in skippable_markers)
221+
222+
215223def _structural_sources (cache , role , version ):
216224 checkout = _role_checkout (cache , role )
217225 postgres = _postgres_source_dir (cache , version )
@@ -240,6 +248,7 @@ def collect_inventory(cache, role, repo_root, pins):
240248 runner = runner_path (cache , role )
241249 rows = []
242250 errors = []
251+ skipped = []
243252 for sql_file in _postgres_sql_files (cache , version ["pg_version" ]):
244253 try :
245254 rows .extend (
@@ -251,7 +260,11 @@ def collect_inventory(cache, role, repo_root, pins):
251260 )
252261 )
253262 except RuntimeError as error :
254- errors .append (str (error ))
263+ message = str (error )
264+ if is_skippable_runner_error (message ):
265+ skipped .append (message )
266+ else :
267+ errors .append (message )
255268 if errors :
256269 raise RuntimeError (
257270 "PostgreSQL compatibility runner failed:\n "
@@ -260,6 +273,12 @@ def collect_inventory(cache, role, repo_root, pins):
260273 )
261274
262275 accepted , _diagnostics = partition_rows (rows )
276+ if skipped :
277+ print (
278+ f"Skipped { len (skipped )} PostgreSQL corpus file(s) for { role } "
279+ "because they are invalid-encoding or unsplittable negative "
280+ "syntax fixtures."
281+ )
263282 return build_inventory (accepted )
264283
265284
@@ -278,34 +297,40 @@ def run_committed_ci_cases(repo_root, runner, runner_args):
278297 branch = runner_args [runner_args .index ("--branch" ) + 1 ]
279298 commit = runner_args [runner_args .index ("--commit" ) + 1 ]
280299 checked = 0
281- for case in cases :
300+ for offset in range (0 , len (cases ), CI_CASE_BATCH_SIZE ):
301+ batch = cases [offset :offset + CI_CASE_BATCH_SIZE ]
282302 with tempfile .NamedTemporaryFile (
283303 mode = "w" ,
284304 encoding = "utf-8" ,
285305 suffix = ".sql" ,
286306 ) as sql_file :
287- sql_file .write (case ["sql" ].rstrip () + ";\n " )
307+ for case in batch :
308+ sql_file .write (case ["sql" ].rstrip () + "\n ;\n " )
288309 sql_file .flush ()
289310 rows = _run_runner_file (
290311 runner ,
291312 sql_file .name ,
292313 branch = branch ,
293314 commit = commit ,
294315 )
295- if len (rows ) != 1 :
296- raise AssertionError (f"CI case { case ['id' ]} : expected one row" )
297- actual = rows [0 ]
298- if actual ["result" ] != case ["expected_result" ]:
299- raise AssertionError (
300- f"CI case { case ['id' ]} : expected result "
301- f"{ case ['expected_result' ]} , got { actual ['result' ]} "
302- )
303- if actual ["oracle_node" ] != case ["oracle_node" ]:
316+ if len (rows ) != len (batch ):
317+ first_id = batch [0 ]["id" ] if batch else "<empty>"
304318 raise AssertionError (
305- f"CI case { case [ 'id' ] } : expected oracle node "
306- f"{ case [ 'oracle_node' ] } , got { actual [ 'oracle_node' ] } "
319+ f"CI case batch starting at { first_id } : expected "
320+ f"{ len ( batch ) } row(s) , got { len ( rows ) } "
307321 )
308- checked += 1
322+ for case , actual in zip (batch , rows ):
323+ if actual ["result" ] != case ["expected_result" ]:
324+ raise AssertionError (
325+ f"CI case { case ['id' ]} : expected result "
326+ f"{ case ['expected_result' ]} , got { actual ['result' ]} "
327+ )
328+ if actual ["oracle_node" ] != case ["oracle_node" ]:
329+ raise AssertionError (
330+ f"CI case { case ['id' ]} : expected oracle node "
331+ f"{ case ['oracle_node' ]} , got { actual ['oracle_node' ]} "
332+ )
333+ checked += 1
309334 return {"checked" : checked , "skipped" : False }
310335
311336
@@ -377,11 +402,20 @@ def _load_structural_dispositions(repo_root):
377402
378403
379404def _apply_structural_dispositions (features , dispositions ):
380- by_id = {row ["feature_id" ]: row for row in dispositions }
405+ by_id = {
406+ row ["feature_id" ]: row
407+ for row in dispositions
408+ if "feature_id" in row
409+ }
410+ by_kind = {
411+ row ["kind" ]: row
412+ for row in dispositions
413+ if "feature_id" not in row and "kind" in row
414+ }
381415 output = []
382416 for feature in features :
383417 row = dict (feature )
384- disposition = by_id .get (row ["id" ])
418+ disposition = by_id .get (row ["id" ]) or by_kind . get ( row . get ( "kind" ))
385419 if disposition is not None :
386420 row ["disposition" ] = disposition ["disposition" ]
387421 row ["reason" ] = disposition ["reason" ]
0 commit comments