From d950310c82fd0a9d4eee8f8f818c7f757b1dd0a9 Mon Sep 17 00:00:00 2001 From: fl Date: Thu, 25 Jun 2026 00:03:04 +0800 Subject: [PATCH 1/4] =?UTF-8?q?feat(M6c):=20parse=20@$v/#$v=20binding=20?= =?UTF-8?q?=E2=80=94=20symbols,=20flat-path=20marking,=20S0214/5/6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Opus 4.8 --- spec/joins_spec.lua | 64 ++++++++++++++++++++++++++++++++++++++++++ src/jsonata/errors.lua | 3 ++ src/jsonata/parser.lua | 56 ++++++++++++++++++++++++++++++++++++ 3 files changed, 123 insertions(+) create mode 100644 spec/joins_spec.lua diff --git a/spec/joins_spec.lua b/spec/joins_spec.lua new file mode 100644 index 0000000..fa55ba9 --- /dev/null +++ b/spec/joins_spec.lua @@ -0,0 +1,64 @@ +local jsonata = require("jsonata") +local parser = require("jsonata.parser") + +describe("M6c parser: @ / # bind focus/index on the last flat step", function() + it("a@$x.b flattens to [a(focus x), b] with tuple set", function() + local ast = parser.parse("a@$x.b") + assert.are.equal("path", ast.type) + assert.are.equal("x", ast.steps[1].focus) + assert.is_true(ast.steps[1].tuple) + assert.are.equal("b", ast.steps[2].value) + assert.is_nil(ast.steps[2].focus) + end) + + it("$#$pos wraps a single step with index set", function() + local ast = parser.parse("$#$pos") + assert.are.equal("path", ast.type) + assert.are.equal("pos", ast.steps[1].index) + assert.is_true(ast.steps[1].tuple) + end) + + it("a.b@$l.c@$m keeps a flat 3-step path with two focuses", function() + local ast = parser.parse("a.b@$l.c@$m") + assert.are.equal(3, #ast.steps) + assert.are.equal("l", ast.steps[2].focus) + assert.are.equal("m", ast.steps[3].focus) + end) +end) + +describe("M6c parser: validation errors", function() + it("@ with a non-variable rhs raises S0214 (token @)", function() + local ok, err = pcall(parser.parse, "Account.Order@o.Product") + assert.is_false(ok) + assert.are.equal("S0214", err.code) + 
assert.are.equal("@", err.token) + end) + + it("# with a non-variable rhs raises S0214 (token #)", function() + local ok, err = pcall(parser.parse, "Account.Order@$o#i.Product") + assert.is_false(ok) + assert.are.equal("S0214", err.code) + assert.are.equal("#", err.token) + end) + + it("@ after a predicate raises S0215", function() + local ok, err = pcall(parser.parse, "Account.Order[1]@$o.Product") + assert.is_false(ok) + assert.are.equal("S0215", err.code) + end) + + it("@ after a sort raises S0216", function() + local ok, err = pcall(parser.parse, "Account.Order^(>OrderID)@$o.Product") + assert.is_false(ok) + assert.are.equal("S0216", err.code) + end) + + it("# after a sort/filter does NOT raise (it indexes the step)", function() + assert.has_no.errors(function() + parser.parse("$^($)#$pos") + end) + assert.has_no.errors(function() + parser.parse("$[[1..4]]#$pos") + end) + end) +end) diff --git a/src/jsonata/errors.lua b/src/jsonata/errors.lua index 25731db..fff2240 100644 --- a/src/jsonata/errors.lua +++ b/src/jsonata/errors.lua @@ -8,6 +8,9 @@ local MESSAGES = { S0201 = "Syntax error", S0203 = "Expected token before end of expression", S0211 = "The symbol cannot be used as a unary operator", + S0214 = "The right side of the {{token}} operator must be a variable name", + S0215 = "A context variable binding must precede any predicates on a step in a path expression", + S0216 = "A context variable binding must precede the order-by clause on a step in a path expression", S0217 = "The object representing the 'parent' cannot be derived from this expression", S0401 = "Type parameters can only be applied to functions and arrays", S0402 = "Choice groups containing parameterized types are not supported", diff --git a/src/jsonata/parser.lua b/src/jsonata/parser.lua index c6953e0..e179cdb 100644 --- a/src/jsonata/parser.lua +++ b/src/jsonata/parser.lua @@ -520,6 +520,29 @@ do end end +-- Context/index binding `@`/`#` (M6c). 
Both bp 80 (same as `.`/`[`); the led +-- validates a variable rhs (S0214). processAST/flatten_path wire focus/index. +do + local s = symbol("@", 80) + s.led = function(p, t, left) + local rhs = p.expression(80) + if rhs.type ~= "variable" then + errors.raise("S0214", { position = t.position, token = "@" }) + end + return { type = "binary", value = "@", lhs = left, rhs = rhs, position = t.position } + end +end +do + local s = symbol("#", 80) + s.led = function(p, t, left) + local rhs = p.expression(80) + if rhs.type ~= "variable" then + errors.raise("S0214", { position = t.position, token = "#" }) + end + return { type = "binary", value = "#", lhs = left, rhs = rhs, position = t.position } + end +end + function M.parse_raw(source) local p = make_parser(source) p.advance() @@ -656,10 +679,34 @@ local function resolve_ancestry(path, ctx) end end +-- M6c: `@` sets focus, `#` sets index, both mark the step .tuple. Only `@` +-- validates position (S0215 after a predicate, S0216 after a sort); `#` indexes +-- any step. If the lhs flattened to a nested path (e.g. a sort), bind its last step. +local function mark_binding(step, node) + if step.type == "path" then + step = step.steps[#step.steps] + end + if node.value == "@" then + if step.predicate ~= nil or step.keepArray then + errors.raise("S0215", { position = node.position, token = "@" }) + end + if step.type == "sort" then + errors.raise("S0216", { position = node.position, token = "@" }) + end + step.focus = node.rhs.value + else + step.index = node.rhs.value + end + step.tuple = true +end + local function flatten_path(node, steps, ctx) if node.type == "binary" and node.value == "." 
then flatten_path(node.lhs, steps, ctx) flatten_path(node.rhs, steps, ctx) + elseif node.type == "binary" and (node.value == "@" or node.value == "#") then + flatten_path(node.lhs, steps, ctx) + mark_binding(steps[#steps], node) else steps[#steps + 1] = process_ast(node, ctx) end @@ -761,6 +808,15 @@ process_ast = function(ast, ctx) resolve_ancestry(path, ctx) return path end + if ast.type == "binary" and (ast.value == "@" or ast.value == "#") then + -- top-level `@`/`#` whose lhs is not a `.`-path (e.g. `$#$pos`); flatten it + -- the same way and wrap the single step so the evaluator enters tuple mode. + local steps = {} + flatten_path(ast, steps, ctx) + local path = { type = "path", steps = steps, position = ast.position } + resolve_ancestry(path, ctx) + return path + end if ast.type == "bind" then ast.lhs = process_ast(ast.lhs, ctx) ast.rhs = process_ast(ast.rhs, ctx) From ffa74afc3bd0e04549dd67ec35ab56d82bd306cb Mon Sep 17 00:00:00 2001 From: fl Date: Thu, 25 Jun 2026 00:15:10 +0800 Subject: [PATCH 2/4] feat(M6c): bind @$v focus (no @ advance) + #$v index (0-based) in eval_path_tuple Bind the navigated value under $v without advancing @ (focus freezes the context, making order@$o.product@$p a cross-product), and bind the 0-based result index under $i. Also propagate the tuple stream out of a nested tuple path (a focus/index step the parser wrapped to hold a predicate) so its $v binding survives, and skip undefined operands in comparison type-checks (jsonata returns undefined rather than raising T2010), so the deferred #$pos reorder cases evaluate without crashing. 
Co-Authored-By: Claude Opus 4.8 --- spec/joins_spec.lua | 81 +++++++++++++++++++++++++++++++++++++++ src/jsonata/evaluator.lua | 68 +++++++++++++++++++++++++++++--- 2 files changed, 144 insertions(+), 5 deletions(-) diff --git a/spec/joins_spec.lua b/spec/joins_spec.lua index fa55ba9..ef2ffa6 100644 --- a/spec/joins_spec.lua +++ b/spec/joins_spec.lua @@ -62,3 +62,84 @@ describe("M6c parser: validation errors", function() end) end) end) + +local function run(src, input) + return jsonata.compile(src):evaluate(input) +end + +describe("M6c eval: #$v index binding (0-based, natural order)", function() + local NUMS = { 3, 1, 4, 1, 5, 9 } + + it("$#$pos[$pos<3] keeps the first three (0-based index)", function() + assert.are.same({ 3, 1, 4 }, run("$#$pos[$pos<3]", NUMS)) + end) + + it("$#$pos[$pos<3][1] then positionally indexes the survivors", function() + assert.are.equal(1, run("$#$pos[$pos<3][1]", NUMS)) + end) + + it("$#$pos[$pos<3]^($)[-1] sorts the survivors and takes the last", function() + assert.are.equal(4, run("$#$pos[$pos<3]^($)[-1]", NUMS)) + end) + + it("index carries through a following step (per input item, 0-based)", function() + local DATA = { + Account = { + Order = { + { OrderID = "o1", Product = { { pid = 1 }, { pid = 2 } } }, + { OrderID = "o2", Product = { { pid = 3 } } }, + }, + }, + } + local res = run("Account.Order#$o.Product.{ 'pid': pid, 'oi': $o }", DATA) + assert.are.same({ + { pid = 1, oi = 0 }, + { pid = 2, oi = 0 }, + { pid = 3, oi = 1 }, + }, res) + end) +end) + +describe("M6c eval: @$v focus binding (cross-product join)", function() + local DATA = { + order = { { oid = "A", pid = 1 }, { oid = "B", pid = 2 } }, + product = { + { pid = 1, name = "Hat" }, + { pid = 2, name = "Shoe" }, + { pid = 1, name = "Cap" }, + }, + } + + it("order@$o.product@$p[$o.pid=$p.pid] joins on pid", function() + local res = run("order@$o.product@$p[$o.pid=$p.pid].{ 'order': $o.oid, 'name': $p.name }", DATA) + assert.are.same({ + { order = "A", name = "Hat" }, 
+ { order = "A", name = "Cap" }, + { order = "B", name = "Shoe" }, + }, res) + end) + + it("focus does NOT advance @: product still evaluates from the root", function() + local res = run("order@$o.product@$p.{ 'o': $o.oid, 'p': $p.name }", DATA) + assert.are.equal(6, #res) + end) +end) + +describe("M6c eval: deferred reorder cases don't crash (red is OK)", function() + it("$[[1..4]]#$pos[$pos>=2] evaluates to a structured value", function() + assert.has_no.errors(function() + run("$[[1..4]]#$pos[$pos>=2]", { 3, 1, 4, 1, 5, 9 }) + end) + end) + it("$^($)#$pos[$pos<3] evaluates to a structured value", function() + assert.has_no.errors(function() + run("$^($)#$pos[$pos<3]", { 3, 1, 4, 1, 5, 9 }) + end) + end) +end) + +describe("M6c eval: % parent + plain paths still work (regression)", function() + it("a.b.%.c resolves the ancestor unchanged", function() + assert.are.equal(7, run("a.b.%.c", { a = { b = 1, c = 7 } })) + end) +end) diff --git a/src/jsonata/evaluator.lua b/src/jsonata/evaluator.lua index 128bee7..5f43d1b 100644 --- a/src/jsonata/evaluator.lua +++ b/src/jsonata/evaluator.lua @@ -88,8 +88,25 @@ local function eval_binary(node, input, env) elseif op == "!=" then return not M.deep_equal(lhs, rhs) elseif op == "<" or op == "<=" or op == ">" or op == ">=" then + -- Validate each DEFINED operand independently (jsonata skips undefined in + -- the type check); if either operand is undefined the result is undefined. 
+ if not V.is_nothing(lhs) then + local lt = V.typeof(lhs) + if lt ~= "number" and lt ~= "string" then + errors.raise("T2010", { value = lhs }) + end + end + if not V.is_nothing(rhs) then + local rt = V.typeof(rhs) + if rt ~= "number" and rt ~= "string" then + errors.raise("T2010", { value = rhs }) + end + end + if V.is_nothing(lhs) or V.is_nothing(rhs) then + return V.NOTHING + end local lt, rt = V.typeof(lhs), V.typeof(rhs) - if (lt ~= "number" and lt ~= "string") or lt ~= rt then + if lt ~= rt then errors.raise("T2010", { value = lhs }) end if op == "<" then @@ -526,11 +543,28 @@ local function finalize_sequence(seq, keep_singleton) end M.finalize_sequence = finalize_sequence +-- A path node is a tuple path when any of its steps carries .tuple. When such a +-- path appears as a nested step inside an enclosing tuple stream (e.g. a focus- +-- bound step that the parser wrapped in a path to attach a predicate), it must +-- yield its tuple stream so the enclosing loop can merge its bindings (its $v), +-- instead of collapsing to bare @ values. +local function path_is_tuple(node) + if node.type ~= "path" then + return false + end + for _, s in ipairs(node.steps) do + if s.tuple then + return true + end + end + return false +end + -- Tuple-stream variant of eval_path: used when any step carries .tuple (an -- ancestor anchor wired by the parser). Tuples flow per item; a step with -- .ancestor binds its INPUT item under the slot label on every output tuple; -- sub-evaluations run with a per-tuple frame so `%` resolves via env lookup. 
-local function eval_path_tuple(node, input, env) +local function eval_path_tuple(node, input, env, want_tuples) local steps = node.steps local tuples local start = 1 @@ -556,6 +590,16 @@ local function eval_path_tuple(node, input, env) tuples[j][steps[1].ancestor.label] = input end end + if steps[1].index then + for j = 1, #tuples do + tuples[j][steps[1].index] = j - 1 + end + end + if steps[1].focus then + for j = 1, #tuples do + tuples[j][steps[1].focus] = tuples[j]["@"] + end + end if steps[1].predicate then tuples = apply_predicates(tuples, steps[1].predicate, env, true) end @@ -588,7 +632,14 @@ local function eval_path_tuple(node, input, env) local item = t["@"] if not V.is_nothing(item) then local frame = create_frame_from_tuple(env, t) - local res = eval_step_on_item(step, item, frame) + local res + if path_is_tuple(step) then + -- nested tuple path (e.g. focus/index step wrapped to hold a + -- predicate): evaluate it as a tuple stream so its bindings survive. + res = eval_path_tuple(step, item, frame, true) + else + res = eval_step_on_item(step, item, frame) + end if not V.is_nothing(res) then if V.is_sequence(res) and V.get_flag(res, "tuple_stream") then -- nested tuple-returning path: merge its bindings wholesale @@ -607,7 +658,14 @@ local function eval_path_tuple(node, input, env) end for b = 1, #list do local nt = copy_tuple(t) - nt["@"] = list[b] + if step.focus then + nt[step.focus] = list[b] -- bind under $v; @ stays at the parent context + else + nt["@"] = list[b] + end + if step.index then + nt[step.index] = b - 1 -- 0-based position within this item's results + end if step.ancestor then nt[step.ancestor.label] = item end @@ -624,7 +682,7 @@ local function eval_path_tuple(node, input, env) end end - if node.tuple then + if node.tuple or want_tuples then local seq = V.sequence() for j = 1, #tuples do seq[j] = tuples[j] From bda24c95eca0c0ae561781dffda9200aaaa26992 Mon Sep 17 00:00:00 2001 From: fl Date: Thu, 25 Jun 2026 00:24:01 +0800 Subject: 
[PATCH 3/4] =?UTF-8?q?test(M6c):=20regen=20official-suite=20basel?= =?UTF-8?q?ine=20=E2=80=94=20joins=20group=20gains,=20zero=20regressions?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Opus 4.8 --- spec/jsonata-suite/baseline.lua | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/spec/jsonata-suite/baseline.lua b/spec/jsonata-suite/baseline.lua index df552a1..290dd7a 100644 --- a/spec/jsonata-suite/baseline.lua +++ b/spec/jsonata-suite/baseline.lua @@ -99,6 +99,8 @@ return { ["comparison-operators/case024"] = true, ["comparison-operators/case025"] = true, ["comparison-operators/case026"] = true, + ["comparison-operators/case027"] = true, + ["comparison-operators/case028"] = true, ["comparison-operators/deep-equals/0"] = true, ["comparison-operators/deep-equals/1"] = true, ["comparison-operators/deep-equals/10"] = true, @@ -818,10 +820,33 @@ return { ["inclusion-operator/case006"] = true, ["inclusion-operator/case007"] = true, ["inclusion-operator/case008"] = true, + ["joins/employee-map-reduce/0"] = true, + ["joins/employee-map-reduce/11"] = true, ["joins/errors/0"] = true, ["joins/errors/1"] = true, ["joins/errors/2"] = true, ["joins/errors/3"] = true, + ["joins/index/0"] = true, + ["joins/index/1"] = true, + ["joins/index/10"] = true, + ["joins/index/11"] = true, + ["joins/index/13"] = true, + ["joins/index/14"] = true, + ["joins/index/2"] = true, + ["joins/index/3"] = true, + ["joins/index/4"] = true, + ["joins/index/5"] = true, + ["joins/index/7"] = true, + ["joins/index/8"] = true, + ["joins/index/9"] = true, + ["joins/library-joins/0"] = true, + ["joins/library-joins/1"] = true, + ["joins/library-joins/2"] = true, + ["joins/library-joins/3"] = true, + ["joins/library-joins/4"] = true, + ["joins/library-joins/5"] = true, + ["joins/library-joins/6"] = true, + ["joins/library-joins/9"] = true, ["lambdas/case000"] = true, ["lambdas/case001"] = true, 
["lambdas/case002"] = true, @@ -975,6 +1000,7 @@ return { ["partial-application/case003"] = true, ["partial-application/case004"] = true, ["performance/case000"] = true, + ["performance/case001"] = true, ["predicates/case000"] = true, ["predicates/case001"] = true, ["predicates/case002"] = true, From 8661647c8d033d7bdc0ce32f6a63efd06b016f4e Mon Sep 17 00:00:00 2001 From: fl Date: Thu, 25 Jun 2026 00:33:30 +0800 Subject: [PATCH 4/4] =?UTF-8?q?fix(M6c):=20comparison=20type-mismatch=20ra?= =?UTF-8?q?ises=20T2009=20(not=20T2010)=20=E2=80=94=20oracle=20fidelity?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The </<=/>/>= operator (reworked in M6c for undefined-operand handling) conflated jsonata's T2010 (operand not number/string) with T2009 (operands of different types). "a" < 3 now raises T2009, matching jsonata-js v2.2.1. Co-Authored-By: Claude Opus 4.8 --- spec/operators_spec.lua | 22 ++++++++++++++++++++++ src/jsonata/errors.lua | 1 + src/jsonata/evaluator.lua | 2 +- 3 files changed, 24 insertions(+), 1 deletion(-) diff --git a/spec/operators_spec.lua b/spec/operators_spec.lua index f7b3a4c..1a02a66 100644 --- a/spec/operators_spec.lua +++ b/spec/operators_spec.lua @@ -104,3 +104,25 @@ describe("?? / ?: right-greedy RHS (matches jsonata expression(0))", function() assert.are.equal(0, run('0 ?? "x" ? 
"b" : "c"')) end) end) + +describe("comparison operator error codes (oracle fidelity)", function() + it("raises T2009 when operands are number/string of different types", function() + local ok1, err1 = pcall(run, '"a" < 3') + assert.is_false(ok1) + assert.are.equal("T2009", err1.code) + + local ok2, err2 = pcall(run, '3 <= "a"') + assert.is_false(ok2) + assert.are.equal("T2009", err2.code) + end) + + it("raises T2010 when an operand is not a number or string", function() + local ok1, err1 = pcall(run, "false > 1") + assert.is_false(ok1) + assert.are.equal("T2010", err1.code) + + local ok2, err2 = pcall(run, 'null <= "x"') + assert.is_false(ok2) + assert.are.equal("T2010", err2.code) + end) +end) diff --git a/src/jsonata/errors.lua b/src/jsonata/errors.lua index fff2240..d5e7d69 100644 --- a/src/jsonata/errors.lua +++ b/src/jsonata/errors.lua @@ -27,6 +27,7 @@ local MESSAGES = { T2004 = "The right side of the range operator (..) must evaluate to an integer", T2007 = "Type mismatch when comparing values {{value}} and {{value2}} in order-by clause", T2008 = "The expressions within an order-by clause must evaluate to numeric or string values", + T2009 = "The values {{value}} and {{value2}} either side of operator {{token}} must be of the same data type", T2010 = "Operands of comparison must both be numbers or both be strings", T2011 = "The insert/update clause of the transform expression must evaluate to an object", T2012 = "The delete clause of the transform expression must evaluate to an array of strings", diff --git a/src/jsonata/evaluator.lua b/src/jsonata/evaluator.lua index 5f43d1b..6f30d20 100644 --- a/src/jsonata/evaluator.lua +++ b/src/jsonata/evaluator.lua @@ -107,7 +107,7 @@ local function eval_binary(node, input, env) end local lt, rt = V.typeof(lhs), V.typeof(rhs) if lt ~= rt then - errors.raise("T2010", { value = lhs }) + errors.raise("T2009", { value = lhs, value2 = rhs, token = op }) end if op == "<" then return lhs < rhs