diff --git a/spec/jsonata-suite/baseline.lua b/spec/jsonata-suite/baseline.lua index 1216d8d..4264462 100644 --- a/spec/jsonata-suite/baseline.lua +++ b/spec/jsonata-suite/baseline.lua @@ -571,6 +571,11 @@ return { ["function-power/case002"] = true, ["function-power/case003"] = true, ["function-power/case004"] = true, + ["function-replace/case000"] = true, + ["function-replace/case001"] = true, + ["function-replace/case002"] = true, + ["function-replace/case003"] = true, + ["function-replace/case004"] = true, ["function-replace/case005"] = true, ["function-replace/case006"] = true, ["function-replace/case007"] = true, @@ -899,6 +904,7 @@ return { ["literals/case017"] = true, ["literals/case018"] = true, ["literals/case019"] = true, + ["matchers/case000"] = true, ["matchers/case001"] = true, ["missing-paths/case000"] = true, ["missing-paths/case001"] = true, @@ -1058,9 +1064,38 @@ return { ["regex/case004"] = true, ["regex/case005"] = true, ["regex/case006"] = true, + ["regex/case007"] = true, + ["regex/case008"] = true, + ["regex/case009"] = true, + ["regex/case010"] = true, + ["regex/case011"] = true, + ["regex/case012"] = true, + ["regex/case013"] = true, + ["regex/case014"] = true, + ["regex/case015"] = true, + ["regex/case016"] = true, + ["regex/case017"] = true, + ["regex/case018"] = true, + ["regex/case019"] = true, + ["regex/case020"] = true, + ["regex/case021"] = true, ["regex/case022"] = true, + ["regex/case023"] = true, + ["regex/case024"] = true, + ["regex/case025"] = true, + ["regex/case026"] = true, + ["regex/case027"] = true, + ["regex/case028"] = true, + ["regex/case029"] = true, + ["regex/case030"] = true, + ["regex/case031"] = true, + ["regex/case032"] = true, + ["regex/case033"] = true, + ["regex/case034"] = true, ["regex/case035"] = true, ["regex/case036"] = true, + ["regex/case037"] = true, + ["regex/case038"] = true, ["simple-array-selectors/case000"] = true, ["simple-array-selectors/case001"] = true, ["simple-array-selectors/case002"] = true, 
diff --git a/spec/regex_spec.lua b/spec/regex_spec.lua
index 215ee5c..ac74681 100644
--- a/spec/regex_spec.lua
+++ b/spec/regex_spec.lua
@@ -75,3 +75,83 @@ describe("M7a: $contains / $split with regex", function()
     assert.are.same({ "Hello", "World" }, run('$split("Hello World", " ")'))
   end)
 end)
+
+-- Evaluate `expr` and require that it raises an error whose `code` is `code`.
+local function expect_code(code, expr)
+  local ok, raised = pcall(run, expr)
+  assert.is_false(ok)
+  assert.are.equal(code, raised.code)
+end
+
+describe("M7b: H.serialize skips function-valued object keys", function()
+  it("$string of a raw match object omits the next field (valid JSON)", function()
+    -- Applying a regex value to a string yields {match,start,end,groups,next};
+    -- `next` is a function, so emitting it would make the output invalid JSON.
+    local json = run('$string(($m := /b+/; $m("abbc")))')
+    assert.is_string(json)
+    assert.is_nil(string.find(json, '"next"'))
+    assert.is_truthy(string.find(json, '"match":"bb"'))
+  end)
+end)
+
+describe("M7b: $match", function()
+  it("returns an array of {match,index,groups}", function()
+    local expected = {
+      { match = "ab", index = 0, groups = { "b" } },
+      { match = "abb", index = 2, groups = { "bb" } },
+      { match = "abbbb", index = 5, groups = { "bbbb" } },
+    }
+    assert.are.same(expected, run('$match("ababbabbbb", /a(b+)/)'))
+  end)
+  it("limit 1 singleton-unwraps to a single object", function()
+    local only = { match = "ab", index = 0, groups = { "b" } }
+    assert.are.same(only, run('$match("ababbabbbb", /a(b+)/, 1)'))
+  end)
+  it("maps over the array of matches", function()
+    assert.are.same({ "ab", "abb", "abbbb" }, run('$match("ababbabbbb", /a(b+)/).match'))
+  end)
+  it("no match returns undefined", function()
+    assert.is_nil(run('$match("xyz", /q+/)'))
+  end)
+  it("negative limit raises D3040", function()
+    expect_code("D3040", '$match("hello", /l/, -1)')
+  end)
+end)
+
+describe("M7b: $replace", function()
+  it("string pattern, literal replacement", function()
+    assert.are.equal("Hello Everyone", run('$replace("Hello World", "World", "Everyone")'))
+    assert.are.equal("he1lo", run('$replace("hello", "l", "1", 1)'))
+  end)
+  it("regex pattern with $1 group ref", function()
+    assert.are.equal("-b--bb--bbbb-", run('$replace("ababbabbbb", /a(b+)/, "-$1-")'))
+  end)
+  it("$0 whole match and $$ literal", function()
+    assert.are.equal("[ab][abb][abbbb]", run('$replace("ababbabbbb", /a(b+)/, "[$0]")'))
+    assert.are.equal("f$$", run('$replace("foo", /o/, "$$")'))
+    assert.are.equal("$265", run('$replace("265USD", /([0-9]+)USD/, "$$$1")'))
+  end)
+  it("function replacer", function()
+    local expr = '$replace("ababbabbbb", /a(b+)/, function($m){ $string($length($m.match)) })'
+    assert.are.equal("235", run(expr))
+  end)
+  it("function replacer returning non-string raises D3012", function()
+    expect_code("D3012", '$replace("abc", /b/, function($m){ 5 })')
+  end)
+  it("empty pattern raises D3010; negative limit raises D3011", function()
+    expect_code("D3010", '$replace("hello", "", "bye")')
+    expect_code("D3011", '$replace("hello", /l/, "1", -2)')
+  end)
+end)
+
+describe("M7b: non-participating capture group (null, not false)", function()
+  it("$match groups has null for a non-participating optional group", function()
+    -- Check the serialized text, where null and false are told apart.
+    local json = run('$string($match("ababbabbbb", /a(xyz)?(b+)/))')
+    assert.is_nil(string.find(json, "false"))
+    assert.is_truthy(string.find(json, '%[null,"b"%]'))
+  end)
+  it("$replace with $N on a non-participating group yields empty (no crash)", function()
+    assert.are.equal("[][][]", run('$replace("ababbabbbb", /a(xyz)?(b+)/, "[$1]")'))
+  end)
+end)
diff --git a/src/jsonata/errors.lua b/src/jsonata/errors.lua
index dc843f3..b986f01 100644
--- a/src/jsonata/errors.lua
+++ b/src/jsonata/errors.lua
@@ -54,6 +54,11 @@ local MESSAGES = {
   D3141 = "$assert() statement failed",
   D3120 = "Syntax error in expression passed to function eval: {{value}}",
   D3121 = "Dynamic error evaluating the expression passed to function eval: {{value}}",
+  D3010 = "Second argument of replace function cannot be an empty string",
+  D3011 = "Fourth argument of replace function must evaluate to a positive number",
+  D3012 = "Attempted to replace a matched string with a non-string value",
+  D3040 = "Third argument of match function must evaluate to a positive number",
+  T1010 = "The matcher function argument passed to function {{token}} does not return the correct object structure",
 }
 
 function M.is_error(x)
diff --git a/src/jsonata/functions/helpers.lua b/src/jsonata/functions/helpers.lua
index 995c3cb..e3f8732 100644
--- a/src/jsonata/functions/helpers.lua
+++ b/src/jsonata/functions/helpers.lua
@@ -3,6 +3,26 @@ local errors = require("jsonata.errors")
 
 local H = {}
 
+-- Invoke `proc` with `args` through the evaluator's apply. The evaluator is
+-- required on first use rather than at load time, because a top-level require
+-- would close the cycle evaluator -> functions -> helpers -> evaluator.
+local evaluator
+function H.apply(proc, args, context)
+  if not evaluator then
+    evaluator = require("jsonata.evaluator")
+  end
+  return evaluator.apply(proc, args, context)
+end
+
+-- Regex literals evaluate to callable function values tagged `regex = true`.
+-- Yields that (truthy) tag for such a value and `false` for anything else.
+function H.is_regex(x)
+  if type(x) ~= "table" then
+    return false
+  end
+  return (x._jsonata_function and x.regex) or false
+end
+
 -- def(impl) -> any number of args
 -- def(impl, n) -> exactly n (max defaults to min)
 -- def(impl, min, max) -> between min and max (inclusive)
@@ -171,7 +191,12 @@ function H.serialize(x)
   elseif t == "object" then
     local parts = {}
     for _, k in ipairs(V.obj_keys(x)) do
-      parts[#parts + 1] = '"' .. json_escape(k) .. '":' .. H.serialize(V.obj_get(x, k))
+      local member = V.obj_get(x, k)
+      local callable = type(member) == "table" and (member._jsonata_function or member._jsonata_lambda)
+      -- keys holding a function value or nothing are left out of the output
+      if not (callable or V.is_nothing(member)) then
+        parts[#parts + 1] = '"' .. json_escape(k) .. '":' .. H.serialize(member)
+      end
     end
     return "{" .. table.concat(parts, ",") .. "}"
   end
diff --git a/src/jsonata/functions/higher_order.lua b/src/jsonata/functions/higher_order.lua
index 0d3bed6..5650262 100644
--- a/src/jsonata/functions/higher_order.lua
+++ b/src/jsonata/functions/higher_order.lua
@@ -5,15 +5,6 @@ local sort = require("jsonata.sort")
 
 local R = {}
 
--- Lazily reach the evaluator's apply. Avoids a load-time require cycle:
--- evaluator -> functions -> higher_order -> evaluator. By the time any HOF
--- runs, evaluator.apply is defined; require is memoized so this is cheap.
-local eval
-local function apply(proc, args, context)
-  eval = eval or require("jsonata.evaluator")
-  return eval.apply(proc, args, context)
-end
-
 -- Arity a callback declares: lambda -> #params; builtin -> stored arity (= min).
 local function arity_of(proc)
   if type(proc) ~= "table" then
@@ -60,7 +51,7 @@ R.map = H.def(function(arr, fn)
   arr = to_array(arr)
   local seq = V.sequence()
   for i = 1, #arr do
-    local res = apply(fn, hof_args(fn, arr[i], i - 1, arr))
+    local res = H.apply(fn, hof_args(fn, arr[i], i - 1, arr))
     if not V.is_nothing(res) then
       seq[#seq + 1] = res
     end
@@ -76,7 +67,7 @@ R.filter = H.def(function(arr, fn)
   arr = to_array(arr)
   local seq = V.sequence()
   for i = 1, #arr do
-    if H.truthy(apply(fn, hof_args(fn, arr[i], i - 1, arr))) then
+    if H.truthy(H.apply(fn, hof_args(fn, arr[i], i - 1, arr))) then
       seq[#seq + 1] = arr[i]
     end
   end
@@ -113,7 +104,7 @@ R.reduce = H.def(function(arr, fn, init)
     if a >= 4 then
       args[4] = arr
     end
-    acc = apply(fn, args)
+    acc = H.apply(fn, args)
   end
   return acc
 end, 2, 3)
@@ -130,7 +121,7 @@ R.single = H.def(function(arr, fn)
   for i = 1, #arr do
     local positive = true
     if fn ~= nil then
-      positive = H.truthy(apply(fn, hof_args(fn, arr[i], i - 1, arr)))
+      positive = H.truthy(H.apply(fn, hof_args(fn, arr[i], i - 1, arr)))
     end
     if positive then
       if not found then
@@ -156,7 +147,7 @@ R.sift = H.def(function(obj, fn)
   local result = V.object()
   for _, k in ipairs(V.obj_keys(obj)) do
     local v = V.obj_get(obj, k)
-    if H.truthy(apply(fn, hof_args(fn, v, k, obj))) then
+    if H.truthy(H.apply(fn, hof_args(fn, v, k, obj))) then
       V.obj_set(result, k, v)
     end
   end
@@ -174,7 +165,7 @@ R.each = H.def(function(obj, fn)
   local seq = V.sequence()
   for _, k in ipairs(V.obj_keys(obj)) do
     local v = V.obj_get(obj, k)
-    local res = apply(fn, hof_args(fn, v, k, obj))
+    local res = H.apply(fn, hof_args(fn, v, k, obj))
     if not V.is_nothing(res) then
       seq[#seq + 1] = res
     end
@@ -206,7 +197,7 @@ R.sort = H.def(function(arr, comparator)
   local comp_after
   if comparator ~= nil then
     comp_after = function(a, b)
-      return H.truthy(apply(comparator, { a, b }))
+      return H.truthy(H.apply(comparator, { a, b }))
     end
   else
     if not (all_of_type(arr, "number") or all_of_type(arr, "string")) then
diff --git a/src/jsonata/functions/string.lua b/src/jsonata/functions/string.lua
index e1702aa..6a84f77 100644
--- a/src/jsonata/functions/string.lua
+++ b/src/jsonata/functions/string.lua
@@ -3,19 +3,6 @@ local H = require("jsonata.functions.helpers")
 
 local R = {}
 
--- Lazily reach the evaluator's apply (same pattern as higher_order.lua) to
--- avoid a load-time require cycle. Used to run a regex value against a string.
-local eval
-local function apply(proc, args)
-  eval = eval or require("jsonata.evaluator")
-  return eval.apply(proc, args)
-end
-
--- A regex literal evaluates to a callable function value tagged `regex = true`.
-local function is_regex(x)
-  return type(x) == "table" and x._jsonata_function and x.regex
-end
-
 -- M1 scalar $string (container serialization added in Task 7).
local function to_string(x) if V.is_nothing(x) then @@ -172,8 +159,8 @@ R.contains = H.def(function(s, sub) if not require_string(s, "contains", 1) then return V.NOTHING end - if is_regex(sub) then - return not V.is_nothing(apply(sub, { s })) + if H.is_regex(sub) then + return not V.is_nothing(H.apply(sub, { s })) end require_string(sub, "contains", 2) return string.find(s, sub, 1, true) ~= nil @@ -194,10 +181,10 @@ R.split = H.def(function(s, sep, limit) end end local result = V.array({}) - if is_regex(sep) then + if H.is_regex(sep) then local pos = 0 -- 0-based char index into s while true do - local m = apply(sep, { string.sub(s, pos + 1) }) + local m = H.apply(sep, { string.sub(s, pos + 1) }) if V.is_nothing(m) then break end @@ -241,6 +228,151 @@ R.split = H.def(function(s, sep, limit) return result end, 2, 3, ">") +R.match = H.def(function(s, regex, limit) + if not require_string(s, "match", 1) then + return V.NOTHING + end + if not (limit == nil or V.is_nothing(limit)) and limit < 0 then + H.err("D3040", { name = "match", position = 3, value = limit }) + end + local result = V.array({}) + if limit == nil or V.is_nothing(limit) or limit > 0 then + local count = 0 + local m = H.apply(regex, { s }) + while not V.is_nothing(m) and (limit == nil or V.is_nothing(limit) or count < limit) do + local obj = V.object() + V.obj_set(obj, "match", V.obj_get(m, "match")) + V.obj_set(obj, "index", V.obj_get(m, "start")) + V.obj_set(obj, "groups", V.obj_get(m, "groups")) + result[#result + 1] = obj + m = H.apply(V.obj_get(m, "next"), {}) + count = count + 1 + end + end + -- jsonata returns a sequence: 0 -> undefined, 1 -> the bare object, N -> array + if #result == 0 then + return V.NOTHING + elseif #result == 1 then + return result[1] + end + return result +end, 2, 3, "n?:a>") + +-- Build a per-match replacer from a STRING replacement (jsonata $-scanner): +-- $$ -> literal $, $0 -> whole match, $N -> capture group N (maxDigits rule). 
+local function string_replacer(replacement) + return function(m) + local groups = V.obj_get(m, "groups") + local ngroups = #groups + local whole = V.obj_get(m, "match") + local out = {} + local pos = 1 + local len = #replacement + while pos <= len do + local d = string.find(replacement, "$", pos, true) + if not d then + out[#out + 1] = string.sub(replacement, pos) + break + end + out[#out + 1] = string.sub(replacement, pos, d - 1) + pos = d + 1 + local nextch = string.sub(replacement, pos, pos) + if nextch == "$" then + out[#out + 1] = "$" + pos = pos + 1 + elseif nextch == "0" then + out[#out + 1] = whole + pos = pos + 1 + else + local maxDigits = (ngroups == 0) and 1 or (math.floor(math.log(ngroups) / math.log(10)) + 1) + local function parse_int(n) + local digits = string.sub(replacement, pos, pos + n - 1):match("^%d+") + return digits and tonumber(digits) or nil + end + local idx = parse_int(maxDigits) + if maxDigits > 1 and idx and idx > ngroups then + idx = parse_int(maxDigits - 1) + end + if idx then + if ngroups > 0 then + local sub = groups[idx] + -- only an actual string capture is substituted; a non-participating + -- group (null) yields empty, matching jsonata's $N behaviour + if type(sub) == "string" then + out[#out + 1] = sub + end + end + pos = pos + #tostring(idx) + else + out[#out + 1] = "$" + end + end + end + return table.concat(out) + end +end + +R.replace = H.def(function(s, pattern, replacement, limit) + if V.is_nothing(s) then + return V.NOTHING + end + require_string(s, "replace", 1) + if pattern == "" then + H.err("D3010", { name = "replace", position = 2, value = pattern }) + end + if not (limit == nil or V.is_nothing(limit)) and limit < 0 then + H.err("D3011", { name = "replace", position = 4, value = limit }) + end + + local replacer + if type(replacement) == "table" and (replacement._jsonata_function or replacement._jsonata_lambda) then + replacer = function(m) + return H.apply(replacement, { m }) + end + else + 
require_string(replacement, "replace", 3) + replacer = string_replacer(replacement) + end + + local out = {} + local count = 0 + local no_limit = (limit == nil or V.is_nothing(limit)) + if no_limit or limit > 0 then + if H.is_regex(pattern) then + local m = H.apply(pattern, { s }) + local position = 0 -- 0-based char index into s + while not V.is_nothing(m) and (no_limit or count < limit) do + local mstart = V.obj_get(m, "start") + out[#out + 1] = string.sub(s, position + 1, mstart) + local rep = replacer(m) + if V.typeof(rep) ~= "string" then + H.err("D3012", { name = "replace", value = rep }) + end + out[#out + 1] = rep + position = mstart + #V.obj_get(m, "match") + count = count + 1 + m = H.apply(V.obj_get(m, "next"), {}) + end + out[#out + 1] = string.sub(s, position + 1) + else + require_string(pattern, "replace", 2) + local position = 1 -- 1-based Lua index + local i = string.find(s, pattern, position, true) + while i and (no_limit or count < limit) do + out[#out + 1] = string.sub(s, position, i - 1) + out[#out + 1] = replacement + position = i + #pattern + count = count + 1 + i = string.find(s, pattern, position, true) + end + out[#out + 1] = string.sub(s, position) + end + else + return s + end + return table.concat(out) +end, 3, 4, "") + R.join = H.def(function(arr, sep) if nothing_guard(arr) then return V.NOTHING diff --git a/src/jsonata/regex.lua b/src/jsonata/regex.lua index 3328377..14debf9 100644 --- a/src/jsonata/regex.lua +++ b/src/jsonata/regex.lua @@ -1,5 +1,7 @@ -- Lazy PCRE2 adapter. require("rex_pcre2") happens on first compile, so -- non-regex programs never load it. +local V = require("jsonata.value") + local M = {} local rex -- cached module @@ -39,6 +41,15 @@ function M.first(matcher, str, from) return nil end local matched = (en < st) and "" or str:sub(st, en) + -- lrexlib yields `false` for a non-participating optional group; jsonata + -- represents it as null (serialized as `null`, skipped in $N substitution). 
+ if caps then + for i = 1, #caps do + if caps[i] == false then + caps[i] = V.NULL + end + end + end return { match = matched, start = st - 1,