From 9587af426372dc6122c3974892654ada3bcec71e Mon Sep 17 00:00:00 2001 From: fl Date: Thu, 25 Jun 2026 18:24:12 +0800 Subject: [PATCH 1/3] fix(M8a): $formatBase rounds half-even (shared H.round_half_even), not floor $formatBase(99.5,2.5) must round 99.5->100; we floored to 99. Extract the half-even rounding from $round into H.round_half_even and use it for value+radix (and reuse it in $formatNumber next). Co-Authored-By: Claude Opus 4.8 --- spec/formatnumber_spec.lua | 22 ++++++++++++++++++++++ src/jsonata/functions/helpers.lua | 27 +++++++++++++++++++++++++++ src/jsonata/functions/numeric.lua | 19 +++---------------- 3 files changed, 52 insertions(+), 16 deletions(-) create mode 100644 spec/formatnumber_spec.lua diff --git a/spec/formatnumber_spec.lua b/spec/formatnumber_spec.lua new file mode 100644 index 0000000..fdcae7a --- /dev/null +++ b/spec/formatnumber_spec.lua @@ -0,0 +1,22 @@ +local jsonata = require("jsonata") +local function run(src, input) + return jsonata.compile(src):evaluate(input) +end + +describe("M8a: $formatBase rounds (half-even), not floors", function() + it("$formatBase(99.5, 2.5) rounds 99.5->100 and 2.5->2", function() + assert.are.equal("1100100", run("$formatBase(99.5, 2.5)")) + end) + it("existing cases unchanged", function() + assert.are.equal("100", run("$formatBase(100)")) + assert.are.equal("1100100", run("$formatBase(100, 2)")) + assert.are.equal("-1100100", run("$formatBase(-100, 2)")) + assert.are.equal("2s", run("$formatBase(100, 36)")) + assert.are.equal("5890840712243076", run("$formatBase(big_id)", { big_id = 5890840712243076 })) + end) + it("radix out of range -> D3100", function() + local ok, err = pcall(run, "$formatBase(100, 37)") + assert.is_false(ok) + assert.are.equal("D3100", err.code) + end) +end) diff --git a/src/jsonata/functions/helpers.lua b/src/jsonata/functions/helpers.lua index e3f8732..ed2b21b 100644 --- a/src/jsonata/functions/helpers.lua +++ b/src/jsonata/functions/helpers.lua @@ -109,6 +109,33 @@ function 
H.err(code, info) errors.raise(code, info or {}) end +-- Half-to-even (banker's) rounding with optional decimal precision. Shared by +-- $round, $formatBase, $formatNumber. (jsonata's `round`.) +function H.round_half_even(x, precision) + precision = precision or 0 + local factor = 10 ^ precision + local scaled = x * factor + -- Correct binary-float representation error before the half-even test. + -- %.12g snaps values like 452.5000000000001 -> 452.5 (12 sig-fig round), + -- which is what jsonata-js does implicitly via IEEE-754 string coercion. + -- Guard: only apply when |scaled| < 1e13 so we don't truncate large integers + -- (e.g. 5890840712243076 has 16 digits and needs no correction — diff is 0). + if math.abs(scaled) < 1e13 then + scaled = tonumber(string.format("%.12g", scaled)) or scaled + end + local floored = math.floor(scaled) + local diff = scaled - floored + local rounded + if diff < 0.5 then + rounded = floored + elseif diff > 0.5 then + rounded = floored + 1 + else + rounded = (floored % 2 == 0) and floored or floored + 1 + end + return rounded / factor +end + -- Structural equality over internal values (numbers/strings/booleans exact; -- arrays elementwise; objects key-set + recursive; null/nothing by identity). 
function H.deep_equal(a, b) diff --git a/src/jsonata/functions/numeric.lua b/src/jsonata/functions/numeric.lua index 83eb946..d99b094 100644 --- a/src/jsonata/functions/numeric.lua +++ b/src/jsonata/functions/numeric.lua @@ -65,21 +65,7 @@ R.round = H.def(function(x, precision) return V.NOTHING end precision = (precision == nil or V.is_nothing(precision)) and 0 or math.floor(precision) - local factor = 10 ^ precision - local scaled = x * factor - -- correct binary-float representation error before the half-even test - scaled = tonumber(string.format("%.12g", scaled)) or scaled - local floored = math.floor(scaled) - local diff = scaled - floored - local rounded - if diff < 0.5 then - rounded = floored - elseif diff > 0.5 then - rounded = floored + 1 - else - rounded = (floored % 2 == 0) and floored or floored + 1 - end - return rounded / factor + return H.round_half_even(x, precision) end, 1, 2, "") R.power = H.def(function(base, exp) @@ -104,7 +90,8 @@ R.formatBase = H.def(function(x, radix) if num_guard(x) then return V.NOTHING end - radix = (radix == nil or V.is_nothing(radix)) and 10 or math.floor(radix) + x = H.round_half_even(x) + radix = (radix == nil or V.is_nothing(radix)) and 10 or H.round_half_even(radix) if radix < 2 or radix > 36 then H.err("D3100", { value = radix, message = "$formatBase radix out of range" }) end From f0a4d3852d2168ce14bec6425a7187b9c2214364 Mon Sep 17 00:00:00 2001 From: fl Date: Thu, 25 Jun 2026 18:37:50 +0800 Subject: [PATCH 2/3] =?UTF-8?q?feat(M8a):=20$formatNumber=20=E2=80=94=20XP?= =?UTF-8?q?ath=20decimal-format=20picture=20(faithful=20jsonata=20port)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ports jsonata formatNumber (F&O 4.7: splitParts/validate/analyse/format) into functions/formatnumber.lua: optional/mandatory digits, regular+explicit grouping, percent/per-mille, exponent, the options decimal-format overrides, and the unicode digit family. 
D3080-D3093 picture-validation errors. Adds the full function-formatNumber conformance group (case000-036 + issue785 zero-guard) to the suite baseline. Co-Authored-By: Claude Opus 4.8 --- spec/formatnumber_spec.lua | 32 ++ spec/jsonata-suite/baseline.lua | 28 ++ src/jsonata/errors.lua | 14 + src/jsonata/functions/formatnumber.lua | 637 +++++++++++++++++++++++++ src/jsonata/functions/init.lua | 1 + 5 files changed, 712 insertions(+) create mode 100644 src/jsonata/functions/formatnumber.lua diff --git a/spec/formatnumber_spec.lua b/spec/formatnumber_spec.lua index fdcae7a..cc0f434 100644 --- a/spec/formatnumber_spec.lua +++ b/spec/formatnumber_spec.lua @@ -20,3 +20,35 @@ describe("M8a: $formatBase rounds (half-even), not floors", function() assert.are.equal("D3100", err.code) end) end) + +describe("M8a: $formatNumber decimal-format picture", function() + it("grouping + mandatory decimals", function() + assert.are.equal("12,345.60", run('$formatNumber(12345.6, "#,###.00")')) + assert.are.equal("12,345,678.90", run('$formatNumber(12345678.9, "9,999.99")')) + end) + it("irregular grouping positions", function() + assert.are.equal("1,234.567,890", run('$formatNumber(1234.56789, "9,999.999,999")')) + end) + it("mandatory-digit padding", function() + assert.are.equal("0124", run('$formatNumber(123.9, "9999")')) + assert.are.equal("-006", run('$formatNumber(-6, "000")')) + end) + it("percent and per-mille (default + custom symbol)", function() + assert.are.equal("14%", run('$formatNumber(0.14, "01%")')) + assert.are.equal("485.7‰", run('$formatNumber(0.4857, "###.###‰")')) + assert.are.equal("140pm", run('$formatNumber(0.14, "###pm", {"per-mille": "pm"})')) + end) + it("exponent notation", function() + assert.are.equal("12.346e2", run('$formatNumber(1234.5678, "00.000e0")')) + assert.are.equal("12.346e002", run('$formatNumber(1234.5678, "00.000e000")')) + assert.are.equal("2.3e-1", run('$formatNumber(0.234, "0.0e0")')) + assert.are.equal("0.23e0", run('$formatNumber(0.234, 
"#.00e0")')) + end) + it("unicode digit family via zero-digit option", function() + -- oracle (jsonata v2.2.1) emits 23.457e3 in the U+2460 family for this input + assert.are.equal("②③.④⑤⑦e③", run('$formatNumber(1234.5678, "①①.①①①e①", {"zero-digit": "①"})')) + end) + it("undefined input -> undefined", function() + assert.is_nil(run('$formatNumber(blah, "0")', {})) + end) +end) diff --git a/spec/jsonata-suite/baseline.lua b/spec/jsonata-suite/baseline.lua index 4264462..4803fea 100644 --- a/spec/jsonata-suite/baseline.lua +++ b/spec/jsonata-suite/baseline.lua @@ -421,11 +421,31 @@ return { ["function-formatBase/case002"] = true, ["function-formatBase/case003"] = true, ["function-formatBase/case004"] = true, + ["function-formatBase/case005"] = true, ["function-formatBase/case006"] = true, ["function-formatBase/case007"] = true, ["function-formatBase/case008"] = true, ["function-formatInteger/formatInteger/15"] = true, ["function-formatInteger/formatInteger/64"] = true, + ["function-formatNumber/case000"] = true, + ["function-formatNumber/case001"] = true, + ["function-formatNumber/case002"] = true, + ["function-formatNumber/case003"] = true, + ["function-formatNumber/case004"] = true, + ["function-formatNumber/case005"] = true, + ["function-formatNumber/case006"] = true, + ["function-formatNumber/case007"] = true, + ["function-formatNumber/case008"] = true, + ["function-formatNumber/case009"] = true, + ["function-formatNumber/case010"] = true, + ["function-formatNumber/case011"] = true, + ["function-formatNumber/case012"] = true, + ["function-formatNumber/case013"] = true, + ["function-formatNumber/case014"] = true, + ["function-formatNumber/case015"] = true, + ["function-formatNumber/case016"] = true, + ["function-formatNumber/case017"] = true, + ["function-formatNumber/case018"] = true, ["function-formatNumber/case019"] = true, ["function-formatNumber/case020"] = true, ["function-formatNumber/case021"] = true, @@ -440,6 +460,14 @@ return { 
["function-formatNumber/case030"] = true, ["function-formatNumber/case031"] = true, ["function-formatNumber/case032"] = true, + ["function-formatNumber/case033"] = true, + ["function-formatNumber/case034"] = true, + ["function-formatNumber/case035"] = true, + ["function-formatNumber/case036"] = true, + ["function-formatNumber/issue785/0"] = true, + ["function-formatNumber/issue785/1"] = true, + ["function-formatNumber/issue785/2"] = true, + ["function-formatNumber/issue785/3"] = true, ["function-formatNumber/issue786/0"] = true, ["function-formatNumber/issue786/1"] = true, ["function-formatNumber/issue786/2"] = true, diff --git a/src/jsonata/errors.lua b/src/jsonata/errors.lua index b986f01..107e228 100644 --- a/src/jsonata/errors.lua +++ b/src/jsonata/errors.lua @@ -48,6 +48,20 @@ local MESSAGES = { D3060 = "$sqrt of a number that is less than zero", D3070 = "The single argument of the $sort function must be an array of strings or numbers. Use a comparator function to sort other types.", D3100 = "The radix of $formatBase must be between 2 and 36", + D3080 = "The picture string must not contain more than one instance of the 'percent' character", + D3081 = "The picture string must not contain more than one instance of the 'decimal-separator' character", + D3082 = "The picture string must not contain more than one instance of the 'percent' character", + D3083 = "The picture string must not contain more than one instance of the 'per-mille' character", + D3084 = "The picture string must not contain both a 'percent' and a 'per-mille' character", + D3085 = "The mantissa part of a picture string must contain at least one character that is either an 'optional digit character' or a member of the 'decimal digit family'", + D3086 = "The picture string must not contain a passive character that is preceded by an active character and that is followed by another active character", + D3087 = "The picture string must not contain a 'grouping-separator' character that appears 
adjacent to a 'decimal-separator' character", + D3088 = "The picture string must not contain a 'grouping-separator' at the end of the integer part", + D3089 = "The picture string must not contain two adjacent instances of the 'grouping-separator' character", + D3090 = "The integer part of the picture string must not contain a member of the 'decimal digit family' that is followed by an instance of the 'optional digit character'", + D3091 = "The fractional part of the picture string must not contain an instance of the 'optional digit character' that is followed by a member of the 'decimal digit family'", + D3092 = "A sub-picture that contains a 'percent' or 'per-mille' character must not contain a character treated as an 'exponent-separator'", + D3093 = "The exponent part of the picture string must comprise only of one or more characters that are members of the 'decimal digit family'", D3137 = "$error() function evaluated", D3138 = "The single() function expected exactly 1 matching result. Instead it matched more.", D3139 = "The single() function expected exactly 1 matching result. Instead it matched 0.", diff --git a/src/jsonata/functions/formatnumber.lua b/src/jsonata/functions/formatnumber.lua new file mode 100644 index 0000000..6ddb678 --- /dev/null +++ b/src/jsonata/functions/formatnumber.lua @@ -0,0 +1,637 @@ +local V = require("jsonata.value") +local H = require("jsonata.functions.helpers") + +local R = {} + +-- =========================================================================== +-- Faithful port of jsonata-js v2.2.1 formatNumber (jsonata.js:2138-2528), +-- the XPath F&O 4.7 decimal-format picture formatter. +-- +-- jsonata operates on JS strings as sequences of UTF-16 code units; the +-- picture string and decimal-format symbols may be multi-byte UTF-8 here +-- (e.g. per-mille U+2030, the U+2460 digit family). 
To keep the transcription +-- mechanical AND correct, we work over arrays of single-codepoint character +-- strings (via H.utf8_chars) and provide JS-mirroring 0-based string helpers +-- that operate on those character arrays. Every JS string op maps to one Lua +-- call with IDENTICAL indices. +-- =========================================================================== + +-- A "cstr" is { chars = {}, len = n }. We pass these +-- around in place of JS strings so charAt/substring/indexOf are character- +-- (not byte-) accurate. +local function cstr(s) + if type(s) == "table" then + return s + end + local chars = H.utf8_chars(s) + return { chars = chars, len = #chars } +end + +local function cstr_tostring(cs) + return table.concat(cs.chars) +end + +-- 0-based; "" if out of range. Returns a single-char string. +local function charAt(cs, i) + local c = cs.chars[i + 1] + return c or "" +end + +-- 0-based [a, b); b defaults to end. Returns a cstr. +local function substring(cs, a, b) + if b == nil then + b = cs.len + end + if a < 0 then + a = 0 + end + if b > cs.len then + b = cs.len + end + if b < a then + b = a + end + local out = {} + for i = a + 1, b do + out[#out + 1] = cs.chars[i] + end + return { chars = out, len = #out } +end + +-- 0-based; -1 if absent. `sub` is a single-char string (all jsonata uses here +-- search for single characters or the 2-char grouping++grouping case). 
+local function indexOf(cs, sub, from) + from = from or 0 + if from < 0 then + from = 0 + end + local subc = cstr(sub) + if subc.len == 0 then + return from <= cs.len and from or cs.len + end + for i = from, cs.len - subc.len do + local match = true + for j = 1, subc.len do + if cs.chars[i + j] ~= subc.chars[j] then + match = false + break + end + end + if match then + return i + end + end + return -1 +end + +local function lastIndexOf(cs, sub) + local subc = cstr(sub) + local last = -1 + if subc.len == 0 then + return cs.len + end + for i = 0, cs.len - subc.len do + local match = true + for j = 1, subc.len do + if cs.chars[i + j] ~= subc.chars[j] then + match = false + break + end + end + if match then + last = i + end + end + return last +end + +-- JS arr.indexOf(x) !== -1 over a Lua list of strings. +local function contains(arr, x) + for i = 1, #arr do + if arr[i] == x then + return true + end + end + return false +end + +-- Split a cstr on a single-char literal separator -> list of cstr (JS .split). +local function split_on(cs, sep) + local out = {} + local start = 0 + local pos = indexOf(cs, sep, 0) + while pos ~= -1 do + out[#out + 1] = substring(cs, start, pos) + start = pos + 1 + pos = indexOf(cs, sep, start) + end + out[#out + 1] = substring(cs, start, cs.len) + return out +end + +-- slice(a, b) like JS string.slice over a cstr -> cstr (only non-negative +-- indices are used by jsonata here). +local function slice(cs, a, b) + return substring(cs, a, b) +end + +-- Codepoint of the first character of a single-char string (UTF-8 decode). 
+local function codepoint(ch) + local b1 = ch:byte(1) + if not b1 then + return 0 + end + if b1 < 0x80 then + return b1 + elseif b1 < 0xE0 then + return (b1 - 0xC0) * 0x40 + (ch:byte(2) - 0x80) + elseif b1 < 0xF0 then + return (b1 - 0xE0) * 0x1000 + (ch:byte(2) - 0x80) * 0x40 + (ch:byte(3) - 0x80) + else + return (b1 - 0xF0) * 0x40000 + (ch:byte(2) - 0x80) * 0x1000 + (ch:byte(3) - 0x80) * 0x40 + (ch:byte(4) - 0x80) + end +end + +-- UTF-8 encode a codepoint -> string. +local function from_codepoint(cp) + if cp < 0x80 then + return string.char(cp) + elseif cp < 0x800 then + return string.char(0xC0 + math.floor(cp / 0x40), 0x80 + (cp % 0x40)) + elseif cp < 0x10000 then + return string.char(0xE0 + math.floor(cp / 0x1000), 0x80 + (math.floor(cp / 0x40) % 0x40), 0x80 + (cp % 0x40)) + else + return string.char(0xF0 + math.floor(cp / 0x40000), 0x80 + (math.floor(cp / 0x1000) % 0x40), 0x80 + (math.floor(cp / 0x40) % 0x40), 0x80 + (cp % 0x40)) + end +end + +R.formatNumber = H.def(function(value, picture, options) + -- undefined inputs always return undefined + if V.is_nothing(value) then + return V.NOTHING + end + + local properties = { + ["decimal-separator"] = ".", + ["grouping-separator"] = ",", + ["exponent-separator"] = "e", + ["infinity"] = "Infinity", + ["minus-sign"] = "-", + ["NaN"] = "NaN", + ["percent"] = "%", + ["per-mille"] = from_codepoint(0x2030), + ["zero-digit"] = "0", + ["digit"] = "#", + ["pattern-separator"] = ";", + } + + -- if `options` is specified, then its entries override defaults + if not V.is_nothing(options) then + for _, key in ipairs(V.obj_keys(options)) do + properties[key] = V.obj_get(options, key) + end + end + + local decimalDigitFamily = {} + local zeroCharCode = codepoint(properties["zero-digit"]) + for ii = zeroCharCode, zeroCharCode + 9 do + decimalDigitFamily[#decimalDigitFamily + 1] = from_codepoint(ii) + end + + local activeChars = {} + for i = 1, #decimalDigitFamily do + activeChars[#activeChars + 1] = decimalDigitFamily[i] + end + 
activeChars[#activeChars + 1] = properties["decimal-separator"] + activeChars[#activeChars + 1] = properties["exponent-separator"] + activeChars[#activeChars + 1] = properties["grouping-separator"] + activeChars[#activeChars + 1] = properties["digit"] + activeChars[#activeChars + 1] = properties["pattern-separator"] + + local subPictures = split_on(cstr(picture), properties["pattern-separator"]) + + if #subPictures > 2 then + H.err("D3080") + end + + local splitParts = function(subpicture) + local prefix = (function() + for ii = 0, subpicture.len - 1 do + local ch = charAt(subpicture, ii) + if contains(activeChars, ch) and ch ~= properties["exponent-separator"] then + return substring(subpicture, 0, ii) + end + end + return cstr("") + end)() + local suffix = (function() + for ii = subpicture.len - 1, 0, -1 do + local ch = charAt(subpicture, ii) + if contains(activeChars, ch) and ch ~= properties["exponent-separator"] then + return substring(subpicture, ii + 1) + end + end + return cstr("") + end)() + local activePart = substring(subpicture, prefix.len, subpicture.len - suffix.len) + local mantissaPart, exponentPart, integerPart, fractionalPart + local exponentPosition = indexOf(subpicture, properties["exponent-separator"], prefix.len) + if exponentPosition == -1 or exponentPosition > subpicture.len - suffix.len then + mantissaPart = activePart + exponentPart = nil + else + mantissaPart = substring(activePart, 0, exponentPosition) + exponentPart = substring(activePart, exponentPosition + 1) + end + local decimalPosition = indexOf(mantissaPart, properties["decimal-separator"]) + if decimalPosition == -1 then + integerPart = mantissaPart + fractionalPart = suffix + else + integerPart = substring(mantissaPart, 0, decimalPosition) + fractionalPart = substring(mantissaPart, decimalPosition + 1) + end + return { + prefix = prefix, + suffix = suffix, + activePart = activePart, + mantissaPart = mantissaPart, + exponentPart = exponentPart, + integerPart = integerPart, + 
fractionalPart = fractionalPart, + subpicture = subpicture, + } + end + + -- validate the picture string, F&O 4.7.3 + local validate = function(parts) + local error_code + local subpicture = parts.subpicture + local decimalPos = indexOf(subpicture, properties["decimal-separator"]) + if decimalPos ~= lastIndexOf(subpicture, properties["decimal-separator"]) then + error_code = "D3081" + end + if indexOf(subpicture, properties.percent) ~= lastIndexOf(subpicture, properties.percent) then + error_code = "D3082" + end + if indexOf(subpicture, properties["per-mille"]) ~= lastIndexOf(subpicture, properties["per-mille"]) then + error_code = "D3083" + end + if indexOf(subpicture, properties.percent) ~= -1 and indexOf(subpicture, properties["per-mille"]) ~= -1 then + error_code = "D3084" + end + local valid = false + for ii = 0, parts.mantissaPart.len - 1 do + local ch = charAt(parts.mantissaPart, ii) + if contains(decimalDigitFamily, ch) or ch == properties.digit then + valid = true + break + end + end + if not valid then + error_code = "D3085" + end + -- charTypes: 'p' (passive) for chars not in activeChars, 'a' otherwise. + local hasPassive = false + for i = 1, parts.activePart.len do + if not contains(activeChars, parts.activePart.chars[i]) then + hasPassive = true + break + end + end + if hasPassive then + error_code = "D3086" + end + if decimalPos ~= -1 then + if charAt(subpicture, decimalPos - 1) == properties["grouping-separator"] or charAt(subpicture, decimalPos + 1) == properties["grouping-separator"] then + error_code = "D3087" + end + elseif charAt(parts.integerPart, parts.integerPart.len - 1) == properties["grouping-separator"] then + error_code = "D3088" + end + if indexOf(subpicture, properties["grouping-separator"] .. 
properties["grouping-separator"]) ~= -1 then + error_code = "D3089" + end + local optionalDigitPos = indexOf(parts.integerPart, properties.digit) + if optionalDigitPos ~= -1 then + local before = substring(parts.integerPart, 0, optionalDigitPos) + local found = false + for i = 1, before.len do + if contains(decimalDigitFamily, before.chars[i]) then + found = true + break + end + end + if found then + error_code = "D3090" + end + end + optionalDigitPos = lastIndexOf(parts.fractionalPart, properties.digit) + if optionalDigitPos ~= -1 then + local after = substring(parts.fractionalPart, optionalDigitPos) + local found = false + for i = 1, after.len do + if contains(decimalDigitFamily, after.chars[i]) then + found = true + break + end + end + if found then + error_code = "D3091" + end + end + local exponentExists = (parts.exponentPart ~= nil) + if + exponentExists + and parts.exponentPart.len > 0 + and (indexOf(subpicture, properties.percent) ~= -1 or indexOf(subpicture, properties["per-mille"]) ~= -1) + then + error_code = "D3092" + end + if exponentExists then + local allDigits = true + if parts.exponentPart.len == 0 then + allDigits = false + else + for i = 1, parts.exponentPart.len do + if not contains(decimalDigitFamily, parts.exponentPart.chars[i]) then + allDigits = false + break + end + end + end + if not allDigits then + error_code = "D3093" + end + end + if error_code then + H.err(error_code) + end + end + + -- analyse the picture string, F&O 4.7.4 + local analyse = function(parts) + local getGroupingPositions = function(part, toLeft) + local positions = {} + local groupingPosition = indexOf(part, properties["grouping-separator"]) + while groupingPosition ~= -1 do + local seg = toLeft and substring(part, 0, groupingPosition) or substring(part, groupingPosition) + local charsToTheRight = 0 + for i = 1, seg.len do + local ch = seg.chars[i] + if contains(decimalDigitFamily, ch) or ch == properties.digit then + charsToTheRight = charsToTheRight + 1 + end + end + 
positions[#positions + 1] = charsToTheRight + -- VERBATIM jsonata quirk: references parts.integerPart even for the + -- fractional call. Do not "fix". + groupingPosition = indexOf(parts.integerPart, properties["grouping-separator"], groupingPosition + 1) + end + return positions + end + local integerPartGroupingPositions = getGroupingPositions(parts.integerPart) + local regular = function(indexes) + if #indexes == 0 then + return 0 + end + local function gcd(a, b) + return b == 0 and a or gcd(b, a % b) + end + local factor = indexes[1] + for i = 2, #indexes do + factor = gcd(factor, indexes[i]) + end + for index = 1, #indexes do + if not contains(indexes, index * factor) then + return 0 + end + end + return factor + end + + local regularGrouping = regular(integerPartGroupingPositions) + local fractionalPartGroupingPositions = getGroupingPositions(parts.fractionalPart, true) + + local minimumIntegerPartSize = 0 + for i = 1, parts.integerPart.len do + if contains(decimalDigitFamily, parts.integerPart.chars[i]) then + minimumIntegerPartSize = minimumIntegerPartSize + 1 + end + end + local scalingFactor = minimumIntegerPartSize + + local minimumFactionalPartSize = 0 + local maximumFactionalPartSize = 0 + for i = 1, parts.fractionalPart.len do + local ch = parts.fractionalPart.chars[i] + if contains(decimalDigitFamily, ch) then + minimumFactionalPartSize = minimumFactionalPartSize + 1 + maximumFactionalPartSize = maximumFactionalPartSize + 1 + elseif ch == properties.digit then + maximumFactionalPartSize = maximumFactionalPartSize + 1 + end + end + local exponentPresent = (parts.exponentPart ~= nil) + if minimumIntegerPartSize == 0 and maximumFactionalPartSize == 0 then + if exponentPresent then + minimumFactionalPartSize = 1 + maximumFactionalPartSize = 1 + else + minimumIntegerPartSize = 1 + end + end + if exponentPresent and minimumIntegerPartSize == 0 and indexOf(parts.integerPart, properties.digit) ~= -1 then + minimumIntegerPartSize = 1 + end + if 
minimumIntegerPartSize == 0 and minimumFactionalPartSize == 0 then + minimumFactionalPartSize = 1 + end + local minimumExponentSize = 0 + if exponentPresent then + for i = 1, parts.exponentPart.len do + if contains(decimalDigitFamily, parts.exponentPart.chars[i]) then + minimumExponentSize = minimumExponentSize + 1 + end + end + end + + return { + integerPartGroupingPositions = integerPartGroupingPositions, + regularGrouping = regularGrouping, + minimumIntegerPartSize = minimumIntegerPartSize, + scalingFactor = scalingFactor, + prefix = cstr_tostring(parts.prefix), + fractionalPartGroupingPositions = fractionalPartGroupingPositions, + minimumFactionalPartSize = minimumFactionalPartSize, + maximumFactionalPartSize = maximumFactionalPartSize, + minimumExponentSize = minimumExponentSize, + suffix = cstr_tostring(parts.suffix), + picture = cstr_tostring(parts.subpicture), + } + end + + local parts = {} + for i = 1, #subPictures do + parts[i] = splitParts(subPictures[i]) + end + for i = 1, #parts do + validate(parts[i]) + end + + local variables = {} + for i = 1, #parts do + variables[i] = analyse(parts[i]) + end + + local minus_sign = properties["minus-sign"] + local zero_digit = properties["zero-digit"] + local decimal_separator = properties["decimal-separator"] + local grouping_separator = properties["grouping-separator"] + + if #variables == 1 then + -- deep copy variables[1] (plain fields only) + local src = variables[1] + local copy = { + integerPartGroupingPositions = {}, + regularGrouping = src.regularGrouping, + minimumIntegerPartSize = src.minimumIntegerPartSize, + scalingFactor = src.scalingFactor, + prefix = src.prefix, + fractionalPartGroupingPositions = {}, + minimumFactionalPartSize = src.minimumFactionalPartSize, + maximumFactionalPartSize = src.maximumFactionalPartSize, + minimumExponentSize = src.minimumExponentSize, + suffix = src.suffix, + picture = src.picture, + } + for i = 1, #src.integerPartGroupingPositions do + 
copy.integerPartGroupingPositions[i] = src.integerPartGroupingPositions[i] + end + for i = 1, #src.fractionalPartGroupingPositions do + copy.fractionalPartGroupingPositions[i] = src.fractionalPartGroupingPositions[i] + end + variables[2] = copy + variables[2].prefix = minus_sign .. variables[2].prefix + end + + -- format the number + local pic + -- bullet 2: + if value >= 0 then + pic = variables[1] + else + pic = variables[2] + end + local adjustedNumber + -- bullet 3: + if indexOf(cstr(pic.picture), properties.percent) ~= -1 then + adjustedNumber = value * 100 + elseif indexOf(cstr(pic.picture), properties["per-mille"]) ~= -1 then + adjustedNumber = value * 1000 + else + adjustedNumber = value + end + -- bullet 5: + local mantissa, exponent + if pic.minimumExponentSize == 0 then + mantissa = adjustedNumber + else + local maxMantissa = 10 ^ pic.scalingFactor + local minMantissa = 10 ^ (pic.scalingFactor - 1) + mantissa = adjustedNumber + exponent = 0 + -- zero guard (#785): no normalisation for zero input + if mantissa ~= 0 then + while math.abs(mantissa) < minMantissa do + mantissa = mantissa * 10 + exponent = exponent - 1 + end + while math.abs(mantissa) > maxMantissa do + mantissa = mantissa / 10 + exponent = exponent + 1 + end + end + end + -- bullet 6: + local roundedNumber = H.round_half_even(mantissa, pic.maximumFactionalPartSize) + -- bullet 7: + local makeString = function(val, dp) + local str = string.format("%." .. dp .. "f", math.abs(val)) + if zero_digit ~= "0" then + local out = {} + for i = 1, #str do + local c = str:sub(i, i) + if c >= "0" and c <= "9" then + out[#out + 1] = decimalDigitFamily[c:byte(1) - 48 + 1] + else + out[#out + 1] = c + end + end + str = table.concat(out) + end + return str + end + local stringValue = cstr(makeString(roundedNumber, pic.maximumFactionalPartSize)) + local decimalPos = indexOf(stringValue, ".") + if decimalPos == -1 then + stringValue = cstr(cstr_tostring(stringValue) .. 
decimal_separator) + else + -- replace first "." with decimal_separator + local p = decimalPos + stringValue = cstr(cstr_tostring(substring(stringValue, 0, p)) .. decimal_separator .. cstr_tostring(substring(stringValue, p + 1))) + end + while charAt(stringValue, 0) == zero_digit do + stringValue = substring(stringValue, 1) + end + while charAt(stringValue, stringValue.len - 1) == zero_digit do + stringValue = substring(stringValue, 0, stringValue.len - 1) + end + -- bullets 8 & 9: + decimalPos = indexOf(stringValue, decimal_separator) + local padLeft = pic.minimumIntegerPartSize - decimalPos + local padRight = pic.minimumFactionalPartSize - (stringValue.len - decimalPos - 1) + stringValue = cstr((padLeft > 0 and string.rep(zero_digit, padLeft) or "") .. cstr_tostring(stringValue)) + stringValue = cstr(cstr_tostring(stringValue) .. (padRight > 0 and string.rep(zero_digit, padRight) or "")) + decimalPos = indexOf(stringValue, decimal_separator) + -- bullet 10: + if pic.regularGrouping > 0 then + local groupCount = math.floor((decimalPos - 1) / pic.regularGrouping) + for group = 1, groupCount do + stringValue = cstr( + cstr_tostring(slice(stringValue, 0, decimalPos - group * pic.regularGrouping)) + .. grouping_separator + .. cstr_tostring(slice(stringValue, decimalPos - group * pic.regularGrouping)) + ) + end + else + for _, pos in ipairs(pic.integerPartGroupingPositions) do + stringValue = cstr(cstr_tostring(slice(stringValue, 0, decimalPos - pos)) .. grouping_separator .. cstr_tostring(slice(stringValue, decimalPos - pos))) + decimalPos = decimalPos + 1 + end + end + -- bullet 11: + decimalPos = indexOf(stringValue, decimal_separator) + for _, pos in ipairs(pic.fractionalPartGroupingPositions) do + stringValue = + cstr(cstr_tostring(slice(stringValue, 0, pos + decimalPos + 1)) .. grouping_separator .. 
cstr_tostring(slice(stringValue, pos + decimalPos + 1))) + end + -- bullet 12: + decimalPos = indexOf(stringValue, decimal_separator) + if indexOf(cstr(pic.picture), decimal_separator) == -1 or decimalPos == stringValue.len - 1 then + stringValue = substring(stringValue, 0, stringValue.len - 1) + end + -- bullet 13: + if exponent ~= nil then + local stringExponent = cstr(makeString(exponent, 0)) + padLeft = pic.minimumExponentSize - stringExponent.len + if padLeft > 0 then + stringExponent = cstr(string.rep(zero_digit, padLeft) .. cstr_tostring(stringExponent)) + end + stringValue = cstr(cstr_tostring(stringValue) .. properties["exponent-separator"] .. (exponent < 0 and minus_sign or "") .. cstr_tostring(stringExponent)) + end + -- bullet 14: + return pic.prefix .. cstr_tostring(stringValue) .. pic.suffix +end, 2, 3, "") + +return R diff --git a/src/jsonata/functions/init.lua b/src/jsonata/functions/init.lua index 9f97839..24161b7 100644 --- a/src/jsonata/functions/init.lua +++ b/src/jsonata/functions/init.lua @@ -7,6 +7,7 @@ local categories = { require("jsonata.functions.boolean"), require("jsonata.functions.string"), require("jsonata.functions.numeric"), + require("jsonata.functions.formatnumber"), require("jsonata.functions.aggregation"), require("jsonata.functions.array"), require("jsonata.functions.object"), From 6119c49dcd6988a17e18d5724ffff57123822d5b Mon Sep 17 00:00:00 2001 From: fl Date: Thu, 25 Jun 2026 18:57:53 +0800 Subject: [PATCH 3/3] fix(M8a): faithful half-even round (no digit loss) + oracle error messages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adversarial review found two issues. (1) H.round_half_even's %.12g snap dropped the 13th significant digit (e.g. $formatNumber(1234567890123,"0")); replace with a faithful port of jsonata round() — shift the decimal via string, not multiply, then exact ties-to-even. Fixes $round/$formatBase/$formatNumber for 13+ digit values. 
(2) D3080-D3093 message strings now match jsonata verbatim (D3080 was a copy-paste of the percent message; others said "picture string" not "sub-picture"). Co-Authored-By: Claude Opus 4.8 --- spec/formatnumber_spec.lua | 26 ++++++++++++++ src/jsonata/errors.lua | 26 +++++++------- src/jsonata/functions/helpers.lua | 58 +++++++++++++++++++------------ 3 files changed, 74 insertions(+), 36 deletions(-) diff --git a/spec/formatnumber_spec.lua b/spec/formatnumber_spec.lua index cc0f434..32c8341 100644 --- a/spec/formatnumber_spec.lua +++ b/spec/formatnumber_spec.lua @@ -52,3 +52,29 @@ describe("M8a: $formatNumber decimal-format picture", function() assert.is_nil(run('$formatNumber(blah, "0")', {})) end) end) + +describe("M8a fixes: large-integer rounding fidelity", function() + it("$formatNumber preserves 13+ significant digits", function() + assert.are.equal("1234567890123", run('$formatNumber(1234567890123, "0")')) + assert.are.equal("9999999999999", run('$formatNumber(9999999999999, "0")')) + assert.are.equal("12345678901.23", run('$formatNumber(12345678901.23, "0.00")')) + end) + it("$round preserves 13-digit integers", function() + assert.are.equal(1234567890123, run("$round(1234567890123)")) + assert.are.equal(4.52, run("$round(4.525, 2)")) + assert.are.equal(2, run("$round(2.5)")) + assert.are.equal(4, run("$round(3.5)")) + end) + it("$formatBase preserves large integers", function() + assert.are.equal("5890840712243076", run("$formatBase(big_id)", { big_id = 5890840712243076 })) + assert.are.equal("1234567890123", run("$formatBase(1234567890123)")) + end) +end) +describe("M8a fixes: error messages match oracle", function() + it("D3080 message is the sub-picture-count message", function() + local ok, err = pcall(run, '$formatNumber(20, "#;#;#")') + assert.is_false(ok) + assert.are.equal("D3080", err.code) + assert.is_truthy(err.message and err.message:find("two sub%-pictures")) + end) +end) diff --git a/src/jsonata/errors.lua b/src/jsonata/errors.lua index 
107e228..ad03d5d 100644 --- a/src/jsonata/errors.lua +++ b/src/jsonata/errors.lua @@ -48,20 +48,20 @@ local MESSAGES = { D3060 = "$sqrt of a number that is less than zero", D3070 = "The single argument of the $sort function must be an array of strings or numbers. Use a comparator function to sort other types.", D3100 = "The radix of $formatBase must be between 2 and 36", - D3080 = "The picture string must not contain more than one instance of the 'percent' character", - D3081 = "The picture string must not contain more than one instance of the 'decimal-separator' character", - D3082 = "The picture string must not contain more than one instance of the 'percent' character", - D3083 = "The picture string must not contain more than one instance of the 'per-mille' character", - D3084 = "The picture string must not contain both a 'percent' and a 'per-mille' character", - D3085 = "The mantissa part of a picture string must contain at least one character that is either an 'optional digit character' or a member of the 'decimal digit family'", - D3086 = "The picture string must not contain a passive character that is preceded by an active character and that is followed by another active character", - D3087 = "The picture string must not contain a 'grouping-separator' character that appears adjacent to a 'decimal-separator' character", - D3088 = "The picture string must not contain a 'grouping-separator' at the end of the integer part", - D3089 = "The picture string must not contain two adjacent instances of the 'grouping-separator' character", - D3090 = "The integer part of the picture string must not contain a member of the 'decimal digit family' that is followed by an instance of the 'optional digit character'", - D3091 = "The fractional part of the picture string must not contain an instance of the 'optional digit character' that is followed by a member of the 'decimal digit family'", + D3080 = "The picture string must only contain a maximum of two sub-pictures", + D3081 
= "The sub-picture must not contain more than one instance of the 'decimal-separator' character", + D3082 = "The sub-picture must not contain more than one instance of the 'percent' character", + D3083 = "The sub-picture must not contain more than one instance of the 'per-mille' character", + D3084 = "The sub-picture must not contain both a 'percent' and a 'per-mille' character", + D3085 = "The mantissa part of a sub-picture must contain at least one character that is either an 'optional digit character' or a member of the 'decimal digit family'", + D3086 = "The sub-picture must not contain a passive character that is preceded by an active character and that is followed by another active character", + D3087 = "The sub-picture must not contain a 'grouping-separator' character that appears adjacent to a 'decimal-separator' character", + D3088 = "The sub-picture must not contain a 'grouping-separator' at the end of the integer part", + D3089 = "The sub-picture must not contain two adjacent instances of the 'grouping-separator' character", + D3090 = "The integer part of the sub-picture must not contain a member of the 'decimal digit family' that is followed by an instance of the 'optional digit character'", + D3091 = "The fractional part of the sub-picture must not contain an instance of the 'optional digit character' that is followed by a member of the 'decimal digit family'", D3092 = "A sub-picture that contains a 'percent' or 'per-mille' character must not contain a character treated as an 'exponent-separator'", - D3093 = "The exponent part of the picture string must comprise only of one or more characters that are members of the 'decimal digit family'", + D3093 = "The exponent part of the sub-picture must comprise only of one or more characters that are members of the 'decimal digit family'", D3137 = "$error() function evaluated", D3138 = "The single() function expected exactly 1 matching result. 
Instead it matched more.",
   D3139 = "The single() function expected exactly 1 matching result. Instead it matched 0.",
diff --git a/src/jsonata/functions/helpers.lua b/src/jsonata/functions/helpers.lua
index ed2b21b..ad300d3 100644
--- a/src/jsonata/functions/helpers.lua
+++ b/src/jsonata/functions/helpers.lua
@@ -109,31 +109,62 @@ function H.err(code, info)
   errors.raise(code, info or {})
 end
 
--- Half-to-even (banker's) rounding with optional decimal precision. Shared by
--- $round, $formatBase, $formatNumber. (jsonata's `round`.)
+-- Shortest decimal text that parses back to exactly `x`, mirroring what JS
+-- Number.prototype.toString produces. Stock tostring() formats floats with
+-- "%.14g", silently dropping the 15th-17th significant digits of a double.
+local function shortest_repr(x)
+  if math.type and math.type(x) == "integer" then
+    return tostring(x) -- Lua 5.3+ integers already print exactly
+  end
+  for digits = 14, 17 do
+    local s = string.format("%." .. digits .. "g", x)
+    if tonumber(s) == x then
+      return s
+    end
+  end
+  return string.format("%.17g", x) -- nothing round-tripped (e.g. NaN)
+end
+
+-- Returns x * 10^by, computed by rewriting the exponent of x's decimal text.
+local function shift_decimal(x, by)
+  -- mimic JS: x.toString().split('e'); +(mantissa + 'e' + (exp + by))
+  local s = shortest_repr(x)
+  local mant, exp = s:match("^([^eE]+)[eE]([%+%-]?%d+)$")
+  -- %d keeps the exponent integral: on Lua 5.3+ a float `by` such as 2.0
+  -- would otherwise concatenate as "2.0" and tonumber() would return nil.
+  return tonumber(string.format("%se%d", mant or s, (tonumber(exp) or 0) + by))
+end
+
+-- Half-to-even (banker's) rounding with optional decimal precision. Faithful
+-- port of jsonata's round() (jsonata.js:2658): shift the decimal place via a
+-- STRING (never multiply by 10^p, which injects float error and used to drop
+-- significant digits), round half-up to nearest integer, then correct exact
+-- ties to even. Shared by $round, $formatBase, $formatNumber.
+-- @param x number value to round
+-- @param precision number|nil decimal places to keep (default 0)
+-- @return number x rounded half-to-even at that precision
 function H.round_half_even(x, precision)
   precision = precision or 0
-  local factor = 10 ^ precision
-  local scaled = x * factor
-  -- Correct binary-float representation error before the half-even test.
-  -- %.12g snaps values like 452.5000000000001 -> 452.5 (12 sig-fig round),
-  -- which is what jsonata-js does implicitly via IEEE-754 string coercion.
-  -- Guard: only apply when |scaled| < 1e13 so we don't truncate large integers
-  -- (e.g. 5890840712243076 has 16 digits and needs no correction — diff is 0).
-  if math.abs(scaled) < 1e13 then
-    scaled = tonumber(string.format("%.12g", scaled)) or scaled
-  end
-  local floored = math.floor(scaled)
-  local diff = scaled - floored
-  local rounded
-  if diff < 0.5 then
-    rounded = floored
-  elseif diff > 0.5 then
-    rounded = floored + 1
-  else
-    rounded = (floored % 2 == 0) and floored or floored + 1
-  end
-  return rounded / factor
+  local arg = x
+  if precision ~= 0 then
+    arg = shift_decimal(arg, precision)
+  end
+  -- Math.round: round half toward +infinity (frac-based so large integers,
+  -- where x + 0.5 is unrepresentable, are returned exactly).
+  local f = math.floor(arg)
+  local result = (arg - f < 0.5) and f or (f + 1)
+  -- ties-to-even: if we rounded exactly 0.5 the wrong way, step to even
+  local diff = result - arg
+  if math.abs(diff) == 0.5 and math.abs(result % 2) == 1 then
+    result = result - 1
+  end
+  if precision ~= 0 then
+    result = shift_decimal(result, -precision)
+  end
+  if result == 0 then -- normalize -0.0 to 0
+    result = 0
+  end
+  return result
 end
 
 -- Structural equality over internal values (numbers/strings/booleans exact;