From 41fb595b4e07d06ea54d528fda2ee10ca54b18b7 Mon Sep 17 00:00:00 2001 From: Anton Alkin Date: Mon, 24 Jan 2022 13:58:10 +0100 Subject: [PATCH 1/3] DPL Analysis: add exception for unsorted unassigned groups If unassigned groups are in incorrect order (or split), fast grouping is invalid due to incorrect offsets, so the execution should stop. --- Framework/Core/include/Framework/Kernels.h | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/Framework/Core/include/Framework/Kernels.h b/Framework/Core/include/Framework/Kernels.h index 3a5d06bfb0dc3..91ee4ff78bac8 100644 --- a/Framework/Core/include/Framework/Kernels.h +++ b/Framework/Core/include/Framework/Kernels.h @@ -81,14 +81,23 @@ auto sliceByColumn( for (auto i = 0; i < column->num_chunks(); ++i) { T prev = 0; T cur = 0; + T nprev = -1; auto array = static_cast::ArrowType>>(column->chunk(i)->data()); for (auto e = 0; e < array.length(); ++e) { if (cur >= 0) { prev = cur; + } else { + nprev = cur; } cur = array.Value(e); - if (cur >= 0 && prev > cur) { - throw runtime_error_f("Table %s index %s is not sorted: next value %d < previous value %d!", target, key, cur, prev); + if (cur >= 0) { + if (prev > cur) { + throw runtime_error_f("Table %s index %s is not sorted: next value %d < previous value %d!", target, key, cur, prev); + } + } else { + if (nprev < cur) { + throw runtime_error_f("Table %s index %s is not sorted: next negative value %d > previous negative value %d!", target, key, cur, nprev); + } } } } From 1cc75321bb0a5d8778bf0ec63abf170d37503917 Mon Sep 17 00:00:00 2001 From: Anton Alkin Date: Mon, 24 Jan 2022 16:57:33 +0100 Subject: [PATCH 2/3] improve the logic to handle groups split by opposite-signed index --- Framework/Core/include/Framework/Kernels.h | 23 ++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/Framework/Core/include/Framework/Kernels.h b/Framework/Core/include/Framework/Kernels.h index 91ee4ff78bac8..5b8c3220a2abb 100644 --- a/Framework/Core/include/Framework/Kernels.h +++ b/Framework/Core/include/Framework/Kernels.h @@ -81,22 +81,29 @@ auto sliceByColumn( for (auto i = 0; i < column->num_chunks(); ++i) { T prev = 0; T cur = 0; - T nprev = -1; + T lastNeg = -1; + T lastPos = 0; + auto array = static_cast::ArrowType>>(column->chunk(i)->data()); for (auto e = 0; e < array.length(); ++e) { - if (cur >= 0) { - prev = cur; + prev = cur; + if (prev >= 0) { + lastPos = prev; } else { - nprev = cur; + lastNeg = prev; } cur = array.Value(e); if (cur >= 0) { - if (prev > cur) { - throw runtime_error_f("Table %s index %s is not sorted: next value %d < previous value %d!", target, key, cur, prev); + if (lastPos > cur) { + throw runtime_error_f("Table %s index %s is not sorted: next value %d < previous value %d!", target, key, cur, lastPos); + } else if (lastPos == cur && prev < 0) { + throw runtime_error_f("Table %s index %s has a group with index %d that is split by %d", target, key, cur, prev); } } else { - if (nprev < cur) { - throw runtime_error_f("Table %s index %s is not sorted: next negative value %d > previous negative value %d!", target, key, cur, nprev); + if (lastNeg < cur) { + throw runtime_error_f("Table %s index %s is not sorted: next negative value %d > previous negative value %d!", target, key, cur, lastNeg); + } else if (lastNeg == cur && prev >= 0) { + throw runtime_error_f("Table %s index %s has a group with index %d that is split by %d", target, key, cur, prev); } } } From e86009d0f21716f8e0f43048bccf0941002a8e12 Mon Sep 17 00:00:00 2001 From: Anton Alkin Date: Mon, 24 Jan 2022 17:42:32 +0100 Subject: [PATCH 3/3] fixup! improve the logic to handle groups split by opposite-signed index --- Framework/Core/include/Framework/Kernels.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Framework/Core/include/Framework/Kernels.h b/Framework/Core/include/Framework/Kernels.h index 5b8c3220a2abb..1355968559868 100644 --- a/Framework/Core/include/Framework/Kernels.h +++ b/Framework/Core/include/Framework/Kernels.h @@ -81,7 +81,7 @@ auto sliceByColumn( for (auto i = 0; i < column->num_chunks(); ++i) { T prev = 0; T cur = 0; - T lastNeg = -1; + T lastNeg = 0; T lastPos = 0; auto array = static_cast::ArrowType>>(column->chunk(i)->data());