From 2aa9a0e81679d5a8d26e30939f8f851c0535c215 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sun, 22 Jan 2023 15:51:26 -0600 Subject: [PATCH 1/8] Update citation. Reorder functions. Add final twosum() call. --- Modules/mathmodule.c | 42 +++++++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index 1342162fa74be7..c0b1feb9f97cb6 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -2842,10 +2842,13 @@ based on ideas from three sources: by T. J. Dekker https://csclub.uwaterloo.ca/~pbarfuss/dekker1971.pdf - Ultimately Fast Accurate Summation by Siegfried M. Rump - https://www.tuhh.de/ti3/paper/rump/Ru08b.pdf + Accurate Sum and Dot Product + by Takeshi Ogita, Siegfried M. Rump, and Shin’Ichi Oishi + https://doi.org/10.1137/030601818 + https://www.tuhh.de/ti3/paper/rump/OgRuOi05.pdf Double length functions: +* twosum() error-free transformation of the sum of two C doubles * dl_split() exact split of a C double into two half precision components. * dl_mul() exact multiplication of two C doubles. @@ -2874,22 +2877,6 @@ twosum(double a, double b) return (DoubleLength) {s, t}; } -static inline TripleLength -tl_add(TripleLength total, double x) -{ - /* Input: x total.hi total.lo total.tiny - |--- twosum ---| - s.hi s.lo - |--- twosum ---| - t.hi t.lo - |--- single sum ---| - Output: s.hi t.hi tiny - */ - DoubleLength s = twosum(x, total.hi); - DoubleLength t = twosum(s.lo, total.lo); - return (TripleLength) {s.hi, t.hi, t.lo + total.tiny}; -} - static inline DoubleLength dl_split(double x) { double t = x * 134217729.0; /* Veltkamp constant = float(0x8000001) */ @@ -2911,6 +2898,22 @@ dl_mul(double x, double y) return (DoubleLength) {z, zz}; } +static inline TripleLength +tl_add(TripleLength total, double x) +{ + /* Input: x total.hi total.lo total.tiny + |--- twosum ---| + s.hi s.lo + |--- twosum ---| + t.hi t.lo + |--- single sum ---| + Output: s.hi t.hi tiny + */ + DoubleLength s = twosum(x, total.hi); + DoubleLength t = twosum(s.lo, total.lo); + return (TripleLength) {s.hi, t.hi, t.lo + total.tiny}; +} + static inline TripleLength tl_fma(TripleLength total, double p, double q) { @@ -2922,7 +2925,8 @@ tl_fma(TripleLength total, double p, double q) static inline double tl_to_d(TripleLength total) { - return total.tiny + total.lo + total.hi; + DoubleLength last = twosum(total.lo, total.hi); + return total.tiny + last.lo + last.hi; } /*[clinic input] From 6ec01c2232482dde2ff85edc4ffbb2e05c7ac09a Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sun, 22 Jan 2023 16:10:02 -0600 Subject: [PATCH 2/8] The ASCII art was more distracting than helpful. --- Modules/mathmodule.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index c0b1feb9f97cb6..a70449bb5198cd 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -2901,14 +2901,6 @@ dl_mul(double x, double y) static inline TripleLength tl_add(TripleLength total, double x) { - /* Input: x total.hi total.lo total.tiny - |--- twosum ---| - s.hi s.lo - |--- twosum ---| - t.hi t.lo - |--- single sum ---| - Output: s.hi t.hi tiny - */ DoubleLength s = twosum(x, total.hi); DoubleLength t = twosum(s.lo, total.lo); return (TripleLength) {s.hi, t.hi, t.lo + total.tiny}; From 12018cd819b58ea4b3312f13c9797652f048474a Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sun, 22 Jan 2023 16:14:23 -0600 Subject: [PATCH 3/8] Segregate double length API from triple length API. --- Modules/mathmodule.c | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index a70449bb5198cd..e2b17dc1562e99 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -2847,23 +2847,9 @@ based on ideas from three sources: https://doi.org/10.1137/030601818 https://www.tuhh.de/ti3/paper/rump/OgRuOi05.pdf -Double length functions: -* twosum() error-free transformation of the sum of two C doubles -* dl_split() exact split of a C double into two half precision components. -* dl_mul() exact multiplication of two C doubles. - -Triple length functions and constant: -* tl_zero is a triple length zero for starting or resetting an accumulation. -* tl_add() compensated addition of a C double to a triple length number. -* tl_fma() performs a triple length fused-multiply-add. -* tl_to_d() converts from triple length number back to a C double. - */ typedef struct{ double hi; double lo; } DoubleLength; -typedef struct{ double hi; double lo; double tiny; } TripleLength; - -static const TripleLength tl_zero = {0.0, 0.0, 0.0}; static inline DoubleLength twosum(double a, double b) @@ -2898,6 +2884,10 @@ dl_mul(double x, double y) return (DoubleLength) {z, zz}; } +typedef struct{ double hi; double lo; double tiny; } TripleLength; + +static const TripleLength tl_zero = {0.0, 0.0, 0.0}; + static inline TripleLength tl_add(TripleLength total, double x) { From 7b5cf0c13792d35ae2765b9121e7be14f759eac6 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sun, 22 Jan 2023 16:19:24 -0600 Subject: [PATCH 4/8] Make comment match presentation in the paper. --- Modules/mathmodule.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index e2b17dc1562e99..35606072c690a7 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -2865,7 +2865,7 @@ twosum(double a, double b) static inline DoubleLength dl_split(double x) { - double t = x * 134217729.0; /* Veltkamp constant = float(0x8000001) */ + double t = x * 134217729.0; // Veltkamp constant = 2.0 ** 27 + 1 double hi = t - (t - x); double lo = x - hi; return (DoubleLength) {hi, lo}; From d271af1f304ac51c408d98280a1c8a66b234667a Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sun, 22 Jan 2023 16:39:17 -0600 Subject: [PATCH 5/8] Just two sources cover all the code. --- Modules/mathmodule.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index 35606072c690a7..091499f17eac94 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -2833,10 +2833,7 @@ long_add_would_overflow(long a, long b) /* Double and triple length extended precision floating point arithmetic -based on ideas from three sources: - - Improved Kahan–Babuška algorithm by Arnold Neumaier - https://www.mat.univie.ac.at/~neum/scan/01.pdf +based on ideas: A Floating-Point Technique for Extending the Available Precision by T. J. Dekker From 52c940ba84c97846f374f34836f7738b3bee65d8 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sun, 22 Jan 2023 16:40:53 -0600 Subject: [PATCH 6/8] Fix typo --- Modules/mathmodule.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index 091499f17eac94..02632febedb463 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -2833,7 +2833,7 @@ long_add_would_overflow(long a, long b) /* Double and triple length extended precision floating point arithmetic -based on ideas: +based on: A Floating-Point Technique for Extending the Available Precision by T. J. Dekker From 1974f594a90266f399aacaeed6fb99f57083c106 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sun, 22 Jan 2023 16:44:53 -0600 Subject: [PATCH 7/8] Remove superfluous character --- Modules/mathmodule.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index 02632febedb463..2f22d7acde533b 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -3022,7 +3022,7 @@ math_sumprod_impl(PyObject *module, PyObject *p, PyObject *q) } finalize_int_path: - // # We're finished, overflowed, or have a non-int + // We're finished, overflowed, or have a non-int int_path_enabled = false; if (int_total_in_use) { term_i = PyLong_FromLong(int_total); From 5dc90994221236b98292bb6c1bff63d0c8d2e29e Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sun, 22 Jan 2023 16:45:22 -0600 Subject: [PATCH 8/8] Remove superfluous character --- Modules/mathmodule.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index 2f22d7acde533b..69907ea04ec812 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -3022,7 +3022,7 @@ math_sumprod_impl(PyObject *module, PyObject *p, PyObject *q) } finalize_int_path: - // We're finished, overflowed, or have a non-int + // We're finished, overflowed, or have a non-int int_path_enabled = false; if (int_total_in_use) { term_i = PyLong_FromLong(int_total);