[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[lmi] fma() is not a speed optimization: self-documenting patch
From: Greg Chicares
Subject: [lmi] fma() is not a speed optimization: self-documenting patch
Date: Sun, 1 May 2022 19:35:30 +0000
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:91.0) Gecko/20100101 Thunderbird/91.8.0
--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--
diff --git a/financial.hpp b/financial.hpp
index 7eba93bb6..ee5aea5ff 100644
--- a/financial.hpp
+++ b/financial.hpp
@@ -30,6 +30,7 @@
#include "miscellany.hpp" // ios_out_app_binary()
#include "zero.hpp" // decimal_root()
+#include <cmath> // fma()
#include <fstream>
#include <iterator> // iterator_traits
@@ -56,13 +57,31 @@ long double fv
// Symbol v, meaning 1/(1+i), is standard. A corresponding
// symbol u, meaning (1+i), is not standard, but should be.
long double const u = 1.0L + i;
- long double z = 0.0L;
- for(InputIterator j = first; j != last; ++j)
+ long double z = *first;
+ for(InputIterator j = 1 + first; j != last; ++j)
{
- z += *j;
- z *= u;
+// several options tested with:
+// $make $coefficiency unit_tests unit_test_targets=financial_test 2>&1 |grep form
+// all unit tests pass for all options; only speed varies
+//
+// HEAD (without the other modifications here):
+// z += *j;
+// z *= u;
+// iterator form: 1.440e-02 s mean; 13512 us least of 70 runs
+// container form: 1.361e-02 s mean; 13387 us least of 74 runs
+
+// Rearranged calculation (multiply, then add):
+// z = (z * u) + *j;
+// iterator form: 1.519e-02 s mean; 13450 us least of 66 runs
+// container form: 1.368e-02 s mean; 13456 us least of 74 runs
+
+// Fused multiply-add...expecting a speed gain?
+ z = std::fma(z, u, *j);
+// iterator form: 3.343e-01 s mean; 333474 us least of 3 runs
+// container form: 3.338e-01 s mean; 333260 us least of 3 runs
+// no material difference observed with std::fmal() instead of std::fma()
}
- return z;
+ return z * u;
}
/// IRR: internal rate of return
--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [lmi] fma() is not a speed optimization: self-documenting patch,
Greg Chicares <=