+static int32_t do_vqdmladh_w(int32_t a, int32_t b, int32_t c, int32_t d,
+ int round, bool *sat)
+{
+ int64_t m1 = (int64_t)a * b;
+ int64_t m2 = (int64_t)c * d;
+ int64_t r;
+ /*
+ * Architecturally we should do the entire add, double, round
+ * and then check for saturation. We do three saturating adds,
+ * but we need to be careful about the order. If the first
+ * m1 + m2 saturates then it's impossible for the *2+rc to
+ * bring it back into the non-saturated range. However, if
+ * m1 + m2 is negative then it's possible that doing the doubling
+ * would take the intermediate result below INT64_MIN and the
+ * addition of the rounding constant then brings it back in range.
+ * So we add half the rounding constant before doubling rather
+ * than adding the rounding constant after the doubling.
+ */
+ if (sadd64_overflow(m1, m2, &r) ||
+ sadd64_overflow(r, (round << 30), &r) ||
+ sadd64_overflow(r, r, &r)) {