[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Commit-gnuradio] r8964 - gnuradio/branches/developers/eb/vmx/gnuradio-core/src/lib/filter
From: eb
Subject: [Commit-gnuradio] r8964 - gnuradio/branches/developers/eb/vmx/gnuradio-core/src/lib/filter
Date: Mon, 21 Jul 2008 16:03:04 -0600 (MDT)
Author: eb
Date: 2008-07-21 16:03:03 -0600 (Mon, 21 Jul 2008)
New Revision: 8964
Modified:
gnuradio/branches/developers/eb/vmx/gnuradio-core/src/lib/filter/gr_fir_fff_vmx.cc
gnuradio/branches/developers/eb/vmx/gnuradio-core/src/lib/filter/qa_gr_fir_fff.cc
Log:
work-in-progress on altivec
Modified:
gnuradio/branches/developers/eb/vmx/gnuradio-core/src/lib/filter/gr_fir_fff_vmx.cc
===================================================================
---
gnuradio/branches/developers/eb/vmx/gnuradio-core/src/lib/filter/gr_fir_fff_vmx.cc
2008-07-21 21:42:29 UTC (rev 8963)
+++
gnuradio/branches/developers/eb/vmx/gnuradio-core/src/lib/filter/gr_fir_fff_vmx.cc
2008-07-21 22:03:03 UTC (rev 8964)
@@ -26,9 +26,11 @@
#include <altivec.h>
#include <stdlib.h>
#include <stdexcept>
+#include <assert.h>
-static const unsigned FLOATS_PER_VEC = 4;
+static const size_t VS = sizeof(vector float);
+static const size_t FLOATS_PER_VEC = 4;
union v_float_u {
vector float v;
@@ -94,17 +96,16 @@
#if 0
float
-dotprod_fff_vmx(const float *a, const float *b, unsigned n)
+dotprod_fff_vmx(const float *a, const float *b, size_t n)
{
float sum = 0;
- for (unsigned int i = 0; i < n; i++){
+ for (size_t i = 0; i < n; i++){
sum += a[i] * b[i];
}
return sum;
}
-#else
-
+#elif 0
/*
* preconditions:
*
@@ -113,11 +114,11 @@
* b 4-byte aligned
*/
float
-dotprod_fff_vmx(const float *a, const float *b, unsigned n)
+dotprod_fff_vmx(const float *a, const float *b, size_t n)
{
- static const unsigned FLOATS_PER_LOOP = 1 * FLOATS_PER_VEC;
+ static const size_t FLOATS_PER_LOOP = 1 * FLOATS_PER_VEC;
- unsigned loop_cnt = n / FLOATS_PER_LOOP;
+ size_t loop_cnt = n / FLOATS_PER_LOOP;
vector float acc0 = {0, 0, 0, 0};
vector unsigned char lvsl_a;
@@ -136,7 +137,7 @@
msq_b0 = vec_ld(0, b);
b += FLOATS_PER_VEC;
- for (unsigned i = 0; i < loop_cnt; i++){
+ for (size_t i = 0; i < loop_cnt; i++){
lsq_a0 = vec_ld(0, a);
lsq_b0 = vec_ld(0, b);
a += FLOATS_PER_VEC;
@@ -153,6 +154,115 @@
return horizontal_add_f(acc0);
}
+
+#else
+/*
+ * preconditions:
+ *
+ * n > 0 and a multiple of 4
+ * a 4-byte aligned
+ * b 16-byte aligned
+ */
+float
+dotprod_fff_vmx(const float *_a, const float *_b, size_t n)
+{
+ const vector float *a = (const vector float *) _a;
+ const vector float *b = (const vector float *) _b;
+
+ static const size_t UNROLL_CNT = 4;
+
+ size_t loop_cnt = n / (UNROLL_CNT * FLOATS_PER_VEC);
+ size_t nleft = n % (UNROLL_CNT * FLOATS_PER_VEC);
+
+ // printf("n = %zd, loop_cnt = %zd, nleft = %zd\n", n, loop_cnt, nleft);
+
+ // Used with vperm to build a* from p*
+ vector unsigned char lvsl_a = vec_lvsl(0, _a);
+
+ vector float p0, p1, p2, p3;
+ vector float a0, a1, a2, a3;
+ vector float b0, b1, b2, b3;
+ vector float acc0 = {0, 0, 0, 0};
+ vector float acc1 = {0, 0, 0, 0};
+ vector float acc2 = {0, 0, 0, 0};
+ vector float acc3 = {0, 0, 0, 0};
+
+ // wind in
+
+ p0 = vec_ld(0*VS, a);
+ p1 = vec_ld(1*VS, a);
+ p2 = vec_ld(2*VS, a);
+ p3 = vec_ld(3*VS, a);
+
+ a0 = vec_perm(p0, p1, lvsl_a);
+ b0 = vec_ld(0*VS, b);
+ p0 = vec_ld((UNROLL_CNT + 0)*VS, a);
+
+ for (size_t i = 0; i < loop_cnt; i++){
+
+ a1 = vec_perm(p1, p2, lvsl_a);
+ b1 = vec_ld(1*VS, b);
+ p1 = vec_ld((UNROLL_CNT + 1)*VS, a);
+ acc0 = vec_madd(a0, b0, acc0);
+
+ a2 = vec_perm(p2, p3, lvsl_a);
+ b2 = vec_ld(2*VS, b);
+ p2 = vec_ld((UNROLL_CNT + 2)*VS, a);
+ acc1 = vec_madd(a1, b1, acc1);
+
+ a3 = vec_perm(p3, p0, lvsl_a);
+ b3 = vec_ld(3*VS, b);
+ p3 = vec_ld((UNROLL_CNT + 3)*VS, a);
+ acc2 = vec_madd(a2, b2, acc2);
+
+ a += UNROLL_CNT;
+ b += UNROLL_CNT;
+
+ a0 = vec_perm(p0, p1, lvsl_a);
+ b0 = vec_ld(0*VS, b);
+ p0 = vec_ld((UNROLL_CNT + 0)*VS, a);
+ acc3 = vec_madd(a3, b3, acc3);
+ }
+
+ assert((nleft % 4) == 0);
+
+ switch (nleft/4){
+ case 0:
+ break;
+
+ case 1:
+ acc0 = vec_madd(a0, b0, acc0);
+ break;
+
+ case 2:
+ a1 = vec_perm(p1, p2, lvsl_a);
+ b1 = vec_ld(1*VS, b);
+ acc0 = vec_madd(a0, b0, acc0);
+ acc1 = vec_madd(a1, b1, acc1);
+ break;
+
+ case 3:
+ a1 = vec_perm(p1, p2, lvsl_a);
+ b1 = vec_ld(1*VS, b);
+ acc0 = vec_madd(a0, b0, acc0);
+ a2 = vec_perm(p2, p3, lvsl_a);
+ b2 = vec_ld(2*VS, b);
+ acc1 = vec_madd(a1, b1, acc1);
+ acc2 = vec_madd(a2, b2, acc2);
+ break;
+
+ default:
+ assert(0);
+ break;
+ }
+
+ acc0 = acc0 + acc1;
+ acc2 = acc2 + acc3;
+ acc0 = acc0 + acc2;
+
+ return horizontal_add_f(acc0);
+}
+
#endif
gr_fir_fff_vmx::gr_fir_fff_vmx()
Modified:
gnuradio/branches/developers/eb/vmx/gnuradio-core/src/lib/filter/qa_gr_fir_fff.cc
===================================================================
---
gnuradio/branches/developers/eb/vmx/gnuradio-core/src/lib/filter/qa_gr_fir_fff.cc
2008-07-21 21:42:29 UTC (rev 8963)
+++
gnuradio/branches/developers/eb/vmx/gnuradio-core/src/lib/filter/qa_gr_fir_fff.cc
2008-07-21 22:03:03 UTC (rev 8964)
@@ -143,7 +143,7 @@
static void
test_random_io (fir_maker_t maker)
{
- const int MAX_TAPS = 9;
+ const int MAX_TAPS = 32;
const int OUTPUT_LEN = 17;
const int INPUT_LEN = MAX_TAPS + OUTPUT_LEN;
@@ -187,7 +187,7 @@
for (int o = 0; o < ol; o++){
CPPUNIT_ASSERT_DOUBLES_EQUAL (expected_output[o], actual_output[o],
- fabs (expected_output[o]) * 1e-4);
+ fabs (expected_output[o]) * 5e-3);
}
delete f1;
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [Commit-gnuradio] r8964 - gnuradio/branches/developers/eb/vmx/gnuradio-core/src/lib/filter,
eb <=