[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Commit-gnuradio] r8971 - gnuradio/branches/developers/eb/vmx/gnuradio-core/src/lib/filter
From: |
eb |
Subject: |
[Commit-gnuradio] r8971 - gnuradio/branches/developers/eb/vmx/gnuradio-core/src/lib/filter |
Date: |
Tue, 22 Jul 2008 00:23:06 -0600 (MDT) |
Author: eb
Date: 2008-07-22 00:23:05 -0600 (Tue, 22 Jul 2008)
New Revision: 8971
Added:
gnuradio/branches/developers/eb/vmx/gnuradio-core/src/lib/filter/dotprod_fff_altivec.c
gnuradio/branches/developers/eb/vmx/gnuradio-core/src/lib/filter/dotprod_fff_altivec.h
Modified:
gnuradio/branches/developers/eb/vmx/gnuradio-core/src/lib/filter/Makefile.am
gnuradio/branches/developers/eb/vmx/gnuradio-core/src/lib/filter/gr_fir_fff_altivec.cc
Log:
working altivec gr_fir_fff. About 3x faster on PS3
Modified:
gnuradio/branches/developers/eb/vmx/gnuradio-core/src/lib/filter/Makefile.am
===================================================================
---
gnuradio/branches/developers/eb/vmx/gnuradio-core/src/lib/filter/Makefile.am
2008-07-22 03:57:22 UTC (rev 8970)
+++
gnuradio/branches/developers/eb/vmx/gnuradio-core/src/lib/filter/Makefile.am
2008-07-22 06:23:05 UTC (rev 8971)
@@ -171,7 +171,8 @@
gr_fir_sysconfig_powerpc.cc \
gr_cpu_powerpc.cc \
gr_fir_fff_altivec.cc \
- gr_altivec.c
+ gr_altivec.c \
+ dotprod_fff_altivec.c
powerpc_qa_CODE = \
qa_dotprod_powerpc.cc
@@ -291,6 +292,7 @@
noinst_HEADERS = \
assembly.h \
+ dotprod_fff_altivec.h \
gr_fir_scc_simd.h \
gr_fir_scc_x86.h \
gr_fir_fcc_simd.h \
Added:
gnuradio/branches/developers/eb/vmx/gnuradio-core/src/lib/filter/dotprod_fff_altivec.c
===================================================================
---
gnuradio/branches/developers/eb/vmx/gnuradio-core/src/lib/filter/dotprod_fff_altivec.c
(rev 0)
+++
gnuradio/branches/developers/eb/vmx/gnuradio-core/src/lib/filter/dotprod_fff_altivec.c
2008-07-22 06:23:05 UTC (rev 8971)
@@ -0,0 +1,162 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2008 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+#include <dotprod_fff_altivec.h>
+#include <gr_altivec.h>
+
+/*!
+ * \param x any value
+ * \param pow2 must be a power of 2 (the mask below is only correct for powers of 2)
+ * \returns \p x rounded down to a multiple of \p pow2.
+ */
+static inline size_t
+gr_p2_round_down(size_t x, size_t pow2)
+{
+ return x & -pow2; /* unsigned negation: -pow2 == ~(pow2 - 1), a mask clearing the low bits */
+}
+
+
+#if 0
+/* Plain scalar dot-product loop; compiled out by the enclosing #if 0. */
+float
+dotprod_fff_altivec(const float *a, const float *b, size_t n)
+{
+ float sum = 0;
+ for (size_t i = 0; i < n; i++){
+ sum += a[i] * b[i];
+ }
+ return sum;
+}
+
+#else
+
+/*
+ * Dot product of the float arrays _a and _b, computed with AltiVec vector ops.
+ * preconditions:
+ * n > 0 and a multiple of 4
+ * a 4-byte aligned
+ * b 16-byte aligned
+ */
+float
+dotprod_fff_altivec(const float *_a, const float *_b, size_t n)
+{
+ const vector float *a = (const vector float *) _a;
+ const vector float *b = (const vector float *) _b;
+
+ static const size_t UNROLL_CNT = 4; // vectors consumed per loop pass
+
+ n = gr_p2_round_down(n, 4); // ignore any elements past a multiple of 4
+ size_t loop_cnt = n / (UNROLL_CNT * FLOATS_PER_VEC); // number of full unrolled passes
+ size_t nleft = n % (UNROLL_CNT * FLOATS_PER_VEC); // floats left over for the switch below
+
+ // printf("n = %zd, loop_cnt = %zd, nleft = %zd\n", n, loop_cnt, nleft);
+
+ // Permute control used with vec_perm to build a* (realigned data) from p* (raw loads of a)
+ vector unsigned char lvsl_a = vec_lvsl(0, _a);
+
+ vector float p0, p1, p2, p3; // raw vec_ld results from a
+ vector float a0, a1, a2, a3; // a realigned via vec_perm of adjacent p* vectors
+ vector float b0, b1, b2, b3; // vec_ld results from b
+ vector float acc0 = {0, 0, 0, 0}; // one partial-sum accumulator per unrolled stage
+ vector float acc1 = {0, 0, 0, 0};
+ vector float acc2 = {0, 0, 0, 0};
+ vector float acc3 = {0, 0, 0, 0};
+
+ // wind in: preload four vectors of a and set up stage 0 before entering the loop
+ // NOTE(review): these loads (and the prefetches in the loop) run regardless of n, so memory past the last used element is read -- confirm callers allow this
+ p0 = vec_ld(0*VS, a);
+ p1 = vec_ld(1*VS, a);
+ p2 = vec_ld(2*VS, a);
+ p3 = vec_ld(3*VS, a);
+ a += UNROLL_CNT;
+
+ a0 = vec_perm(p0, p1, lvsl_a);
+ b0 = vec_ld(0*VS, b);
+ p0 = vec_ld(0*VS, a);
+
+ size_t i;
+ for (i = 0; i < loop_cnt; i++){
+ // Each stage realigns a, loads b, prefetches the next a vector, and accumulates the previous stage's product.
+ a1 = vec_perm(p1, p2, lvsl_a);
+ b1 = vec_ld(1*VS, b);
+ p1 = vec_ld(1*VS, a);
+ acc0 = vec_madd(a0, b0, acc0);
+
+ a2 = vec_perm(p2, p3, lvsl_a);
+ b2 = vec_ld(2*VS, b);
+ p2 = vec_ld(2*VS, a);
+ acc1 = vec_madd(a1, b1, acc1);
+
+ a3 = vec_perm(p3, p0, lvsl_a);
+ b3 = vec_ld(3*VS, b);
+ p3 = vec_ld(3*VS, a);
+ acc2 = vec_madd(a2, b2, acc2);
+
+ a += UNROLL_CNT;
+ b += UNROLL_CNT;
+
+ a0 = vec_perm(p0, p1, lvsl_a); // stage 0 of the next pass (or of the leftover handling below)
+ b0 = vec_ld(0*VS, b);
+ p0 = vec_ld(0*VS, a);
+ acc3 = vec_madd(a3, b3, acc3);
+ }
+ // wind out: accumulate the leftover vectors; a0/b0 and p1..p3 are already loaded at this point
+ /*
+ * The compiler ought to be able to figure out that 0, 4, 8 and 12
+ * are the only possible values for nleft.
+ */
+ switch (nleft){
+ case 0:
+ break;
+
+ case 4:
+ acc0 = vec_madd(a0, b0, acc0);
+ break;
+
+ case 8:
+ a1 = vec_perm(p1, p2, lvsl_a);
+ b1 = vec_ld(1*VS, b);
+ acc0 = vec_madd(a0, b0, acc0);
+ acc1 = vec_madd(a1, b1, acc1);
+ break;
+
+ case 12:
+ a1 = vec_perm(p1, p2, lvsl_a);
+ b1 = vec_ld(1*VS, b);
+ acc0 = vec_madd(a0, b0, acc0);
+ a2 = vec_perm(p2, p3, lvsl_a);
+ b2 = vec_ld(2*VS, b);
+ acc1 = vec_madd(a1, b1, acc1);
+ acc2 = vec_madd(a2, b2, acc2);
+ break;
+ }
+ // reduce the four partial-sum vectors to a single vector
+ acc0 = acc0 + acc1;
+ acc2 = acc2 + acc3;
+ acc0 = acc0 + acc2;
+
+ return horizontal_add_f(acc0); // helper defined outside this file; presumably sums the lanes of acc0
+}
+
+#endif
Property changes on:
gnuradio/branches/developers/eb/vmx/gnuradio-core/src/lib/filter/dotprod_fff_altivec.c
___________________________________________________________________
Name: svn:eol-style
+ native
Added:
gnuradio/branches/developers/eb/vmx/gnuradio-core/src/lib/filter/dotprod_fff_altivec.h
===================================================================
---
gnuradio/branches/developers/eb/vmx/gnuradio-core/src/lib/filter/dotprod_fff_altivec.h
(rev 0)
+++
gnuradio/branches/developers/eb/vmx/gnuradio-core/src/lib/filter/dotprod_fff_altivec.h
2008-07-22 06:23:05 UTC (rev 8971)
@@ -0,0 +1,49 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2008 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#ifndef INCLUDED_DOTPROD_FFF_ALTIVEC_H
+#define INCLUDED_DOTPROD_FFF_ALTIVEC_H
+
+#include <stddef.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*!
+ * \brief Compute the dot product of the float arrays \p a and \p b over \p n elements.
+ * <pre>
+ * preconditions:
+ *
+ * n > 0 and a multiple of 4
+ * a 4-byte aligned
+ * b 16-byte aligned
+ *
+ * </pre>
+ */
+float
+dotprod_fff_altivec(const float *a, const float *b, size_t n);
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif /* INCLUDED_DOTPROD_FFF_ALTIVEC_H */
Property changes on:
gnuradio/branches/developers/eb/vmx/gnuradio-core/src/lib/filter/dotprod_fff_altivec.h
___________________________________________________________________
Name: svn:eol-style
+ native
Modified:
gnuradio/branches/developers/eb/vmx/gnuradio-core/src/lib/filter/gr_fir_fff_altivec.cc
===================================================================
---
gnuradio/branches/developers/eb/vmx/gnuradio-core/src/lib/filter/gr_fir_fff_altivec.cc
2008-07-22 03:57:22 UTC (rev 8970)
+++
gnuradio/branches/developers/eb/vmx/gnuradio-core/src/lib/filter/gr_fir_fff_altivec.cc
2008-07-22 06:23:05 UTC (rev 8971)
@@ -28,137 +28,8 @@
#include <assert.h>
#include <gr_math.h>
#include <gr_altivec.h>
+#include <dotprod_fff_altivec.h>
-extern "C" {
-
-#if 0
-
-float
-dotprod_fff_altivec(const float *a, const float *b, size_t n)
-{
- float sum = 0;
- for (size_t i = 0; i < n; i++){
- sum += a[i] * b[i];
- }
- return sum;
-}
-
-#else
-/*
- * preconditions:
- *
- * n > 0 and a multiple of 4
- * a 4-byte aligned
- * b 16-byte aligned
- */
-float
-dotprod_fff_altivec(const float *_a, const float *_b, size_t n)
-{
- const vector float *a = (const vector float *) _a;
- const vector float *b = (const vector float *) _b;
-
- static const size_t UNROLL_CNT = 4;
-
- n = gr_p2_round_down(n, 4);
- size_t loop_cnt = n / (UNROLL_CNT * FLOATS_PER_VEC);
- size_t nleft = n % (UNROLL_CNT * FLOATS_PER_VEC);
-
- // printf("n = %zd, loop_cnt = %zd, nleft = %zd\n", n, loop_cnt, nleft);
-
- // Used with vperm to build a* from p*
- vector unsigned char lvsl_a = vec_lvsl(0, _a);
-
- vector float p0, p1, p2, p3;
- vector float a0, a1, a2, a3;
- vector float b0, b1, b2, b3;
- vector float acc0 = {0, 0, 0, 0};
- vector float acc1 = {0, 0, 0, 0};
- vector float acc2 = {0, 0, 0, 0};
- vector float acc3 = {0, 0, 0, 0};
-
- // wind in
-
- register int r0vs = 0 * VS;
- register int r1vs = 1 * VS;
- register int r2vs = 2 * VS;
- register int r3vs = 3 * VS;
-
- p0 = vec_ld(r0vs, a);
- p1 = vec_ld(r1vs, a);
- p2 = vec_ld(r2vs, a);
- p3 = vec_ld(r3vs, a);
- a += UNROLL_CNT;
-
- a0 = vec_perm(p0, p1, lvsl_a);
- b0 = vec_ld(r0vs, b);
- p0 = vec_ld(r0vs, a);
-
- for (size_t i = 0; i < loop_cnt; i++){
-
- a1 = vec_perm(p1, p2, lvsl_a);
- b1 = vec_ld(r1vs, b);
- p1 = vec_ld(r1vs, a);
- acc0 = vec_madd(a0, b0, acc0);
-
- a2 = vec_perm(p2, p3, lvsl_a);
- b2 = vec_ld(r2vs, b);
- p2 = vec_ld(r2vs, a);
- acc1 = vec_madd(a1, b1, acc1);
-
- a3 = vec_perm(p3, p0, lvsl_a);
- b3 = vec_ld(r3vs, b);
- p3 = vec_ld(r3vs, a);
- acc2 = vec_madd(a2, b2, acc2);
-
- a += UNROLL_CNT;
- b += UNROLL_CNT;
-
- a0 = vec_perm(p0, p1, lvsl_a);
- b0 = vec_ld(r0vs, b);
- p0 = vec_ld(r0vs, a);
- acc3 = vec_madd(a3, b3, acc3);
- }
-
- /*
- * The compiler ought to be able to figure out that 0, 4, 8 and 12
- * are the only possible values for nleft.
- */
- switch (nleft){
- case 0:
- break;
-
- case 4:
- acc0 = vec_madd(a0, b0, acc0);
- break;
-
- case 8:
- a1 = vec_perm(p1, p2, lvsl_a);
- b1 = vec_ld(r1vs, b);
- acc0 = vec_madd(a0, b0, acc0);
- acc1 = vec_madd(a1, b1, acc1);
- break;
-
- case 12:
- a1 = vec_perm(p1, p2, lvsl_a);
- b1 = vec_ld(r1vs, b);
- acc0 = vec_madd(a0, b0, acc0);
- a2 = vec_perm(p2, p3, lvsl_a);
- b2 = vec_ld(r2vs, b);
- acc1 = vec_madd(a1, b1, acc1);
- acc2 = vec_madd(a2, b2, acc2);
- break;
- }
-
- acc0 = acc0 + acc1;
- acc2 = acc2 + acc3;
- acc0 = acc0 + acc2;
-
- return horizontal_add_f(acc0);
-}
-
-#endif
-}
-
gr_fir_fff_altivec::gr_fir_fff_altivec()
: gr_fir_fff_generic(),
d_naligned_taps(0), d_aligned_taps(0)
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [Commit-gnuradio] r8971 - gnuradio/branches/developers/eb/vmx/gnuradio-core/src/lib/filter,
eb <=