commit-gnuradio
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Commit-gnuradio] [gnuradio] 07/22: volk: add neon kernel for 16i_32fc_dot_prod_32fc


From: git
Subject: [Commit-gnuradio] [gnuradio] 07/22: volk: add neon kernel for 16i_32fc_dot_prod_32fc
Date: Fri, 31 Oct 2014 19:22:30 +0000 (UTC)

This is an automated email from the git hooks/post-receive script.

jcorgan pushed a commit to branch master
in repository gnuradio.

commit c84890860e506ab0be0014674401910ac80ec267
Author: Nathan West <address@hidden>
Date:   Sat Oct 18 22:27:52 2014 -0500

    volk: add neon kernel for 16i_32fc_dot_prod_32fc
---
 volk/kernels/volk/volk_16i_32fc_dot_prod_32fc.h | 48 ++++++++++++++++++++++++-
 1 file changed, 47 insertions(+), 1 deletion(-)

diff --git a/volk/kernels/volk/volk_16i_32fc_dot_prod_32fc.h 
b/volk/kernels/volk/volk_16i_32fc_dot_prod_32fc.h
index 27f0bf6..2656d76 100644
--- a/volk/kernels/volk/volk_16i_32fc_dot_prod_32fc.h
+++ b/volk/kernels/volk/volk_16i_32fc_dot_prod_32fc.h
@@ -29,7 +29,6 @@
 
 #ifdef LV_HAVE_GENERIC
 
-
 static inline void volk_16i_32fc_dot_prod_32fc_generic(lv_32fc_t* result, 
const short* input, const lv_32fc_t * taps, unsigned int num_points) {
 
   static const int N_UNROLL = 4;
@@ -58,7 +57,54 @@ static inline void 
volk_16i_32fc_dot_prod_32fc_generic(lv_32fc_t* result, const
 
 #endif /*LV_HAVE_GENERIC*/
 
+#ifdef LV_HAVE_NEON
+#include <arm_neon.h>
+static inline void volk_16i_32fc_dot_prod_32fc_neon(lv_32fc_t* result, const 
short* input, const lv_32fc_t * taps, unsigned int num_points) {
+  // NEON dot product: sums input[i] * taps[i] over num_points and writes the complex sum to *result.
+  unsigned ii;
+  unsigned quarter_points = num_points / 4; // number of full 4-point vector iterations
+  lv_32fc_t* tapsPtr = (lv_32fc_t*) taps; // const is cast away, but taps are only read below
+  short* inputPtr = (short*) input; // const is cast away, but input is only read below
+  lv_32fc_t accumulator_vec[4]; // receives the four per-lane partial sums after the loop
+
+  float32x4x2_t tapsVal, accumulator_val;
+  int16x4_t input16;
+  int32x4_t input32;
+  float32x4_t input_float, prod_re, prod_im;
+
+  accumulator_val.val[0] = vdupq_n_f32(0.0);
+  accumulator_val.val[1] = vdupq_n_f32(0.0);
+
+  for(ii = 0; ii < quarter_points; ++ii) {
+    tapsVal = vld2q_f32((float*)tapsPtr); // de-interleaving load of 4 taps (8 floats)
+    input16 = vld1_s16(inputPtr);
+    // widen 16-bit int to 32-bit int
+    input32 = vmovl_s16(input16);
+    // convert 32-bit int to float (plain conversion, no scaling is applied)
+    input_float = vcvtq_f32_s32(input32);
+    // val[0]/val[1] hold the even/odd floats of the taps (presumably re/im; lv_32fc_t layout is not shown here)
+    prod_re = vmulq_f32(input_float, tapsVal.val[0]);
+    prod_im = vmulq_f32(input_float, tapsVal.val[1]);
+
+    accumulator_val.val[0] = vaddq_f32(prod_re, accumulator_val.val[0]);
+    accumulator_val.val[1] = vaddq_f32(prod_im, accumulator_val.val[1]);
+
+    tapsPtr += 4;
+    inputPtr += 4;
+  }
+  vst2q_f32((float*)accumulator_vec, accumulator_val); // interleaving store of the lane sums
+  accumulator_vec[0] += accumulator_vec[1];
+  accumulator_vec[2] += accumulator_vec[3];
+  accumulator_vec[0] += accumulator_vec[2]; // accumulator_vec[0] now holds the sum of all four lanes
+  // scalar tail: the remaining points when num_points is not a multiple of 4
+  for(ii = quarter_points * 4; ii < num_points; ++ii) {
+    accumulator_vec[0] += *(tapsPtr++) * (float)(*(inputPtr++));
+  }
+
+  *result = accumulator_vec[0];
+}
 
+#endif /*LV_HAVE_NEON*/
 
 #if LV_HAVE_SSE && LV_HAVE_MMX
 



reply via email to

[Prev in Thread] Current Thread [Next in Thread]