commit-gnuradio
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Commit-gnuradio] [gnuradio] 04/22: volk: add neon support for 32fc_s32fc_multiply_32fc


From: git
Subject: [Commit-gnuradio] [gnuradio] 04/22: volk: add neon support for 32fc_s32fc_multiply_32fc
Date: Fri, 31 Oct 2014 19:22:30 +0000 (UTC)

This is an automated email from the git hooks/post-receive script.

jcorgan pushed a commit to branch master
in repository gnuradio.

commit 520ac293c30d725225c5f984b8bf55e6f1caecf3
Author: Nathan West <address@hidden>
Date:   Sat Oct 18 17:56:56 2014 -0500

    volk: add neon support for 32fc_s32fc_multiply_32fc
---
 volk/kernels/volk/volk_32fc_s32fc_multiply_32fc.h | 41 ++++++++++++++++++++---
 1 file changed, 37 insertions(+), 4 deletions(-)

diff --git a/volk/kernels/volk/volk_32fc_s32fc_multiply_32fc.h b/volk/kernels/volk/volk_32fc_s32fc_multiply_32fc.h
index 945b4b5..474b982 100644
--- a/volk/kernels/volk/volk_32fc_s32fc_multiply_32fc.h
+++ b/volk/kernels/volk/volk_32fc_s32fc_multiply_32fc.h
@@ -252,6 +252,43 @@ static inline void volk_32fc_s32fc_multiply_32fc_a_sse3(lv_32fc_t* cVector, cons
 }
 #endif /* LV_HAVE_SSE */
 
+#ifdef LV_HAVE_NEON
+#include <arm_neon.h>
+  /*!
+    \brief Multiplies every complex sample of the input vector by one complex scalar (NEON) and stores the results in cVector
+    \param cVector The vector where the results will be stored
+    \param aVector The input vector of complex samples to be scaled
+    \param scalar The complex value each element of aVector is multiplied by
+    \param num_points The number of complex values in aVector to be multiplied by scalar and stored into cVector
+  */
+static inline void volk_32fc_s32fc_multiply_32fc_neon(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t scalar, unsigned int num_points){
+    lv_32fc_t* cPtr = cVector;
+    const lv_32fc_t* aPtr = aVector;
+    unsigned int number;
+    const unsigned int quarter_points = num_points / 4;
+    float32x4x2_t a_val, scalar_val, c_val;
+
+    /* Broadcast the scalar's real and imaginary parts to all lanes; the scalar is only two floats wide, so it must not be read with vld2q_f32 */
+    scalar_val.val[0] = vld1q_dup_f32((const float*)&scalar);
+    scalar_val.val[1] = vld1q_dup_f32(((const float*)&scalar) + 1);
+    for(number = 0; number < quarter_points; ++number) {
+        /* De-interleave four complex samples: val[0] holds the real parts, val[1] the imaginary parts */
+        a_val = vld2q_f32((const float*)aPtr);
+        /* real = ar*sr - ai*si */
+        c_val.val[0] = vmlsq_f32(vmulq_f32(a_val.val[0], scalar_val.val[0]), a_val.val[1], scalar_val.val[1]);
+        /* imag = ai*sr + ar*si */
+        c_val.val[1] = vmlaq_f32(vmulq_f32(a_val.val[1], scalar_val.val[0]), a_val.val[0], scalar_val.val[1]);
+        vst2q_f32((float*)cPtr, c_val);
+        aPtr += 4;
+        cPtr += 4;
+    }
+
+    /* Scalar tail for the last 0-3 samples; aPtr and cPtr already point just past the vectorized part */
+    for(number = quarter_points*4; number < num_points; number++){
+      *cPtr++ = *aPtr++ * scalar;
+    }
+}
+#endif /* LV_HAVE_NEON */
 
 #ifdef LV_HAVE_GENERIC
   /*!
@@ -285,8 +322,4 @@ static inline void volk_32fc_s32fc_multiply_32fc_a_generic(lv_32fc_t* cVector, c
 }
 #endif /* LV_HAVE_GENERIC */
 
-
-
-
-
 #endif /* INCLUDED_volk_32fc_x2_multiply_32fc_a_H */



reply via email to

[Prev in Thread] Current Thread [Next in Thread]