commit-gnuradio
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Commit-gnuradio] [gnuradio] 01/03: volk: adding a binary slicer kernel


From: git
Subject: [Commit-gnuradio] [gnuradio] 01/03: volk: adding a binary slicer kernel
Date: Fri, 13 Jun 2014 23:52:13 +0000 (UTC)

This is an automated email from the git hooks/post-receive script.

jcorgan pushed a commit to branch master
in repository gnuradio.

commit b2370cf57999815c0fcd6fd7a10217b4607e3be6
Author: Nathan West <address@hidden>
Date:   Mon Jun 9 22:53:50 2014 -0400

    volk: adding a binary slicer kernel
---
 volk/apps/volk_profile.cc                      |   1 +
 volk/kernels/volk/volk_32f_binary_slicer_32i.h | 221 +++++++++++++++++++++++++
 volk/lib/testqa.cc                             |   1 +
 3 files changed, 223 insertions(+)

diff --git a/volk/apps/volk_profile.cc b/volk/apps/volk_profile.cc
index 97b0a5b..10426e2 100644
--- a/volk/apps/volk_profile.cc
+++ b/volk/apps/volk_profile.cc
@@ -153,6 +153,7 @@ int main(int argc, char *argv[]) {
     //VOLK_PROFILE(volk_32fc_s32fc_multiply_32fc, 1e-4, lv_32fc_t(1.0, 0.5), 
204602, 1000, &results, benchmark_mode, kernel_regex);
     VOLK_PROFILE(volk_32fc_s32fc_multiply_32fc, 1e-4, 0, 204602, 1000, 
&results, benchmark_mode, kernel_regex);
     VOLK_PROFILE(volk_32f_s32f_multiply_32f, 1e-4, 1.0, 204602, 10000, 
&results, benchmark_mode, kernel_regex);
+    VOLK_PROFILE(volk_32f_binary_slicer_32i, 0, 1.0, 204602, 10000, &results, 
benchmark_mode, kernel_regex);
 
     // Until we can update the config on a kernel by kernel basis
     // do not overwrite volk_config when using a regex.
diff --git a/volk/kernels/volk/volk_32f_binary_slicer_32i.h 
b/volk/kernels/volk/volk_32f_binary_slicer_32i.h
new file mode 100644
index 0000000..6444897
--- /dev/null
+++ b/volk/kernels/volk/volk_32f_binary_slicer_32i.h
@@ -0,0 +1,221 @@
+#ifndef INCLUDED_volk_32f_binary_slicer_32f_H
+#define INCLUDED_volk_32f_binary_slicer_32f_H
+
+
+#ifdef LV_HAVE_GENERIC
+/*!
+  \brief Returns integer 1 if float input is greater than or equal to 0, 0 
otherwise
+  \param cVector The int output (either 0 or 1)
+  \param aVector The float input
+  \param num_points The number of values in aVector and stored into cVector
+*/
+static inline void volk_32f_binary_slicer_32i_generic(int* cVector, const 
float* aVector, unsigned int num_points){
+    int* cPtr = cVector;
+    const float* aPtr = aVector;
+    unsigned int number = 0;
+
+    for(number = 0; number < num_points; number++){
+        if( *aPtr++ >= 0) { /* zero and positive samples slice to 1 */
+            *cPtr++ = 1;
+        }
+        else { /* everything else (negative, or NaN since the comparison is false) slices to 0 */
+            *cPtr++ = 0;
+        }
+    }
+}
+#endif /* LV_HAVE_GENERIC */
+
+#ifdef LV_HAVE_SSE2
+#include <emmintrin.h>
+/*!
+  \brief Returns integer 1 if float input is greater than or equal to 0, 0 
otherwise
+  \param cVector The int output (either 0 or 1)
+  \param aVector The float input
+  \param num_points The number of values in aVector and stored into cVector
+*/
+static inline void volk_32f_binary_slicer_32i_a_sse2(int* cVector, const 
float* aVector, unsigned int num_points){
+    int* cPtr = cVector;
+    const float* aPtr = aVector;
+    unsigned int number = 0;
+
+    float binary_float_buffer[4]; /* NOTE(review): never referenced in this function */
+    unsigned int quarter_points = num_points / 4; /* number of full 4-float groups */
+    __m128 a_val, res_f, binary_f; /* NOTE(review): binary_f is never used in this function */
+    __m128i res_i, binary_i;
+    __m128 zero_val;
+    zero_val = _mm_set1_ps (0.0f);
+
+    for(number = 0; number < quarter_points; number++){
+        a_val = _mm_load_ps(aPtr); /* aligned load: aPtr must be 16-byte aligned */
+
+        res_f = _mm_cmpge_ps (a_val, zero_val); /* per lane: all-ones where a >= 0, all-zeros otherwise */
+        res_i = _mm_cvtps_epi32 (res_f); /* the all-ones mask is a NaN bit pattern, which the conversion turns into 0x80000000; zero stays 0 */
+        binary_i = _mm_srli_epi32 (res_i, 31); /* logical shift brings the top bit down: 1 where true, 0 where false */
+
+
+        _mm_store_si128((__m128i*)cPtr, binary_i); /* aligned store: cPtr must be 16-byte aligned */
+
+
+        cPtr += 4;
+        aPtr += 4;
+    }
+
+    for(number = quarter_points * 4; number < num_points; number++){ /* scalar tail for the num_points % 4 leftover samples */
+        if( *aPtr++ >= 0) {
+            *cPtr++ = 1;
+        }
+        else {
+            *cPtr++ = 0;
+        }
+    }
+}
+#endif /* LV_HAVE_SSE2 */
+
+
+#ifdef LV_HAVE_AVX
+#include <immintrin.h>
+/*!
+  \brief Returns integer 1 if float input is greater than or equal to 0, 0 
otherwise
+  \param cVector The int output (either 0 or 1)
+  \param aVector The float input
+  \param num_points The number of values in aVector and stored into cVector
+*/
+static inline void volk_32f_binary_slicer_32i_a_avx(int* cVector, const float* 
aVector, unsigned int num_points){
+    int* cPtr = cVector;
+    const float* aPtr = aVector;
+    unsigned int number = 0;
+
+    float binary_float_buffer[4]; /* NOTE(review): never referenced in this function */
+    unsigned int quarter_points = num_points / 8; /* despite the name, this is the number of full 8-float groups */
+    __m256 a_val, res_f, binary_f;
+    __m256i res_i, binary_i; /* NOTE(review): res_i is never used in this function */
+    __m256 zero_val, one_val;
+    zero_val = _mm256_set1_ps (0.0f);
+    one_val = _mm256_set1_ps (1.0f);
+
+    for(number = 0; number < quarter_points; number++){
+        a_val = _mm256_load_ps(aPtr); /* aligned load: aPtr must be 32-byte aligned */
+
+        res_f = _mm256_cmp_ps (a_val, zero_val, 13); /* predicate 13 is _CMP_GE_OS: all-ones where a >= 0, all-zeros otherwise */
+        binary_f = _mm256_and_ps (res_f, one_val); /* mask AND 1.0f: 1.0f where true, 0.0f where false */
+        binary_i = _mm256_cvtps_epi32(binary_f); /* 1.0f / 0.0f become integer 1 / 0 */
+
+
+
+        _mm256_store_si256((__m256i *)cPtr, binary_i); /* aligned store: cPtr must be 32-byte aligned */
+
+
+        cPtr += 8;
+        aPtr += 8;
+    }
+
+    for(number = quarter_points * 8; number < num_points; number++){ /* scalar tail for the num_points % 8 leftover samples */
+        if( *aPtr++ >= 0) {
+            *cPtr++ = 1;
+        }
+        else {
+            *cPtr++ = 0;
+        }
+    }
+}
+#endif /* LV_HAVE_AVX */
+
+
+#ifdef LV_HAVE_SSE2
+#include <emmintrin.h>
+/*!
+  \brief Returns integer 1 if float input is greater than or equal to 0, 0 
otherwise
+  \param cVector The int output (either 0 or 1)
+  \param aVector The float input
+  \param num_points The number of values in aVector and stored into cVector
+*/
+static inline void volk_32f_binary_slicer_32i_u_sse2(int* cVector, const 
float* aVector, unsigned int num_points){
+    int* cPtr = cVector;
+    const float* aPtr = aVector;
+    unsigned int number = 0;
+
+    float binary_float_buffer[4]; /* NOTE(review): never referenced in this function */
+    unsigned int quarter_points = num_points / 4; /* number of full 4-float groups */
+    __m128 a_val, res_f, binary_f; /* NOTE(review): binary_f is never used in this function */
+    __m128i res_i, binary_i;
+    __m128 zero_val;
+    zero_val = _mm_set1_ps (0.0f);
+
+    for(number = 0; number < quarter_points; number++){
+        a_val = _mm_loadu_ps(aPtr); /* unaligned load: no alignment requirement on aPtr */
+
+        res_f = _mm_cmpge_ps (a_val, zero_val); /* per lane: all-ones where a >= 0, all-zeros otherwise */
+        res_i = _mm_cvtps_epi32 (res_f); /* the all-ones mask is a NaN bit pattern, which the conversion turns into 0x80000000; zero stays 0 */
+        binary_i = _mm_srli_epi32 (res_i, 31); /* logical shift brings the top bit down: 1 where true, 0 where false */
+
+
+        _mm_storeu_si128((__m128i*)cPtr, binary_i); /* unaligned store: no alignment requirement on cPtr */
+
+
+        cPtr += 4;
+        aPtr += 4;
+    }
+
+    for(number = quarter_points * 4; number < num_points; number++){ /* scalar tail for the num_points % 4 leftover samples */
+        if( *aPtr++ >= 0) {
+            *cPtr++ = 1;
+        }
+        else {
+            *cPtr++ = 0;
+        }
+    }
+}
+#endif /* LV_HAVE_SSE2 */
+
+
+#ifdef LV_HAVE_AVX
+#include <immintrin.h>
+/*!
+  \brief Returns integer 1 if float input is greater than or equal to 0, 0 
otherwise
+  \param cVector The int output (either 0 or 1)
+  \param aVector The float input
+  \param num_points The number of values in aVector and stored into cVector
+*/
+static inline void volk_32f_binary_slicer_32i_u_avx(int* cVector, const float* 
aVector, unsigned int num_points){
+    int* cPtr = cVector;
+    const float* aPtr = aVector;
+    unsigned int number = 0;
+
+    float binary_float_buffer[4]; /* NOTE(review): never referenced in this function */
+    unsigned int quarter_points = num_points / 8; /* despite the name, this is the number of full 8-float groups */
+    __m256 a_val, res_f, binary_f;
+    __m256i res_i, binary_i; /* NOTE(review): res_i is never used in this function */
+    __m256 zero_val, one_val;
+    zero_val = _mm256_set1_ps (0.0f);
+    one_val = _mm256_set1_ps (1.0f);
+
+    for(number = 0; number < quarter_points; number++){
+        a_val = _mm256_loadu_ps(aPtr); /* unaligned load: no alignment requirement on aPtr */
+
+        res_f = _mm256_cmp_ps (a_val, zero_val, 13); /* predicate 13 is _CMP_GE_OS: all-ones where a >= 0, all-zeros otherwise */
+        binary_f = _mm256_and_ps (res_f, one_val); /* mask AND 1.0f: 1.0f where true, 0.0f where false */
+        binary_i = _mm256_cvtps_epi32(binary_f); /* 1.0f / 0.0f become integer 1 / 0 */
+
+
+
+        _mm256_storeu_si256((__m256i*)cPtr, binary_i); /* unaligned store: no alignment requirement on cPtr */
+
+
+        cPtr += 8;
+        aPtr += 8;
+    }
+
+    for(number = quarter_points * 8; number < num_points; number++){ /* scalar tail for the num_points % 8 leftover samples */
+        if( *aPtr++ >= 0) {
+            *cPtr++ = 1;
+        }
+        else {
+            *cPtr++ = 0;
+        }
+    }
+}
+#endif /* LV_HAVE_AVX */
+
+
+
+#endif /* INCLUDED_volk_32f_binary_slicer_32f_H */
diff --git a/volk/lib/testqa.cc b/volk/lib/testqa.cc
index f97a646..41093d2 100644
--- a/volk/lib/testqa.cc
+++ b/volk/lib/testqa.cc
@@ -89,3 +89,4 @@ VOLK_RUN_TESTS(volk_32fc_s32fc_multiply_32fc, 1e-4, 0, 20462, 
1);
 VOLK_RUN_TESTS(volk_32f_s32f_multiply_32f, 1e-4, 0, 20462, 1);
 VOLK_RUN_TESTS(volk_32fc_s32fc_rotatorpuppet_32fc, 1e-3, 
(lv_32fc_t)lv_cmake(0.953939201, 0.3), 20462, 1);
 VOLK_RUN_TESTS(volk_32f_invsqrt_32f, 1e-2, 0, 20462, 1);
+VOLK_RUN_TESTS(volk_32f_binary_slicer_32i, 0, 0, 20462, 1);



reply via email to

[Prev in Thread] Current Thread [Next in Thread]