commit-gnuradio
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Commit-gnuradio] [gnuradio] 06/22: volk: adding popcnt puppets to qa


From: git
Subject: [Commit-gnuradio] [gnuradio] 06/22: volk: adding popcnt puppets to qa
Date: Fri, 31 Oct 2014 19:22:30 +0000 (UTC)

This is an automated email from the git hooks/post-receive script.

jcorgan pushed a commit to branch master
in repository gnuradio.

commit 085ab2179d77aa7f7e00c95200fb0c4f3c36fca4
Author: Nathan West <address@hidden>
Date:   Sat Oct 18 17:59:43 2014 -0500

    volk: adding popcnt puppets to qa
---
 volk/kernels/volk/volk_64u_popcnt.h           | 36 ---------------------------
 volk/kernels/volk/volk_64u_popcntpuppet_64u.h |  5 ++--
 volk/lib/testqa.cc                            |  4 +--
 3 files changed, 5 insertions(+), 40 deletions(-)

diff --git a/volk/kernels/volk/volk_64u_popcnt.h b/volk/kernels/volk/volk_64u_popcnt.h
index 5eb28c7..0ec72e3 100644
--- a/volk/kernels/volk/volk_64u_popcnt.h
+++ b/volk/kernels/volk/volk_64u_popcnt.h
@@ -74,42 +74,6 @@ static inline void volk_64u_popcnt_a_sse4_2(uint64_t* ret, const uint64_t value)
 #if LV_HAVE_NEON
 #include <arm_neon.h>
 static inline void volk_64u_popcnt_neon(uint64_t* ret, const uint64_t value) {
-    /* TABLE LUP
-    unsigned char table[] =   {0, 1, 1, 2, 1, 2, 2, 3,
-                               1, 2, 2, 3, 2, 3, 3, 4,
-                               1, 2, 2, 3, 2, 3, 3, 4,
-                               2, 3, 3, 4, 3, 4, 4, 5,
-                               1, 2, 2, 3, 2, 3, 3, 4,
-                               2, 3, 2, 4, 3, 4, 4, 5,
-                               2, 3, 3, 4, 3, 4, 4, 5,
-                               3, 4, 4, 5, 4, 5, 5, 6};
-
-    // we're stuck with a 64-element table, so treat the MSBs
-    // of each byte as 0 and sum them individually.
-    uint64_t input_7bit = values & 0x7F7F7F7F7F7F7F7F;
-    uint64_t input_msbs = value & 0x8080808080808080;
-    uint64_t sum =  (input_msbs >> 8) ;
-    sum += (input_msbs >> 16);
-    sum += (input_msbs >> 24);
-    sum += (input_msbs >> 32);
-    sum += (input_msbs >> 40);
-    sum += (input_msbs >> 48);
-    sum += (input_msbs >> 56);
-    sum += (input_msbs >> 64);
-
-    uint8x8x4_t table_val;
-    uint8x8_t input_val;
-    uint16x8x2_t intermediate_sum;
-    uint32x8_t intermediate_sum;
-
-    // load the table and input value
-    table_val = vld4q_u8(table);
-    input_val = vld1_u8((unsigned char *) &value);
-
-    // perform the lookup, output is uint8x8_t
-    input_val = vtbl4_u8(table_val, input_val);
-    */
-
     uint8x8_t input_val, count8x8_val;
     uint16x4_t count16x4_val;
     uint32x2_t count32x2_val;
diff --git a/volk/kernels/volk/volk_64u_popcntpuppet_64u.h b/volk/kernels/volk/volk_64u_popcntpuppet_64u.h
index 5837d0f..3903e0d 100644
--- a/volk/kernels/volk/volk_64u_popcntpuppet_64u.h
+++ b/volk/kernels/volk/volk_64u_popcntpuppet_64u.h
@@ -30,7 +30,8 @@
 static inline void volk_64u_popcntpuppet_64u_generic(uint64_t* outVector, const uint64_t* inVector, unsigned int num_points){
     unsigned int ii;
     for(ii=0; ii < num_points; ++ii) {
-        volk_64u_popcnt_generic(outVector+ii, *(inVector+ii) );
+        volk_64u_popcnt_generic(outVector+ii, num_points );
+        
     }
 }
 #endif /* LV_HAVE_GENERIC */
@@ -39,7 +40,7 @@ static inline void volk_64u_popcntpuppet_64u_generic(uint64_t* outVector, const
 static inline void volk_64u_popcntpuppet_64u_neon(uint64_t* outVector, const uint64_t* inVector, unsigned int num_points){
     unsigned int ii;
     for(ii=0; ii < num_points; ++ii) {
-        volk_64u_popcnt_neon(outVector+ii, *(inVector+ii) );
+        volk_64u_popcnt_neon(outVector+ii, num_points );
     }
 }
 #endif /* LV_HAVE_NEON */
diff --git a/volk/lib/testqa.cc b/volk/lib/testqa.cc
index 7d1826b..a3d8766 100644
--- a/volk/lib/testqa.cc
+++ b/volk/lib/testqa.cc
@@ -97,12 +97,12 @@ VOLK_RUN_TESTS(volk_32i_x2_and_32i, 0, 0, 20462, 1);
 VOLK_RUN_TESTS(volk_32i_s32f_convert_32f, 1e-4, 100, 20462, 1);
 VOLK_RUN_TESTS(volk_32i_x2_or_32i, 0, 0, 20462, 1);
 VOLK_RUN_TESTS(volk_32u_byteswap, 0, 0, 20462, 1);
-//VOLK_RUN_TESTS(volk_32u_popcnt, 0, 0, 2046, 10000);
+VOLK_RUN_TESTS(volk_32u_popcntpuppet_32u, 0, 0, 2046, 10000);
 VOLK_RUN_TESTS(volk_64f_convert_32f, 1e-4, 0, 20462, 1);
 VOLK_RUN_TESTS(volk_64f_x2_max_64f, 1e-4, 0, 20462, 1);
 VOLK_RUN_TESTS(volk_64f_x2_min_64f, 1e-4, 0, 20462, 1);
 VOLK_RUN_TESTS(volk_64u_byteswap, 0, 0, 20462, 1);
-//VOLK_RUN_TESTS(volk_64u_popcnt, 0, 0, 2046, 10000);
+VOLK_RUN_TESTS(volk_64u_popcntpuppet_64u, 0, 0, 2046, 10000);
 VOLK_RUN_TESTS(volk_8ic_deinterleave_16i_x2, 0, 0, 20462, 1);
 VOLK_RUN_TESTS(volk_8ic_s32f_deinterleave_32f_x2, 1e-4, 100, 20462, 1);
 VOLK_RUN_TESTS(volk_8ic_deinterleave_real_16i, 0, 256, 20462, 1);



reply via email to

[Prev in Thread] Current Thread [Next in Thread]