commit-gnuradio
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Commit-gnuradio] [gnuradio] 05/22: volk: popcnt support


From: git
Subject: [Commit-gnuradio] [gnuradio] 05/22: volk: popcnt support
Date: Fri, 31 Oct 2014 19:22:30 +0000 (UTC)

This is an automated email from the git hooks/post-receive script.

jcorgan pushed a commit to branch master
in repository gnuradio.

commit 068bc75e2a2254f1ea8a8607d22b38bc41eeaefd
Author: Nathan West <address@hidden>
Date:   Sat Oct 18 17:57:55 2014 -0500

    volk: popcnt support
    
    Add a neon protokernel for 64-bit popcnt, and puppets so 64-bit and
    32-bit versions can be tested with volk_profile
---
 volk/apps/volk_profile.cc                     |  4 +-
 volk/kernels/volk/volk_32u_popcntpuppet_32u.h | 47 ++++++++++++++++++++++
 volk/kernels/volk/volk_64u_popcnt.h           | 56 +++++++++++++++++++++++++++
 volk/kernels/volk/volk_64u_popcntpuppet_64u.h | 47 ++++++++++++++++++++++
 4 files changed, 152 insertions(+), 2 deletions(-)

diff --git a/volk/apps/volk_profile.cc b/volk/apps/volk_profile.cc
index 5030836..e3f0ba7 100644
--- a/volk/apps/volk_profile.cc
+++ b/volk/apps/volk_profile.cc
@@ -220,12 +220,12 @@ int main(int argc, char *argv[]) {
     VOLK_PROFILE(volk_32i_s32f_convert_32f, 1e-4, 100, 204602, 10000, 
&results, benchmark_mode, kernel_regex);
     VOLK_PROFILE(volk_32i_x2_or_32i, 0, 0, 204602, 10000, &results, 
benchmark_mode, kernel_regex);
     VOLK_PROFILE(volk_32u_byteswap, 0, 0, 204602, 2000, &results, 
benchmark_mode, kernel_regex);
-    //VOLK_PROFILE(volk_32u_popcnt, 0, 0, 2046, 10000, &results, 
benchmark_mode, kernel_regex);
+    VOLK_PUPPET_PROFILE(volk_32u_popcntpuppet_32u, volk32u_popcnt_32u,  0, 0, 
2046, 10000, &results, benchmark_mode, kernel_regex);
     VOLK_PROFILE(volk_64f_convert_32f, 1e-4, 0, 204602, 10000, &results, 
benchmark_mode, kernel_regex);
     VOLK_PROFILE(volk_64f_x2_max_64f, 1e-4, 0, 204602, 1000, &results, 
benchmark_mode, kernel_regex);
     VOLK_PROFILE(volk_64f_x2_min_64f, 1e-4, 0, 204602, 1000, &results, 
benchmark_mode, kernel_regex);
     VOLK_PROFILE(volk_64u_byteswap, 0, 0, 204602, 1000, &results, 
benchmark_mode, kernel_regex);
-    //VOLK_PROFILE(volk_64u_popcnt, 0, 0, 2046, 10000, &results, 
benchmark_mode, kernel_regex);
+    VOLK_PUPPET_PROFILE(volk_64u_popcntpuppet_64u, volk_64u_popcnt, 0, 0, 
2046, 10000, &results, benchmark_mode, kernel_regex);
     VOLK_PROFILE(volk_8ic_deinterleave_16i_x2, 0, 0, 204602, 3000, &results, 
benchmark_mode, kernel_regex);
     VOLK_PROFILE(volk_8ic_s32f_deinterleave_32f_x2, 1e-4, 100, 204602, 3000, 
&results, benchmark_mode, kernel_regex);
     VOLK_PROFILE(volk_8ic_deinterleave_real_16i, 0, 256, 204602, 3000, 
&results, benchmark_mode, kernel_regex);
diff --git a/volk/kernels/volk/volk_32u_popcntpuppet_32u.h 
b/volk/kernels/volk/volk_32u_popcntpuppet_32u.h
new file mode 100644
index 0000000..056983e
--- /dev/null
+++ b/volk/kernels/volk/volk_32u_popcntpuppet_32u.h
@@ -0,0 +1,47 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2014 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifndef INCLUDED_volk_32u_popcntpuppet_32u_H
+#define INCLUDED_volk_32u_popcntpuppet_32u_H
+
+#include <stdint.h>
+#include <volk/volk_32u_popcnt.h>
+
+#ifdef LV_HAVE_GENERIC
+static inline void volk_32u_popcntpuppet_32u_generic(uint32_t* outVector, 
const uint32_t* inVector, unsigned int num_points){
+    unsigned int ii; /* puppet: wraps the one-value popcnt kernel in a vector (out, in, num_points) signature */
+    for(ii=0; ii < num_points; ++ii) { /* one kernel call per element */
+        volk_32u_popcnt_generic(outVector+ii, *(inVector+ii) ); /* result slot is &outVector[ii], input is inVector[ii] */
+    }
+}
+#endif /* LV_HAVE_GENERIC */
+
+#ifdef LV_HAVE_SSE4_2
+static inline void volk_32u_popcntpuppet_32u_a_sse4_2(uint32_t* outVector, 
const uint32_t* inVector, unsigned int num_points){
+    unsigned int ii; /* puppet: wraps the one-value SSE4.2 popcnt kernel in a vector (out, in, num_points) signature */
+    for(ii=0; ii < num_points; ++ii) { /* one kernel call per element */
+        volk_32u_popcnt_a_sse4_2(outVector+ii, *(inVector+ii) ); /* result slot is &outVector[ii], input is inVector[ii] */
+    }
+}
+#endif /* LV_HAVE_SSE4_2 */
+
+#endif /* INCLUDED_volk_32u_popcntpuppet_32u_H */
diff --git a/volk/kernels/volk/volk_64u_popcnt.h 
b/volk/kernels/volk/volk_64u_popcnt.h
index d425cd5..5eb28c7 100644
--- a/volk/kernels/volk/volk_64u_popcnt.h
+++ b/volk/kernels/volk/volk_64u_popcnt.h
@@ -71,4 +71,60 @@ static inline void volk_64u_popcnt_a_sse4_2(uint64_t* ret, 
const uint64_t value)
 
 #endif /*LV_HAVE_SSE4_2*/
 
+#ifdef LV_HAVE_NEON
+#include <arm_neon.h>
+static inline void volk_64u_popcnt_neon(uint64_t* ret, const uint64_t value) {
+    /* DISABLED DRAFT (inside a comment, never compiled): table lookup via vtbl4_u8.
+    unsigned char table[] =   {0, 1, 1, 2, 1, 2, 2, 3,
+                               1, 2, 2, 3, 2, 3, 3, 4,
+                               1, 2, 2, 3, 2, 3, 3, 4,
+                               2, 3, 3, 4, 3, 4, 4, 5,
+                               1, 2, 2, 3, 2, 3, 3, 4,
+                               2, 3, 3, 4, 3, 4, 4, 5,
+                               2, 3, 3, 4, 3, 4, 4, 5,
+                               3, 4, 4, 5, 4, 5, 5, 6};
+
+    // we're stuck with a 64-element table, so treat the MSBs
+    // of each byte as 0 and sum them individually.
+    uint64_t input_7bit = value & 0x7F7F7F7F7F7F7F7F;
+    uint64_t input_msbs = value & 0x8080808080808080;
+    uint64_t sum =  (input_msbs >> 8) ;
+    sum += (input_msbs >> 16);
+    sum += (input_msbs >> 24);
+    sum += (input_msbs >> 32);
+    sum += (input_msbs >> 40);
+    sum += (input_msbs >> 48);
+    sum += (input_msbs >> 56);
+    sum += (input_msbs >> 64); // NOTE(review): shifting a 64-bit value by 64 is undefined behaviour
+
+    uint8x8x4_t table_val;
+    uint8x8_t input_val;
+    uint16x8x2_t intermediate_sum;
+    uint32x8_t intermediate_sum; // NOTE(review): duplicate name; draft would not compile as written
+
+    // load the table and input value
+    table_val = vld4q_u8(table);
+    input_val = vld1_u8((unsigned char *) &value);
+
+    // perform the lookup, output is uint8x8_t
+    input_val = vtbl4_u8(table_val, input_val);
+    */
+    /* Live path: store the number of set bits of value in *ret. */
+    uint8x8_t input_val, count8x8_val;
+    uint16x4_t count16x4_val;
+    uint32x2_t count32x2_val;
+    uint64x1_t count64x1_val;
+
+    input_val = vld1_u8((const uint8_t *) &value); /* view the 8 bytes of value as 8 lanes */
+    count8x8_val = vcnt_u8(input_val);              /* set-bit count of each byte */
+    count16x4_val = vpaddl_u8(count8x8_val);        /* pairwise widening add: 8 x u8 -> 4 x u16 */
+    count32x2_val = vpaddl_u16(count16x4_val);      /* 4 x u16 -> 2 x u32 */
+    count64x1_val = vpaddl_u32(count32x2_val);      /* 2 x u32 -> 1 x u64 total */
+    vst1_u64(ret, count64x1_val);                   /* write the total to *ret */
+
+    /* x86 counterpart, for reference only: *ret = _mm_popcnt_u64(value); */
+
+}
+#endif /* LV_HAVE_NEON */
+
 #endif /*INCLUDED_volk_64u_popcnt_a_H*/
diff --git a/volk/kernels/volk/volk_64u_popcntpuppet_64u.h 
b/volk/kernels/volk/volk_64u_popcntpuppet_64u.h
new file mode 100644
index 0000000..5837d0f
--- /dev/null
+++ b/volk/kernels/volk/volk_64u_popcntpuppet_64u.h
@@ -0,0 +1,47 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2014 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifndef INCLUDED_volk_64u_popcntpuppet_64u_H
+#define INCLUDED_volk_64u_popcntpuppet_64u_H
+
+#include <stdint.h>
+#include <volk/volk_64u_popcnt.h>
+
+#ifdef LV_HAVE_GENERIC
+static inline void volk_64u_popcntpuppet_64u_generic(uint64_t* outVector, 
const uint64_t* inVector, unsigned int num_points){
+    unsigned int ii; /* puppet: wraps the one-value popcnt kernel in a vector (out, in, num_points) signature */
+    for(ii=0; ii < num_points; ++ii) { /* one kernel call per element */
+        volk_64u_popcnt_generic(outVector+ii, *(inVector+ii) ); /* result slot is &outVector[ii], input is inVector[ii] */
+    }
+}
+#endif /* LV_HAVE_GENERIC */
+
+#ifdef LV_HAVE_NEON
+static inline void volk_64u_popcntpuppet_64u_neon(uint64_t* outVector, const 
uint64_t* inVector, unsigned int num_points){
+    unsigned int ii; /* puppet: wraps the one-value NEON popcnt kernel in a vector (out, in, num_points) signature */
+    for(ii=0; ii < num_points; ++ii) { /* one kernel call per element */
+        volk_64u_popcnt_neon(outVector+ii, *(inVector+ii) ); /* result slot is &outVector[ii], input is inVector[ii] */
+    }
+}
+#endif /* LV_HAVE_NEON */
+
+#endif /* INCLUDED_volk_64u_popcntpuppet_64u_H */



reply via email to

[Prev in Thread] Current Thread [Next in Thread]