commit-gnuradio
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Commit-gnuradio] [gnuradio] 17/22: volk: add a neon table version of 16u_byteswap


From: git
Subject: [Commit-gnuradio] [gnuradio] 17/22: volk: add a neon table version of 16u_byteswap
Date: Fri, 31 Oct 2014 19:22:31 +0000 (UTC)

This is an automated email from the git hooks/post-receive script.

jcorgan pushed a commit to branch master
in repository gnuradio.

commit cb32b70b79f430456208a2cd521d028e0ece5d5b
Author: Nathan West <address@hidden>
Date:   Wed Oct 22 14:39:40 2014 -0500

    volk: add a neon table version of 16u_byteswap
---
 volk/kernels/volk/volk_16u_byteswap.h           | 52 +++++++++++++++++++++++++
 volk/kernels/volk/volk_16u_byteswappuppet_16u.h |  9 +++++
 2 files changed, 61 insertions(+)

diff --git a/volk/kernels/volk/volk_16u_byteswap.h 
b/volk/kernels/volk/volk_16u_byteswap.h
index bffdeed..3b2f9e2 100644
--- a/volk/kernels/volk/volk_16u_byteswap.h
+++ b/volk/kernels/volk/volk_16u_byteswap.h
@@ -158,6 +158,58 @@ static inline void volk_16u_byteswap_neon(uint16_t* 
intsToSwap, unsigned int num
 }
 #endif /* LV_HAVE_NEON */
 
+#ifdef LV_HAVE_NEON
+#include <arm_neon.h>
+/*!
+  \brief Byteswaps (in-place) an aligned vector of uint16_t's using a NEON table lookup.
+  \param intsToSwap The vector of data to byte swap
+  \param num_points The number of data points
+*/
+static inline void volk_16u_byteswap_neon_table(uint16_t* intsToSwap, unsigned 
int num_points){
+  uint16_t* inputPtr = intsToSwap;
+  unsigned int number = 0;
+  unsigned int n16points = num_points / 16;
+
+  uint8x8x4_t input_table;
+  uint8x8_t int_lookup01, int_lookup23, int_lookup45, int_lookup67;
+  uint8x8_t swapped_int01, swapped_int23, swapped_int45, swapped_int67;
+
+  /* These magic numbers are byte indices into the 32-byte table that
+     vld4_u8 loads de-interleaved (input byte k lands at table index
+     (k%4)*8 + k/4). They are pre-computed to save time; for lookup01:
+    uint8_t chars[8] = {8, 0, 24, 16, 9, 1, 25, 17};
+    for(ii=0; ii < 8; ++ii) {
+        index += ((uint64_t)(*(chars+ii))) << (ii*8);
+    }
+  */
+  int_lookup01 = vcreate_u8(1232017111498883080);
+  int_lookup23 = vcreate_u8(1376697457175036426);
+  int_lookup45 = vcreate_u8(1521377802851189772);
+  int_lookup67 = vcreate_u8(1666058148527343118);
+  /* Main loop: each iteration byteswaps 16 uint16_t values (32 bytes). */
+  for(number = 0; number < n16points; ++number){
+    input_table = vld4_u8((uint8_t*) inputPtr);
+    swapped_int01 = vtbl4_u8(input_table, int_lookup01);
+    swapped_int23 = vtbl4_u8(input_table, int_lookup23);
+    swapped_int45 = vtbl4_u8(input_table, int_lookup45);
+    swapped_int67 = vtbl4_u8(input_table, int_lookup67);
+    vst1_u8((uint8_t*)inputPtr, swapped_int01);
+    vst1_u8((uint8_t*)(inputPtr+4), swapped_int23);
+    vst1_u8((uint8_t*)(inputPtr+8), swapped_int45);
+    vst1_u8((uint8_t*)(inputPtr+12), swapped_int67);
+
+    inputPtr += 16;
+  }
+  /* Scalar tail: swap the remaining num_points % 16 values one at a time. */
+  for(number = n16points * 16; number < num_points; ++number){
+    uint16_t output = *inputPtr;
+    output = (((output >> 8) & 0xff) | ((output << 8) & 0xff00));
+    *inputPtr = output;
+    inputPtr++;
+  }
+}
+#endif /* LV_HAVE_NEON */
+
 #ifdef LV_HAVE_GENERIC
 /*!
   \brief Byteswaps (in-place) an aligned vector of int16_t's.
diff --git a/volk/kernels/volk/volk_16u_byteswappuppet_16u.h 
b/volk/kernels/volk/volk_16u_byteswappuppet_16u.h
index 699a758..c636e2f 100644
--- a/volk/kernels/volk/volk_16u_byteswappuppet_16u.h
+++ b/volk/kernels/volk/volk_16u_byteswappuppet_16u.h
@@ -24,6 +24,15 @@ static inline void 
volk_16u_byteswappuppet_16u_neon(uint16_t*output, uint16_t* i
 }
 #endif
 
+#ifdef LV_HAVE_NEON
+static inline void volk_16u_byteswappuppet_16u_neon_table(uint16_t*output, 
uint16_t* intsToSwap, unsigned int num_points){
+    /* Runs the kernel directly on intsToSwap (so the input buffer itself is modified), then copies num_points values to output. */
+    volk_16u_byteswap_neon_table((uint16_t*)intsToSwap, num_points);
+    memcpy((void*)output, (void*)intsToSwap, num_points * sizeof(uint16_t));
+
+}
+#endif
+
 #ifdef LV_HAVE_SSE2
 static inline void volk_16u_byteswappuppet_16u_u_sse2(uint16_t* intsToSwap, 
unsigned int num_points){
 



reply via email to

[Prev in Thread] Current Thread [Next in Thread]