commit-gnuradio
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Commit-gnuradio] [gnuradio] 13/14: volk: fixed a problem with acos during some translation in the git history.


From: git
Subject: [Commit-gnuradio] [gnuradio] 13/14: volk: fixed a problem with acos during some translation in the git history.
Date: Wed, 15 Oct 2014 23:25:09 +0000 (UTC)

This is an automated email from the git hooks/post-receive script.

trondeau pushed a commit to branch master
in repository gnuradio.

commit 06e0398f4747c2c81f28c6cb9b5796c4fb6de8f6
Author: Tom Rondeau <address@hidden>
Date:   Wed Oct 15 12:53:10 2014 -0400

    volk: fixed a problem with acos during some translation in the git history.
---
 volk/kernels/volk/volk_32f_acos_32f.h | 238 +++++++++++++++++-----------------
 volk/kernels/volk/volk_32f_asin_32f.h |   2 +-
 2 files changed, 119 insertions(+), 121 deletions(-)

diff --git a/volk/kernels/volk/volk_32f_acos_32f.h b/volk/kernels/volk/volk_32f_acos_32f.h
index 945ba39..18985f2 100644
--- a/volk/kernels/volk/volk_32f_acos_32f.h
+++ b/volk/kernels/volk/volk_32f_acos_32f.h
@@ -18,67 +18,67 @@
 */
 static inline void volk_32f_acos_32f_a_sse4_1(float* bVector, const float* 
aVector, unsigned int num_points){
 
-    float* bPtr = bVector;
-    const float* aPtr = aVector;
-
-    unsigned int number = 0;
-    unsigned int quarterPoints = num_points / 4;
-    int i, j;
-
-    __m128 aVal, d, pi, pio2, x, y, z, arccosine;
-    __m128 fzeroes, fones, ftwos, ffours, condition;
-
-    pi = _mm_set1_ps(3.14159265358979323846);
-    pio2 = _mm_set1_ps(3.14159265358979323846/2);
-    fzeroes = _mm_setzero_ps();
-    fones = _mm_set1_ps(1.0);
-    ftwos = _mm_set1_ps(2.0);
-    ffours = _mm_set1_ps(4.0);
-
-    for(;number < quarterPoints; number++){
-        aVal = _mm_load_ps(aPtr);
-        d = aVal;
-        aVal = _mm_div_ps(_mm_sqrt_ps(_mm_mul_ps(_mm_add_ps(fones, aVal), 
_mm_sub_ps(fones, aVal))), aVal);
-        z = aVal;
-        condition = _mm_cmplt_ps(z, fzeroes);
-        z = _mm_sub_ps(z, _mm_and_ps(_mm_mul_ps(z, ftwos), condition));
-        x = z;
-        condition = _mm_cmplt_ps(z, fones);
-        x = _mm_add_ps(x, _mm_and_ps(_mm_sub_ps(_mm_div_ps(fones, z), z), 
condition));
-
-        for(i = 0; i < 2; i++){
-            x = _mm_add_ps(x, _mm_sqrt_ps(_mm_add_ps(fones, _mm_mul_ps(x, 
x))));
-        }
-        x = _mm_div_ps(fones, x);
-        y = fzeroes;
-        for(j = ACOS_TERMS - 1; j >=0 ; j--){
-            y = _mm_add_ps(_mm_mul_ps(y, _mm_mul_ps(x, x)), 
_mm_set1_ps(pow(-1,j)/(2*j+1)));
-        }
-
-        y = _mm_mul_ps(y, _mm_mul_ps(x, ffours));
-        condition = _mm_cmpgt_ps(z, fones);
-
-        y = _mm_add_ps(y, _mm_and_ps(_mm_sub_ps(pio2, _mm_mul_ps(y, ftwos)), 
condition));
-        arccosine = y;
-        condition = _mm_cmplt_ps(aVal, fzeroes);
-        arccosine = _mm_sub_ps(arccosine, _mm_and_ps(_mm_mul_ps(arccosine, 
ftwos), condition));
-        condition = _mm_cmplt_ps(d, fzeroes);
-        arccosine = _mm_add_ps(arccosine, _mm_and_ps(pi, condition));
-
-        _mm_store_ps(bPtr, arccosine);
-        aPtr += 4;
-        bPtr += 4;
-    }
-
-    number = quarterPoints * 4;
-    for(;number < num_points; number++){
-       *bPtr++ = acos(*aPtr++);
-    }
+  float* bPtr = bVector;
+  const float* aPtr = aVector;
+
+  unsigned int number = 0;
+  unsigned int quarterPoints = num_points / 4;
+  int i, j;
+
+  __m128 aVal, d, pi, pio2, x, y, z, arccosine;
+  __m128 fzeroes, fones, ftwos, ffours, condition;
+
+  pi = _mm_set1_ps(3.14159265358979323846);
+  pio2 = _mm_set1_ps(3.14159265358979323846/2);
+  fzeroes = _mm_setzero_ps();
+  fones = _mm_set1_ps(1.0);
+  ftwos = _mm_set1_ps(2.0);
+  ffours = _mm_set1_ps(4.0);
+
+  for(;number < quarterPoints; number++){
+    aVal = _mm_load_ps(aPtr);
+    d = aVal;
+    aVal = _mm_div_ps(_mm_sqrt_ps(_mm_mul_ps(_mm_add_ps(fones, aVal), 
_mm_sub_ps(fones, aVal))), aVal);
+    z = aVal;
+    condition = _mm_cmplt_ps(z, fzeroes);
+    z = _mm_sub_ps(z, _mm_and_ps(_mm_mul_ps(z, ftwos), condition));
+    x = z;
+    condition = _mm_cmplt_ps(z, fones);
+    x = _mm_add_ps(x, _mm_and_ps(_mm_sub_ps(_mm_div_ps(fones, z), z), 
condition));
+
+    for(i = 0; i < 2; i++)
+      x = _mm_add_ps(x, _mm_sqrt_ps(_mm_add_ps(fones, _mm_mul_ps(x, x))));
+    x = _mm_div_ps(fones, x);
+    y = fzeroes;
+    for(j = ACOS_TERMS - 1; j >=0 ; j--)
+      y = _mm_add_ps(_mm_mul_ps(y, _mm_mul_ps(x, x)), 
_mm_set1_ps(pow(-1,j)/(2*j+1)));
+
+    y = _mm_mul_ps(y, _mm_mul_ps(x, ffours));
+    condition = _mm_cmpgt_ps(z, fones);
+
+    y = _mm_add_ps(y, _mm_and_ps(_mm_sub_ps(pio2, _mm_mul_ps(y, ftwos)), 
condition));
+    arccosine = y;
+    condition = _mm_cmplt_ps(aVal, fzeroes);
+    arccosine = _mm_sub_ps(arccosine, _mm_and_ps(_mm_mul_ps(arccosine, ftwos), 
condition));
+    condition = _mm_cmplt_ps(d, fzeroes);
+    arccosine = _mm_add_ps(arccosine, _mm_and_ps(pi, condition));
+
+    _mm_store_ps(bPtr, arccosine);
+    aPtr += 4;
+    bPtr += 4;
+  }
+
+  number = quarterPoints * 4;
+  for(;number < num_points; number++){
+    *bPtr++ = acos(*aPtr++);
+  }
 }
 
 #endif /* LV_HAVE_SSE4_1 for aligned */
 
-#endif /* INCLUDED_volk_32f_acos_32_f_H */
+#endif /* INCLUDED_volk_32f_acos_32f_a_H */
+
+
 #ifndef INCLUDED_volk_32f_acos_32f_u_H
 #define INCLUDED_volk_32f_acos_32f_u_H
 
@@ -91,63 +91,61 @@ static inline void volk_32f_acos_32f_a_sse4_1(float* bVector, const float* aVect
   \param num_points Number of points for which arccosine is to be computed
 */
 static inline void volk_32f_acos_32f_u_sse4_1(float* bVector, const float* 
aVector, unsigned int num_points){
-
-    float* bPtr = bVector;
-    const float* aPtr = aVector;
-
-    unsigned int number = 0;
-        unsigned int quarterPoints = num_points / 4;
-    int i, j;
-
-    __m128 aVal, d, pi, pio2, x, y, z, arccosine;
-    __m128 fzeroes, fones, ftwos, ffours, condition;
-
-    pi = _mm_set1_ps(3.14159265358979323846);
-    pio2 = _mm_set1_ps(3.14159265358979323846/2);
-    fzeroes = _mm_setzero_ps();
-    fones = _mm_set1_ps(1.0);
-    ftwos = _mm_set1_ps(2.0);
-    ffours = _mm_set1_ps(4.0);
-
-    for(;number < quarterPoints; number++){
-        aVal = _mm_loadu_ps(aPtr);
-        d = aVal;
-        aVal = _mm_div_ps(_mm_sqrt_ps(_mm_mul_ps(_mm_add_ps(fones, aVal), 
_mm_sub_ps(fones, aVal))), aVal);
-        z = aVal;
-        condition = _mm_cmplt_ps(z, fzeroes);
-        z = _mm_sub_ps(z, _mm_and_ps(_mm_mul_ps(z, ftwos), condition));
-        x = z;
-        condition = _mm_cmplt_ps(z, fones);
-        x = _mm_add_ps(x, _mm_and_ps(_mm_sub_ps(_mm_div_ps(fones, z), z), 
condition));
-
-        for(i = 0; i < 2; i++){
-            x = _mm_add_ps(x, _mm_sqrt_ps(_mm_add_ps(fones, _mm_mul_ps(x, 
x))));
-        }
-        x = _mm_div_ps(fones, x);
-        y = fzeroes;
-        for(j = ACOS_TERMS - 1; j >=0 ; j--){
-            x = _mm_add_ps(_mm_mul_ps(y, _mm_mul_ps(x, x)), 
_mm_set1_ps(pow(-1,j)/(2*j+1)));
-        }
-
-        y = _mm_mul_ps(y, _mm_mul_ps(x, ffours));
-        condition = _mm_cmpgt_ps(z, fones);
-
-        y = _mm_add_ps(y, _mm_and_ps(_mm_sub_ps(pio2, _mm_mul_ps(y, ftwos)), 
condition));
-        arccosine = y;
-        condition = _mm_cmplt_ps(aVal, fzeroes);
-        arccosine = _mm_sub_ps(arccosine, _mm_and_ps(_mm_mul_ps(arccosine, 
ftwos), condition));
-        condition = _mm_cmplt_ps(d, fzeroes);
-        arccosine = _mm_add_ps(arccosine, _mm_and_ps(pi, condition));
-
-        _mm_storeu_ps(bPtr, arccosine);
-        aPtr += 4;
-        bPtr += 4;
-    }
-
-    number = quarterPoints * 4;
-    for(;number < num_points; number++){
-        *bPtr++ = acos(*aPtr++);
-    }
+  float* bPtr = bVector;
+  const float* aPtr = aVector;
+
+  unsigned int number = 0;
+  unsigned int quarterPoints = num_points / 4;
+  int i, j;
+
+  __m128 aVal, d, pi, pio2, x, y, z, arccosine;
+  __m128 fzeroes, fones, ftwos, ffours, condition;
+
+  pi = _mm_set1_ps(3.14159265358979323846);
+  pio2 = _mm_set1_ps(3.14159265358979323846/2);
+  fzeroes = _mm_setzero_ps();
+  fones = _mm_set1_ps(1.0);
+  ftwos = _mm_set1_ps(2.0);
+  ffours = _mm_set1_ps(4.0);
+
+  for(;number < quarterPoints; number++){
+    aVal = _mm_loadu_ps(aPtr);
+    d = aVal;
+    aVal = _mm_div_ps(_mm_sqrt_ps(_mm_mul_ps(_mm_add_ps(fones, aVal), 
_mm_sub_ps(fones, aVal))), aVal);
+    z = aVal;
+    condition = _mm_cmplt_ps(z, fzeroes);
+    z = _mm_sub_ps(z, _mm_and_ps(_mm_mul_ps(z, ftwos), condition));
+    x = z;
+    condition = _mm_cmplt_ps(z, fones);
+    x = _mm_add_ps(x, _mm_and_ps(_mm_sub_ps(_mm_div_ps(fones, z), z), 
condition));
+
+    for(i = 0; i < 2; i++)
+      x = _mm_add_ps(x, _mm_sqrt_ps(_mm_add_ps(fones, _mm_mul_ps(x, x))));
+    x = _mm_div_ps(fones, x);
+    y = fzeroes;
+
+    for(j = ACOS_TERMS - 1; j >=0 ; j--)
+      y = _mm_add_ps(_mm_mul_ps(y, _mm_mul_ps(x, x)), 
_mm_set1_ps(pow(-1,j)/(2*j+1)));
+
+    y = _mm_mul_ps(y, _mm_mul_ps(x, ffours));
+    condition = _mm_cmpgt_ps(z, fones);
+
+    y = _mm_add_ps(y, _mm_and_ps(_mm_sub_ps(pio2, _mm_mul_ps(y, ftwos)), 
condition));
+    arccosine = y;
+    condition = _mm_cmplt_ps(aVal, fzeroes);
+    arccosine = _mm_sub_ps(arccosine, _mm_and_ps(_mm_mul_ps(arccosine, ftwos), 
condition));
+    condition = _mm_cmplt_ps(d, fzeroes);
+    arccosine = _mm_add_ps(arccosine, _mm_and_ps(pi, condition));
+
+    _mm_storeu_ps(bPtr, arccosine);
+    aPtr += 4;
+    bPtr += 4;
+  }
+
+  number = quarterPoints * 4;
+  for(;number < num_points; number++){
+    *bPtr++ = acos(*aPtr++);
+  }
 }
 
 #endif /* LV_HAVE_SSE4_1 for aligned */
@@ -160,13 +158,13 @@ static inline void volk_32f_acos_32f_u_sse4_1(float* bVector, const float* aVect
   \param num_points Number of points for which arccosine is to be computed
 */
 static inline void volk_32f_acos_32f_generic(float* bVector, const float* 
aVector, unsigned int num_points){
-    float* bPtr = bVector;
-    const float* aPtr = aVector;
-    unsigned int number = 0;
+  float* bPtr = bVector;
+  const float* aPtr = aVector;
+  unsigned int number = 0;
 
-    for(number = 0; number < num_points; number++){
-        *bPtr++ = acos(*aPtr++);
-    }
+  for(number = 0; number < num_points; number++){
+    *bPtr++ = acos(*aPtr++);
+  }
 
 }
 #endif /* LV_HAVE_GENERIC */
diff --git a/volk/kernels/volk/volk_32f_asin_32f.h b/volk/kernels/volk/volk_32f_asin_32f.h
index 2bae3a5..d7322a4 100644
--- a/volk/kernels/volk/volk_32f_asin_32f.h
+++ b/volk/kernels/volk/volk_32f_asin_32f.h
@@ -164,4 +164,4 @@ static inline void volk_32f_asin_32f_u_generic(float* bVector, const float* aVec
 }
 #endif /* LV_HAVE_GENERIC */
 
-#endif /* INCLUDED_volk_32f_asin_32f_a_H */
+#endif /* INCLUDED_volk_32f_asin_32f_u_H */



reply via email to

[Prev in Thread] Current Thread [Next in Thread]