commit-gnuradio
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Commit-gnuradio] r8306 - gnuradio/branches/developers/eb/gcell-wip/gcell/src/lib/general/spu


From: eb
Subject: [Commit-gnuradio] r8306 - gnuradio/branches/developers/eb/gcell-wip/gcell/src/lib/general/spu
Date: Sat, 3 May 2008 13:25:34 -0600 (MDT)

Author: eb
Date: 2008-05-03 13:25:32 -0600 (Sat, 03 May 2008)
New Revision: 8306

Modified:
   gnuradio/branches/developers/eb/gcell-wip/gcell/src/lib/general/spu/gc_spu_macs.h
   gnuradio/branches/developers/eb/gcell-wip/gcell/src/lib/general/spu/memset.S
Log:
work-in-progress

Modified: gnuradio/branches/developers/eb/gcell-wip/gcell/src/lib/general/spu/gc_spu_macs.h
===================================================================
--- gnuradio/branches/developers/eb/gcell-wip/gcell/src/lib/general/spu/gc_spu_macs.h	2008-05-03 05:42:53 UTC (rev 8305)
+++ gnuradio/branches/developers/eb/gcell-wip/gcell/src/lib/general/spu/gc_spu_macs.h	2008-05-03 19:25:32 UTC (rev 8306)
@@ -1,4 +1,4 @@
-/* -*- c -*- */
+/* -*- asm -*- */
 /*
  * Copyright 2008 Free Software Foundation, Inc.
  * 
@@ -192,6 +192,18 @@
 
 /*
  * ----------------------------------------------------------------
+ *                         pseudo ops
+ * ----------------------------------------------------------------
+ */
+#define PROC_ENTRY(name)               \
+        .text;                         \
+       .p2align 4;                     \
+       .global name;                   \
+       .type   name, @function;        \
+name:
+
+/*
+ * ----------------------------------------------------------------
  *                 aliases for common operations
  * ----------------------------------------------------------------
  */
@@ -206,7 +218,7 @@
 #define        RETURN()                        bi      lr;
 
 // hint for a return
-#define        RETURN_HINT(ret_label)          hbr     ret_label, lr;
+#define        HINT_RETURN(ret_label)          hbr     ret_label, lr;
 
 // return if zero
 #define BRZ_RETURN(rt)                 biz     rt, lr;
@@ -222,7 +234,9 @@
 
 
 /*
+ * ----------------------------------------------------------------
  * modulo like things for constant moduli that are powers of 2
+ * ----------------------------------------------------------------
  */
 
 // rt = ra & (pow2 - 1)
@@ -244,8 +258,10 @@
        andi    rt, rt, -(pow2);
 
 /*
+ * ----------------------------------------------------------------
  * Splat - replicate a particular slot into all slots
  * Altivec analogs...
+ * ----------------------------------------------------------------
  */
 
 // replicate byte from slot s [0,15]
@@ -271,4 +287,94 @@
        rotqbyi rt, ra, ra, (s) << 3;   /* rotate double into preferred slot */ \
        shufb   rt, rt, rt, _gc_t0;
 
+/*
+ * ----------------------------------------------------------------
+ * lots of min/max variations...
+ *
+ * On a slot by slot basis, compute the min or max (_gc_t0 is used as scratch)
+ *
+ * U - unsigned, else signed
+ * B,H,{} - byte, halfword, word
+ * F - float
+ * ----------------------------------------------------------------
+ */
+
+#define MIN_SELB(rt, ra, rb, rc)       selb    rt, ra, rb, rc;
+#define MAX_SELB(rt, ra, rb, rc)       selb    rt, rb, ra, rc;
+       
+       // words
+
+#define MIN(rt, ra, rb) \
+       cgt     _gc_t0, ra, rb; \
+       MIN_SELB(rt, ra, rb, _gc_t0)
+       
+#define        MAX(rt, ra, rb) \
+       cgt     _gc_t0, ra, rb; \
+       MAX_SELB(rt, ra, rb, _gc_t0)
+
+#define UMIN(rt, ra, rb) \
+       clgt    _gc_t0, ra, rb; \
+       MIN_SELB(rt, ra, rb, _gc_t0)
+       
+#define        UMAX(rt, ra, rb) \
+       clgt    _gc_t0, ra, rb; \
+       MAX_SELB(rt, ra, rb, _gc_t0)
+
+       // bytes
+       
+#define MINB(rt, ra, rb) \
+       cgtb    _gc_t0, ra, rb; \
+       MIN_SELB(rt, ra, rb, _gc_t0)
+       
+#define        MAXB(rt, ra, rb) \
+       cgtb    _gc_t0, ra, rb; \
+       MAX_SELB(rt, ra, rb, _gc_t0)
+
+#define UMINB(rt, ra, rb) \
+       clgtb   _gc_t0, ra, rb; \
+       MIN_SELB(rt, ra, rb, _gc_t0)
+       
+#define        UMAXB(rt, ra, rb) \
+       clgtb   _gc_t0, ra, rb; \
+       MAX_SELB(rt, ra, rb, _gc_t0)
+
+       // halfwords
+       
+#define MINH(rt, ra, rb) \
+       cgth    _gc_t0, ra, rb; \
+       MIN_SELB(rt, ra, rb, _gc_t0)
+       
+#define        MAXH(rt, ra, rb) \
+       cgth    _gc_t0, ra, rb; \
+       MAX_SELB(rt, ra, rb, _gc_t0)
+
+#define UMINH(rt, ra, rb) \
+       clgth   _gc_t0, ra, rb; \
+       MIN_SELB(rt, ra, rb, _gc_t0)
+       
+#define        UMAXH(rt, ra, rb) \
+       clgth   _gc_t0, ra, rb; \
+       MAX_SELB(rt, ra, rb, _gc_t0)
+
+       // floats
+       
+#define FMIN(rt, ra, rb) \
+       fcgt    _gc_t0, ra, rb; \
+       MIN_SELB(rt, ra, rb, _gc_t0)
+       
+#define        FMAX(rt, ra, rb) \
+       fcgt    _gc_t0, ra, rb; \
+       MAX_SELB(rt, ra, rb, _gc_t0)
+
+// Ignoring the sign, select the values with the minimum magnitude
+#define FMINMAG(rt, ra, rb) \
+       fcmgt   _gc_t0, ra, rb; \
+       MIN_SELB(rt, ra, rb, _gc_t0)
+       
+// Ignoring the sign, select the values with the maximum magnitude
+#define        FMAXMAG(rt, ra, rb) \
+       fcmgt   _gc_t0, ra, rb; \
+       MAX_SELB(rt, ra, rb, _gc_t0)
+
+
 #endif /* INCLUDED_GC_SPU_MACS_H */

Modified: gnuradio/branches/developers/eb/gcell-wip/gcell/src/lib/general/spu/memset.S
===================================================================
--- gnuradio/branches/developers/eb/gcell-wip/gcell/src/lib/general/spu/memset.S	2008-05-03 05:42:53 UTC (rev 8305)
+++ gnuradio/branches/developers/eb/gcell-wip/gcell/src/lib/general/spu/memset.S	2008-05-03 19:25:32 UTC (rev 8306)
@@ -38,11 +38,6 @@
         *      }
         */
        
-       .text
-       .p2align 4
-       .global memset_spe
-       .type   memset_spe, @function
-
 #define        p_arg   arg1    // we're going to clobber arg1 w/ the return value
 #define        c       arg2    // the constant we're writing
 #define        n       arg3    // how many bytes to write
@@ -57,25 +52,22 @@
 #define        cond1   r20
 #define        cond2   r21                             
 #define m      r22
-
-memset_spe:
+#define r      r23
+       
+       PROC_ENTRY(memset_spe2)
+       
+       // Hint the return from do_head, in case we head that way.
+       // There's pretty much nothing we can do to hint the branch to it.
+       hbrr    do_head_br, head_complete
+       
        MR(p, p_arg)    // leaves p, the return value, in the correct reg (r3)
        BRZ_RETURN(n)
 
-       VSPLTB(c, c, 3) // splat byte in preferred slot of c into all slots
+       MODULO(t0, p, 16)       // is p%16 == 0?
+       VSPLTB(c, c, 3)         // splat byte in preferred slot of c into all slots
+       brnz    t0, do_head     // no, handle it
+head_complete:
 
-       // Is the modified region all within a single quad word?
-               
-       ROUND_DOWN(t0, p, 16)
-       a       t1, p, n
-       ROUND_DOWN(t1, t1, 16)
-       ceq     t1, t0, t1
-       brnz    t1, single_quad_word    // yes
-
-       MODULO(t0, p, 16)               // is p%16 == 0?
-       brnz    t0, do_head             // no, handle it
-head_complete: 
-
        /*
         * preconditions:       
         *   p%16 == 0, n > 0
@@ -83,13 +75,13 @@
        hbrr    middle_loop_br, middle_loop
        
        ROUND_DOWN(an, n, 16)   // an is "aligned n"
+       MODULO(n, n, 16)        // what's left over in the last quad
        brz     an, do_tail     // no whole quad words; skip to tail
        clgti   t0, an, 127     // an >= 128?
-       MODULO(n, n, 16)        // what's left over in the last quad
        brz     t0, middle2     // nope, go handle the cases between 0 and 112
 
        /*
-        * start biting off 128-byte chunks!
+        * 128 bytes / iteration
         */
        .p2align 4
 middle_loop:
@@ -112,7 +104,7 @@
          brnz  cond1, middle_loop
        
        /*
-        * if an and n are both zero, return now
+        * if an and n are both zero, return now 
         */
        BRZ_RETURN(cond2)
 
@@ -136,24 +128,21 @@
          LMR(p, next_p)
 middle2_loop_br:
          brnz  an, middle2_loop
-
-       /*
-        * We're done with the full quadwords.  
-        */
        
+       /* We're done with the full quadwords. */
+       
        /*
         * Handle the final partial quadword.
         * We'll be modifying only the left hand portion of the quad.
         *
-        * an == 0, 0 <= n < 16, p%16 == 0
+        * preconditions:
+        *   an == 0, 0 <= n < 16, p%16 == 0
         */
 do_tail:
-       BRZ_RETURN(n)
+       HINT_RETURN(do_tail_ret)
        il      mask, -1
-         RETURN_HINT(do_tail_ret)
        sfi     t1, n, 16               // t1 = 16 - n
-         lqd   old, 0(p)
-       
+       lqd     old, 0(p)
        shlqby  mask, mask, t1
        selb    t0, old, c, mask
        stqd    t0, 0(p)
@@ -162,49 +151,35 @@
 
        /*
         * ----------------------------------------------------------------
-        * Handle the right-hand side of the first quadword; align p.
+        * Handle the first partial quadword
         *
         * preconditions:
-        *   p%16 != 0, target area touches a mod 16 boundary
+        *   p%16 != 0
         *
-        * postconditions:
-        *   p%16 == 0, n decremented
-        * ----------------------------------------------------------------
+         * postconditions:
+         *   p%16 == 0 or n == 0
+         *
+         *        |-- m --|
+         *     +----------------+----------------+
+         *     |  ////////      |                |
+         *     +----------------+----------------+
+         *        |----- r -----|
+         *        p
+         * ----------------------------------------------------------------
         */
 do_head:
-       hbrr    do_head_br, head_complete
-       lqd     t0, 0(p)
-       MODULO_NEG(m, p, 16)    // # of bytes to overwrite in right edge of t0
+       lqd     old, 0(p)
+       MODULO_NEG(r, p, 16)
        il      mask, -1
+       UMIN(m, r, n)
        shlqby  mask, mask, m   // 1's in the top, m*8 0's in the bottom
-       selb    t1, c, t0, mask
-       stqd    t1, 0(p)
-       sf      n, m, n         // n -= m
+       MR(t1, p)
+       sf      t0, m, r        // t0 = r - m
        a       p, p, m         // p += m
+       rotqby  mask, mask, t0  // rotate 0's to the right place        
+       sf      n, m, n         // n -= m
+       selb    t0, c, old, mask // merge
+       stqd    t0, 0(t1)
        BRZ_RETURN(n)
 do_head_br:
        br      head_complete
-       
-       /*
-        * ----------------------------------------------------------------
-        * 0 < n < 16 and all bytes are in a single quad word.
-        *
-        * We read the value pointed at by p (the old value)
-        * then build a mask that selects which bits of
-        * old and c we're going to use, then selb them and
-        * store the new value back.
-        * ----------------------------------------------------------------
-        */
-single_quad_word:
-       il          mask, -1
-         RETURN_HINT(sqw_return)
-       MODULO(t0, p, 16)
-         lqd       old, 0(p)
-       a           t0, t0, n
-         shlqby    mask, mask, n       // 1's in the top, n*8 0's in the bottom
-       sfi         t0, t0, 16          // t0 = 16 -(n + p%16)
-         rotqby    mask, mask, t0      // rotate the zero bits into the right place
-       selb        t0, c, old, mask    // t0 has the combination of old and c
-         stqd      t0, 0(p)            // store it
-sqw_return:
-       RETURN()





reply via email to

[Prev in Thread] Current Thread [Next in Thread]