commit-gnuradio
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Commit-gnuradio] r8305 - in gnuradio/branches/developers/eb/gcell-wip:


From: eb
Subject: [Commit-gnuradio] r8305 - in gnuradio/branches/developers/eb/gcell-wip: . gcell/src/lib/general/spu gcell/src/lib/spu
Date: Fri, 2 May 2008 23:42:53 -0600 (MDT)

Author: eb
Date: 2008-05-02 23:42:53 -0600 (Fri, 02 May 2008)
New Revision: 8305

Added:
   gnuradio/branches/developers/eb/gcell-wip/gcell/src/lib/general/spu/memset.S
Modified:
   gnuradio/branches/developers/eb/gcell-wip/Makefile.common.spu
   gnuradio/branches/developers/eb/gcell-wip/gcell/src/lib/general/spu/gc_spu_macs.h
   gnuradio/branches/developers/eb/gcell-wip/gcell/src/lib/spu/Makefile.am
Log:
work-in-progress

Modified: gnuradio/branches/developers/eb/gcell-wip/Makefile.common.spu
===================================================================
--- gnuradio/branches/developers/eb/gcell-wip/Makefile.common.spu       2008-05-03 00:29:48 UTC (rev 8304)
+++ gnuradio/branches/developers/eb/gcell-wip/Makefile.common.spu       2008-05-03 05:42:53 UTC (rev 8305)
@@ -27,7 +27,8 @@
 
 AR=spu-ar
 RANLIB=spu-ranlib
-CC=spu-cc
+CC=spu-gcc
+CCAS = spu-gcc
 LD=spu-ld
 
 C_WARNINGS = \

Modified: 
gnuradio/branches/developers/eb/gcell-wip/gcell/src/lib/general/spu/gc_spu_macs.h
===================================================================
--- gnuradio/branches/developers/eb/gcell-wip/gcell/src/lib/general/spu/gc_spu_macs.h   2008-05-03 00:29:48 UTC (rev 8304)
+++ gnuradio/branches/developers/eb/gcell-wip/gcell/src/lib/general/spu/gc_spu_macs.h   2008-05-03 05:42:53 UTC (rev 8305)
@@ -186,54 +186,21 @@
 
 // scratch registers reserved for use by the macros in this file.
 
-#define _gcell_t0      r79
-#define        _gcell_t1       r78
-#define        _gcell_t2       r77
+#define _gc_t0 r79
+#define        _gc_t1  r78
+#define        _gc_t2  r77
 
 /*
  * ----------------------------------------------------------------
- *                         pseudo ops
- * ----------------------------------------------------------------
- */
-
-/*
- * Pad the location counter in the current subsection to a particular
- * storage boundary.  The expression is the number of low-order zero
- * bits the location counter must have after advancement.
- */
-#define P2ALIGN(log2_align)    .p2align        log2_align
-
-/*
- * Like P2ALIGN, only pads with nop's and lnop's as appropriate.
- * This can be useful to pad the start of a loop to a desired boundary.
- */
-#define        P2ALIGN_NOPS(log2_align) _p2_align_code log2_align;
-
-.macro _p2align_nops log2_align
-.ifeq (. & ((1 << \log2_align) - 1))   // done?
-  // yes, nothing to do.
-.else
-  // no, generate correct nop
-  .ifeq (. & 0x4)                      // even?
-    nop;
-  .else
-    lnop;
-  .endif
-  _p2align_nops \log2_align
-.endif
-.endm
-
-/*
- * ----------------------------------------------------------------
  *                 aliases for common operations
  * ----------------------------------------------------------------
  */
 
 // Move register (even pipe, 2 cycles)
-#define MR(rt, ra)                     or      rt, ra, ra
+#define MR(rt, ra)                     or      rt, ra, ra;
 
 // Move register (odd pipe, 4 cycles)
-#define        LMR(rt, ra)                     rotqbyi rt, ra, 0
+#define        LMR(rt, ra)                     rotqbyi rt, ra, 0;
 
 // return
 #define        RETURN()                        bi      lr;
@@ -247,10 +214,10 @@
 // return if not zero
 #define BRNZ_RETURN(rt)                        binz    rt, lr;
 
-// return if half-word zero
+// return if halfword zero
 #define        BRHZ_RETURN(rt)                 bihz    rt, lr;
 
-// return if half-word not zero
+// return if halfword not zero
 #define BRHNZ_RETURN(rt)               bihnz   rt, lr;
 
 
@@ -259,48 +226,49 @@
  */
 
 // rt = ra & (pow2 - 1)
-#define MODULO(rt, ra, pow2)                   \
-       andi    rt, ra, (pow2)-1
+#define MODULO(rt, ra, pow2) \
+       andi    rt, ra, (pow2)-1;
 
 // rt = pow2 - (ra & (pow2 - 1))
-#define MODULO_NEG(rt, ra, pow2)               \
-       andi    rt, ra, (pow2)-1                \
-       sfi     rt, rt, (pow2)
+#define MODULO_NEG(rt, ra, pow2) \
+       andi    rt, ra, (pow2)-1;               \
+       sfi     rt, rt, (pow2);
 
 // rt = ra & -(pow2)
-#define        ROUND_DOWN(rt, ra, pow2)                \
-       andi    rt, ra, -(pow2)
+#define        ROUND_DOWN(rt, ra, pow2) \
+       andi    rt, ra, -(pow2);
 
 // rt = (ra + (pow2 - 1)) & -(pow2)
-#define ROUND_UP(rt, ra, pow2)                 \
-       ai      rt, ra, (pow2)-1                \
-       andi    rt, rt, -(pow2)
+#define ROUND_UP(rt, ra, pow2) \
+       ai      rt, ra, (pow2)-1;               \
+       andi    rt, rt, -(pow2);
 
 /*
  * Splat - replicate a particular slot into all slots
  * Altivec analogs...
  */
 
-// replicate byte in slot s [0,15]
-#define VSPLTB(rt, ra, s)                      \
-       ilh     rt (s)*0x0101                   \
-       shubf   rt, ra, ra, rt
+// replicate byte from slot s [0,15] of ra into all slots of rt (clobbers _gc_t0)
+#define VSPLTB(rt, ra, s) \
+       ilh     _gc_t0, (s)*0x0101;             /* shuffle pattern: index s in both bytes of the halfword */ \
+       shufb   rt, ra, ra, _gc_t0;
 
-// replicate halfword in slot s [0,7]
-#define        VSPLTH(rt, ra, s)                       \
-       ilh     rt, 2*(s)*0x0101 + 0x0001       \
-       shufb   rt, ra, ra, rt
+// replicate halfword from slot s [0,7] of ra into all slots of rt (clobbers _gc_t0)
+#define        VSPLTH(rt, ra, s) \
+       ilh     _gc_t0, 2*(s)*0x0101 + 0x0001;  /* shuffle pattern: byte indices 2s, 2s+1 */ \
+       shufb   rt, ra, ra, _gc_t0;
 
-// replicate word in slot s [0,3]
-#define VSPLTW(rt, ra, s)                      \
-       iluh    rt, 4*(s)*0x0101 + 0x0001       \
-       iohl    rt, 4*(s)*0x0101 + 0x0203       
+// replicate word from slot s [0,3] of ra into all slots of rt (clobbers _gc_t0)
+#define VSPLTW(rt, ra, s) \
+       ilhu    _gc_t0, 4*(s)*0x0101 + 0x0001;  /* upper halfword of pattern: byte indices 4s, 4s+1 */ \
+       iohl    _gc_t0, 4*(s)*0x0101 + 0x0203;  /* lower halfword of pattern: byte indices 4s+2, 4s+3 */ \
+       shufb   rt, ra, ra, _gc_t0;
        
-// replicate double in slot s [0,1]
-#define        VSPLTD(rt, ra, s)                                               
         \
-       // sp is always 16-byte aligned                                         
 \
-       cdd     _gcell_t0, 8(sp)        // 0x10111213 14151617 00010203 
04050607 \
-       rotqbyi rt, ra, ra, (s) << 3    // rotate into preferred slot           
 \
-       shufb   rt, rt, rt, _gcell_t0
+// replicate double from slot s [0,1] of ra into both slots of rt (clobbers _gc_t0)
+#define        VSPLTD(rt, ra, s) \
+       /* sp is always 16-byte aligned, so 8(sp) yields the "insert at byte 8" pattern */ \
+       cdd     _gc_t0, 8(sp);          /* 0x10111213 14151617 00010203 04050607 */ \
+       rotqbyi rt, ra, (s) << 3;       /* rotate double into preferred slot (rotqbyi takes rt, ra, imm) */ \
+       shufb   rt, rt, rt, _gc_t0;
 
 #endif /* INCLUDED_GC_SPU_MACS_H */

Added: 
gnuradio/branches/developers/eb/gcell-wip/gcell/src/lib/general/spu/memset.S
===================================================================
--- gnuradio/branches/developers/eb/gcell-wip/gcell/src/lib/general/spu/memset.S    (rev 0)
+++ gnuradio/branches/developers/eb/gcell-wip/gcell/src/lib/general/spu/memset.S    2008-05-03 05:42:53 UTC (rev 8305)
@@ -0,0 +1,210 @@
+/* -*- asm -*- */
+/*
+ * Copyright 2008 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <gc_spu_macs.h>
+
+       .file "memset.S"
+
+       /*
+        * Computes this, only a lot faster...
+        *
+        *      void *
+        *      memset(void *pv, int c, size_t n)
+        *      {
+        *        unsigned char *p = (unsigned char *) pv;
+        *        size_t i;
+        *        for (i = 0; i < n; i++)
+        *          p[i] = c;
+        *      
+        *        return pv;
+        *      }
+        */
+       
+       .text
+       .p2align 4
+       .global memset_spe
+       .type   memset_spe, @function
+
+#define        p_arg   arg1    // incoming pv; also the return value, so it is only ever read
+#define        c       arg2    // the constant we're writing
+#define        n       arg3    // how many bytes to write
+
+#define        p       r13     // where we're writing
+#define        t0      r14     // scratch
+#define t1     r15     // scratch
+#define        mask    r16     // select mask handed to selb
+#define        old     r17     // previous contents of a partially overwritten quad
+#define an     r18     // aligned n (n rounded down to mod 16 boundary)
+#define        next_p  r19     // p advanced past the chunk being stored
+#define        cond1   r20     // middle_loop: nonzero while another 128-byte chunk remains
+#define        cond2   r21     // middle_loop exit: zero iff an and n are both zero
+#define m      r22     // do_head: number of bytes up to the next quad boundary
+
+memset_spe:
+       MR(p, p_arg)    // p = working pointer; p_arg (r3) is never written, so it stays the return value
+       BRZ_RETURN(n)   // n == 0: nothing to write
+
+       VSPLTB(c, c, 3) // splat byte in preferred slot of c into all slots
+
+       // Is the modified region all within a single quad word?
+       // (a region that ends exactly on a quad boundary fails this test and takes the general path)
+       ROUND_DOWN(t0, p, 16)           // t0 = p & -16
+       a       t1, p, n                // t1 = p + n
+       ROUND_DOWN(t1, t1, 16)          // t1 = (p + n) & -16
+       ceq     t1, t0, t1              // same 16-byte block?
+       brnz    t1, single_quad_word    // yes
+
+       MODULO(t0, p, 16)               // is p%16 == 0?
+       brnz    t0, do_head             // no, handle it
+head_complete: 
+
+       /*
+        * preconditions:       
+        *   p%16 == 0, n > 0
+        */
+       hbrr    middle_loop_br, middle_loop     // hint: the branch at middle_loop_br goes to middle_loop
+       
+       ROUND_DOWN(an, n, 16)   // an is "aligned n"
+       brz     an, do_tail     // no whole quad words; skip to tail
+       clgti   t0, an, 127     // an >= 128?
+       MODULO(n, n, 16)        // what's left over in the last quad
+       brz     t0, middle2     // nope, go handle the cases between 0 and 112
+
+       /*
+        * start biting off 128-byte chunks!
+        * (the extra-indented instructions are the stores and register moves
+        * interleaved with the arithmetic)
+        */
+       .p2align 4
+middle_loop:
+       ai      an, an, -128            // an -= 128
+         stqd  c,  0*16(p)
+       ai      next_p, p, 128          // next_p = p + 128
+         stqd  c,  1*16(p)
+       cgti    cond1, an, 127          // another full 128-byte chunk left?
+         stqd  c,  2*16(p)
+
+         stqd  c,  3*16(p)
+         stqd  c,  4*16(p)
+         stqd  c,  5*16(p)
+         stqd  c,  6*16(p)
+       
+       MR(p, next_p)                   // p = next_p
+         stqd  c,  7*16-128(next_p)    // same address as 7*16 off the old p
+       or      cond2, n, an            // cond2 == 0 iff nothing at all remains
+middle_loop_br:
+         brnz  cond1, middle_loop
+       
+       /*
+        * if an and n are both zero, return now
+        */
+       BRZ_RETURN(cond2)
+
+       /*
+        * otherwise handle last of full quad words 
+        *
+        *   0 <= an < 128, p%16 == 0
+        */
+middle2:
+       /*
+        * if an == 0, go handle the final non-full quadword
+        */
+       brz     an, do_tail
+       hbrr    middle2_loop_br, middle2_loop   // hint: the branch at middle2_loop_br goes to middle2_loop
+       
+       .p2align 3
+middle2_loop:  
+       ai      next_p, p, 16           // next_p = p + 16
+         stqd  c, 0(p)                 // store one whole quad
+       ai      an, an, -16             // an -= 16
+         LMR(p, next_p)                // p = next_p (odd-pipe move)
+middle2_loop_br:
+         brnz  an, middle2_loop
+
+       /*
+        * We're done with the full quadwords.  
+        */
+       
+       /*
+        * Handle the final partial quadword.
+        * We'll be modifying only the left hand portion of the quad.
+        *
+        * an == 0, 0 <= n < 16, p%16 == 0
+        */
+do_tail:
+       BRZ_RETURN(n)                   // no partial quad to write
+       il      mask, -1                // mask = all ones
+         RETURN_HINT(do_tail_ret)      // presumably hints the return at do_tail_ret (macro defined elsewhere)
+       sfi     t1, n, 16               // t1 = 16 - n
+         lqd   old, 0(p)               // old = current contents of the last quad
+       
+       shlqby  mask, mask, t1          // 1's in the top n bytes, (16-n)*8 0's in the bottom
+       selb    t0, old, c, mask        // take c where mask is 1, keep old where it is 0
+       stqd    t0, 0(p)
+do_tail_ret:   
+       RETURN()
+
+       /*
+        * ----------------------------------------------------------------
+        * Handle the right-hand side of the first quadword; align p.
+        *
+        * preconditions:
+        *   p%16 != 0, target area touches a mod 16 boundary
+        *
+        * postconditions:
+        *   p%16 == 0, n decremented
+        * ----------------------------------------------------------------
+        */
+do_head:
+       hbrr    do_head_br, head_complete       // hint: the branch at do_head_br goes to head_complete
+       lqd     t0, 0(p)                // t0 = existing contents of the quad containing p (p%16 != 0 here)
+       MODULO_NEG(m, p, 16)    // # of bytes to overwrite in right edge of t0
+       il      mask, -1                // mask = all ones
+       shlqby  mask, mask, m   // 1's in the top, m*8 0's in the bottom
+       selb    t1, c, t0, mask // keep t0 where mask is 1, take c where it is 0
+       stqd    t1, 0(p)
+       sf      n, m, n         // n -= m
+       a       p, p, m         // p += m
+       BRZ_RETURN(n)           // the head consumed every byte: done
+do_head_br:
+       br      head_complete
+       
+       /*
+        * ----------------------------------------------------------------
+        * 0 < n < 16 and all bytes are in a single quad word.
+        *
+        * We read the value pointed at by p (the old value)
+        * then build a mask that selects which bits of
+        * old and c we're going to use, then selb them and
+        * store the new value back.
+        * ----------------------------------------------------------------
+        */
+single_quad_word:
+       il          mask, -1            // mask = all ones
+         RETURN_HINT(sqw_return)
+       MODULO(t0, p, 16)               // t0 = p%16
+         lqd       old, 0(p)           // old = current contents of the quad containing p
+       a           t0, t0, n           // t0 = p%16 + n
+         shlqby    mask, mask, n       // 1's in the top, n*8 0's in the bottom
+       sfi         t0, t0, 16          // t0 = 16 -(n + p%16)
+         rotqby    mask, mask, t0      // rotate the zero bits into the right place
+       selb        t0, c, old, mask    // t0 has the combination of old and c
+         stqd      t0, 0(p)            // store it
+sqw_return:
+       RETURN()


Property changes on: 
gnuradio/branches/developers/eb/gcell-wip/gcell/src/lib/general/spu/memset.S
___________________________________________________________________
Name: svn:eol-style
   + native

Modified: 
gnuradio/branches/developers/eb/gcell-wip/gcell/src/lib/spu/Makefile.am
===================================================================
--- gnuradio/branches/developers/eb/gcell-wip/gcell/src/lib/spu/Makefile.am     2008-05-03 00:29:48 UTC (rev 8304)
+++ gnuradio/branches/developers/eb/gcell-wip/gcell/src/lib/spu/Makefile.am     2008-05-03 05:42:53 UTC (rev 8305)
@@ -58,7 +58,8 @@
 general_srcdir = $(srcdir)/../general/spu
 
 general_spu_sources = \
-       $(general_srcdir)/fft_1d_r2.c
+       $(general_srcdir)/fft_1d_r2.c \
+       $(general_srcdir)/memset.S
 
 general_spu_headers = \
        $(general_srcdir)/libfft.h





reply via email to

[Prev in Thread] Current Thread [Next in Thread]