[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Commit-gnuradio] r8305 - in gnuradio/branches/developers/eb/gcell-wip:
From: |
eb |
Subject: |
[Commit-gnuradio] r8305 - in gnuradio/branches/developers/eb/gcell-wip: . gcell/src/lib/general/spu gcell/src/lib/spu |
Date: |
Fri, 2 May 2008 23:42:53 -0600 (MDT) |
Author: eb
Date: 2008-05-02 23:42:53 -0600 (Fri, 02 May 2008)
New Revision: 8305
Added:
gnuradio/branches/developers/eb/gcell-wip/gcell/src/lib/general/spu/memset.S
Modified:
gnuradio/branches/developers/eb/gcell-wip/Makefile.common.spu
gnuradio/branches/developers/eb/gcell-wip/gcell/src/lib/general/spu/gc_spu_macs.h
gnuradio/branches/developers/eb/gcell-wip/gcell/src/lib/spu/Makefile.am
Log:
work-in-progress
Modified: gnuradio/branches/developers/eb/gcell-wip/Makefile.common.spu
===================================================================
--- gnuradio/branches/developers/eb/gcell-wip/Makefile.common.spu
2008-05-03 00:29:48 UTC (rev 8304)
+++ gnuradio/branches/developers/eb/gcell-wip/Makefile.common.spu
2008-05-03 05:42:53 UTC (rev 8305)
@@ -27,7 +27,8 @@
AR=spu-ar
RANLIB=spu-ranlib
-CC=spu-cc
+CC=spu-gcc
+CCAS = spu-gcc
LD=spu-ld
C_WARNINGS = \
Modified:
gnuradio/branches/developers/eb/gcell-wip/gcell/src/lib/general/spu/gc_spu_macs.h
===================================================================
---
gnuradio/branches/developers/eb/gcell-wip/gcell/src/lib/general/spu/gc_spu_macs.h
2008-05-03 00:29:48 UTC (rev 8304)
+++
gnuradio/branches/developers/eb/gcell-wip/gcell/src/lib/general/spu/gc_spu_macs.h
2008-05-03 05:42:53 UTC (rev 8305)
@@ -186,54 +186,21 @@
// scratch registers reserved for use by the macros in this file.
-#define _gcell_t0 r79
-#define _gcell_t1 r78
-#define _gcell_t2 r77
+#define _gc_t0 r79
+#define _gc_t1 r78
+#define _gc_t2 r77
/*
* ----------------------------------------------------------------
- * pseudo ops
- * ----------------------------------------------------------------
- */
-
-/*
- * Pad the location counter in the current subsection to a particular
- * storage boundary. The expression is the number of low-order zero
- * bits the location counter must have after advancement.
- */
-#define P2ALIGN(log2_align) .p2align log2_align
-
-/*
- * Like P2ALIGN, only pads with nop's and lnop's as appropriate.
- * This can be useful to pad the start of a loop to a desired boundary.
- */
-#define P2ALIGN_NOPS(log2_align) _p2_align_code log2_align;
-
-.macro _p2align_nops log2_align
-.ifeq (. & ((1 << \log2_align) - 1)) // done?
- // yes, nothing to do.
-.else
- // no, generate correct nop
- .ifeq (. & 0x4) // even?
- nop;
- .else
- lnop;
- .endif
- _p2align_nops \log2_align
-.endif
-.endm
-
-/*
- * ----------------------------------------------------------------
* aliases for common operations
* ----------------------------------------------------------------
*/
// Move register (even pipe, 2 cycles)
-#define MR(rt, ra) or rt, ra, ra
+#define MR(rt, ra) or rt, ra, ra;
// Move register (odd pipe, 4 cycles)
-#define LMR(rt, ra) rotqbyi rt, ra, 0
+#define LMR(rt, ra) rotqbyi rt, ra, 0;
// return
#define RETURN() bi lr;
@@ -247,10 +214,10 @@
// return if not zero
#define BRNZ_RETURN(rt) binz rt, lr;
-// return if half-word zero
+// return if halfword zero
#define BRHZ_RETURN(rt) bihz rt, lr;
-// return if half-word not zero
+// return if halfword not zero
#define BRHNZ_RETURN(rt) bihnz rt, lr;
@@ -259,48 +226,49 @@
*/
// rt = ra & (pow2 - 1)
-#define MODULO(rt, ra, pow2) \
- andi rt, ra, (pow2)-1
+#define MODULO(rt, ra, pow2) \
+ andi rt, ra, (pow2)-1;
// rt = pow2 - (ra & (pow2 - 1))
-#define MODULO_NEG(rt, ra, pow2) \
- andi rt, ra, (pow2)-1 \
- sfi rt, rt, (pow2)
+#define MODULO_NEG(rt, ra, pow2) \
+ andi rt, ra, (pow2)-1; \
+ sfi rt, rt, (pow2);
// rt = ra & -(pow2)
-#define ROUND_DOWN(rt, ra, pow2) \
- andi rt, ra, -(pow2)
+#define ROUND_DOWN(rt, ra, pow2) \
+ andi rt, ra, -(pow2);
// rt = (ra + (pow2 - 1)) & -(pow2)
-#define ROUND_UP(rt, ra, pow2) \
- ai rt, ra, (pow2)-1 \
- andi rt, rt, -(pow2)
+#define ROUND_UP(rt, ra, pow2) \
+ ai rt, ra, (pow2)-1; \
+ andi rt, rt, -(pow2);
/*
* Splat - replicate a particular slot into all slots
* Altivec analogs...
*/
-// replicate byte in slot s [0,15]
-#define VSPLTB(rt, ra, s) \
- ilh rt (s)*0x0101 \
- shubf rt, ra, ra, rt
+// replicate byte from slot s [0,15] of ra into every byte slot of rt; clobbers _gc_t0
+#define VSPLTB(rt, ra, s) \
+ ilh _gc_t0, (s)*0x0101; \
+ shufb rt, ra, ra, _gc_t0;
-// replicate halfword in slot s [0,7]
-#define VSPLTH(rt, ra, s) \
- ilh rt, 2*(s)*0x0101 + 0x0001 \
- shufb rt, ra, ra, rt
+// replicate halfword from slot s [0,7] of ra into every halfword slot of rt; clobbers _gc_t0
+#define VSPLTH(rt, ra, s) \
+ ilh _gc_t0, 2*(s)*0x0101 + 0x0001; \
+ shufb rt, ra, ra, _gc_t0;
-// replicate word in slot s [0,3]
-#define VSPLTW(rt, ra, s) \
- iluh rt, 4*(s)*0x0101 + 0x0001 \
- iohl rt, 4*(s)*0x0101 + 0x0203
+// replicate word from slot s [0,3] of ra into every word slot of rt; clobbers _gc_t0
+#define VSPLTW(rt, ra, s) \
+ ilhu _gc_t0, 4*(s)*0x0101 + 0x0001; \
+ iohl _gc_t0, 4*(s)*0x0101 + 0x0203; \
+ shufb rt, ra, ra, _gc_t0;
-// replicate double in slot s [0,1]
-#define VSPLTD(rt, ra, s)
\
- // sp is always 16-byte aligned
\
- cdd _gcell_t0, 8(sp) // 0x10111213 14151617 00010203
04050607 \
- rotqbyi rt, ra, ra, (s) << 3 // rotate into preferred slot
\
- shufb rt, rt, rt, _gcell_t0
+// replicate double from slot s [0,1] of ra into both doubleword slots of rt; clobbers _gc_t0
+#define VSPLTD(rt, ra, s) \
+ /* sp is always 16-byte aligned */ \
+ cdd _gc_t0, 8(sp); /* 0x10111213 14151617 00010203 04050607 */ \
+ rotqbyi rt, ra, (s) << 3; /* rotate double into preferred slot */ \
+ shufb rt, rt, rt, _gc_t0;
#endif /* INCLUDED_GC_SPU_MACS_H */
Added:
gnuradio/branches/developers/eb/gcell-wip/gcell/src/lib/general/spu/memset.S
===================================================================
---
gnuradio/branches/developers/eb/gcell-wip/gcell/src/lib/general/spu/memset.S
(rev 0)
+++
gnuradio/branches/developers/eb/gcell-wip/gcell/src/lib/general/spu/memset.S
2008-05-03 05:42:53 UTC (rev 8305)
@@ -0,0 +1,210 @@
+/* -*- asm -*- */
+/*
+ * Copyright 2008 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <gc_spu_macs.h>
+
+ .file "memset.S"
+
+ /*
+ * Computes this, only a lot faster...
+ *
+ * void *
+ * memset(void *pv, int c, size_t n)
+ * {
+ * unsigned char *p = (unsigned char *) pv;
+ * size_t i;
+ * for (i = 0; i < n; i++)
+ * p[i] = c;
+ *
+ * return pv;
+ * }
+ */
+
+ .text
+ .p2align 4
+ .global memset_spe
+ .type memset_spe, @function
+
+#define p_arg arg1 // incoming pointer pv; never written below, so arg1 still holds the return value
+#define c arg2 // the constant we're writing
+#define n arg3 // how many bytes to write
+
+#define p r13 // where we're writing
+#define t0 r14 // temporary
+#define t1 r15 // temporary
+#define mask r16 // selb mask used when merging a partial quadword
+#define old r17 // previous contents of the quadword being merged
+#define an r18 // aligned n (n rounded down to mod 16 boundary)
+#define next_p r19 // p advanced past the chunk currently being stored
+#define cond1 r20 // middle_loop: nonzero while an > 127
+#define cond2 r21 // middle_loop: n | an, zero when nothing is left to write
+#define m r22 // do_head: number of bytes written in the first (unaligned) quadword
+
+memset_spe: // entry: p_arg = pv, c = fill value, n = byte count (see the C reference above)
+ MR(p, p_arg) // p = working copy of the pointer; p_arg itself is never written, so it still holds pv on return
+ BRZ_RETURN(n) // n == 0: nothing to write
+
+ VSPLTB(c, c, 3) // splat byte slot 3 of c (the fill byte, in c's preferred slot) into all slots
+
+ // Is the modified region all within a single quad word?
+
+ ROUND_DOWN(t0, p, 16) // t0 = p & -16
+ a t1, p, n // t1 = p + n (one past the last byte written)
+ ROUND_DOWN(t1, t1, 16) // t1 = (p + n) & -16
+ ceq t1, t0, t1 // same 16-byte base for start and end?
+ brnz t1, single_quad_word // yes
+
+ MODULO(t0, p, 16) // is p%16 == 0?
+ brnz t0, do_head // no, handle it
+head_complete:
+
+ /*
+ * preconditions:
+ * p%16 == 0, n > 0
+ */
+ hbrr middle_loop_br, middle_loop // branch hint: the branch at middle_loop_br targets middle_loop
+
+ ROUND_DOWN(an, n, 16) // an is "aligned n"
+ brz an, do_tail // no whole quad words; skip to tail
+ clgti t0, an, 127 // an >= 128?
+ MODULO(n, n, 16) // what's left over in the last quad
+ brz t0, middle2 // nope, an is a multiple of 16 in 16..112; store one quad at a time
+
+ /*
+ * start biting off 128-byte chunks!
+ */
+ .p2align 4
+middle_loop:
+ ai an, an, -128 // this pass consumes 128 bytes of an
+ stqd c, 0*16(p)
+ ai next_p, p, 128 // next_p = start of the following chunk
+ stqd c, 1*16(p)
+ cgti cond1, an, 127 // cond1 = another full 128-byte chunk remains
+ stqd c, 2*16(p)
+
+ stqd c, 3*16(p)
+ stqd c, 4*16(p)
+ stqd c, 5*16(p)
+ stqd c, 6*16(p)
+
+ MR(p, next_p) // p = next_p
+ stqd c, 7*16-128(next_p) // eighth quad of this chunk, addressed back from next_p
+ or cond2, n, an // cond2 == 0 iff no bytes remain at all
+middle_loop_br:
+ brnz cond1, middle_loop
+
+ /*
+ * if an and n are both zero, return now
+ */
+ BRZ_RETURN(cond2)
+
+ /*
+ * otherwise handle last of full quad words, one 16-byte store per pass
+ *
+ * 0 <= an < 128, p%16 == 0
+ */
+middle2:
+ /*
+ * if an == 0, go handle the final non-full quadword
+ */
+ brz an, do_tail
+ hbrr middle2_loop_br, middle2_loop // branch hint: the branch at middle2_loop_br targets middle2_loop
+
+ .p2align 3
+middle2_loop:
+ ai next_p, p, 16 // next_p = following quadword
+ stqd c, 0(p) // fill the current quadword
+ ai an, an, -16 // one fewer whole quadword to go
+ LMR(p, next_p) // p = next_p
+middle2_loop_br:
+ brnz an, middle2_loop
+
+ /*
+ * We're done with the full quadwords.
+ */
+
+ /*
+ * Handle the final partial quadword.
+ * We'll be modifying only the left hand portion of the quad.
+ *
+ * an == 0, 0 <= n < 16, p%16 == 0
+ */
+do_tail:
+ BRZ_RETURN(n) // no leftover bytes: done
+ il mask, -1 // mask = all ones
+ RETURN_HINT(do_tail_ret) // presumably hints the return branch at do_tail_ret (macro defined elsewhere)
+ sfi t1, n, 16 // t1 = 16 - n
+ lqd old, 0(p) // old = current contents of the last quadword
+
+ shlqby mask, mask, t1 // 1's in the top n bytes, (16-n)*8 0's in the bottom
+ selb t0, old, c, mask // c where mask is 1 (first n bytes), old elsewhere
+ stqd t0, 0(p) // store the merged quadword
+do_tail_ret:
+ RETURN()
+
+ /*
+ * ----------------------------------------------------------------
+ * Handle the right-hand side of the first quadword; align p.
+ *
+ * preconditions:
+ * p%16 != 0, target area touches a mod 16 boundary
+ *
+ * postconditions:
+ * p%16 == 0, n decremented
+ * ----------------------------------------------------------------
+ */
+do_head:
+ hbrr do_head_br, head_complete // branch hint: the branch at do_head_br targets head_complete
+ lqd t0, 0(p) // t0 = current contents of the first quadword
+ MODULO_NEG(m, p, 16) // # of bytes to overwrite in right edge of t0
+ il mask, -1 // mask = all ones
+ shlqby mask, mask, m // 1's in the top, m*8 0's in the bottom
+ selb t1, c, t0, mask // old bytes (t0) where mask is 1, c in the bottom m bytes
+ stqd t1, 0(p) // store the merged quadword
+ sf n, m, n // n -= m
+ a p, p, m // p += m
+ BRZ_RETURN(n) // the head was the whole request: done
+do_head_br:
+ br head_complete
+
+ /*
+ * ----------------------------------------------------------------
+ * 0 < n < 16 and all bytes are in a single quad word.
+ *
+ * We read the value pointed at by p (the old value)
+ * then build a mask that selects which bits of
+ * old and c we're going to use, then selb them and
+ * store the new value back.
+ * ----------------------------------------------------------------
+ */
+single_quad_word:
+ il mask, -1 // mask = all ones
+ RETURN_HINT(sqw_return) // presumably hints the return branch at sqw_return (macro defined elsewhere)
+ MODULO(t0, p, 16) // t0 = p%16
+ lqd old, 0(p) // old = current contents of the quadword at p
+ a t0, t0, n // t0 = p%16 + n
+ shlqby mask, mask, n // 1's in the top, n*8 0's in the bottom
+ sfi t0, t0, 16 // t0 = 16 -(n + p%16)
+ rotqby mask, mask, t0 // rotate the zero bits into the right place
+ selb t0, c, old, mask // t0 has the combination of old and c
+ stqd t0, 0(p) // store it
+sqw_return:
+ RETURN()
Property changes on:
gnuradio/branches/developers/eb/gcell-wip/gcell/src/lib/general/spu/memset.S
___________________________________________________________________
Name: svn:eol-style
+ native
Modified:
gnuradio/branches/developers/eb/gcell-wip/gcell/src/lib/spu/Makefile.am
===================================================================
--- gnuradio/branches/developers/eb/gcell-wip/gcell/src/lib/spu/Makefile.am
2008-05-03 00:29:48 UTC (rev 8304)
+++ gnuradio/branches/developers/eb/gcell-wip/gcell/src/lib/spu/Makefile.am
2008-05-03 05:42:53 UTC (rev 8305)
@@ -58,7 +58,8 @@
general_srcdir = $(srcdir)/../general/spu
general_spu_sources = \
- $(general_srcdir)/fft_1d_r2.c
+ $(general_srcdir)/fft_1d_r2.c \
+ $(general_srcdir)/memset.S
general_spu_headers = \
$(general_srcdir)/libfft.h
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [Commit-gnuradio] r8305 - in gnuradio/branches/developers/eb/gcell-wip: . gcell/src/lib/general/spu gcell/src/lib/spu,
eb <=