[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Commit-gnuradio] r8306 - gnuradio/branches/developers/eb/gcell-wip/gcell/src/lib/general/spu
From: |
eb |
Subject: |
[Commit-gnuradio] r8306 - gnuradio/branches/developers/eb/gcell-wip/gcell/src/lib/general/spu |
Date: |
Sat, 3 May 2008 13:25:34 -0600 (MDT) |
Author: eb
Date: 2008-05-03 13:25:32 -0600 (Sat, 03 May 2008)
New Revision: 8306
Modified:
gnuradio/branches/developers/eb/gcell-wip/gcell/src/lib/general/spu/gc_spu_macs.h
gnuradio/branches/developers/eb/gcell-wip/gcell/src/lib/general/spu/memset.S
Log:
work-in-progress
Modified:
gnuradio/branches/developers/eb/gcell-wip/gcell/src/lib/general/spu/gc_spu_macs.h
===================================================================
---
gnuradio/branches/developers/eb/gcell-wip/gcell/src/lib/general/spu/gc_spu_macs.h
2008-05-03 05:42:53 UTC (rev 8305)
+++
gnuradio/branches/developers/eb/gcell-wip/gcell/src/lib/general/spu/gc_spu_macs.h
2008-05-03 19:25:32 UTC (rev 8306)
@@ -1,4 +1,4 @@
-/* -*- c -*- */
+/* -*- asm -*- */
/*
* Copyright 2008 Free Software Foundation, Inc.
*
@@ -192,6 +192,18 @@
/*
* ----------------------------------------------------------------
+ * pseudo ops
+ * ----------------------------------------------------------------
+ */
+#define PROC_ENTRY(name) \
+ .text; \
+ .p2align 4; \
+ .global name; \
+ .type name, @function; \
+name:
+
+/*
+ * ----------------------------------------------------------------
* aliases for common operations
* ----------------------------------------------------------------
*/
@@ -206,7 +218,7 @@
#define RETURN() bi lr;
// hint for a return
-#define RETURN_HINT(ret_label) hbr ret_label, lr;
+#define HINT_RETURN(ret_label) hbr ret_label, lr;
// return if zero
#define BRZ_RETURN(rt) biz rt, lr;
@@ -222,7 +234,9 @@
/*
+ * ----------------------------------------------------------------
* modulo like things for constant moduli that are powers of 2
+ * ----------------------------------------------------------------
*/
// rt = ra & (pow2 - 1)
@@ -244,8 +258,10 @@
andi rt, rt, -(pow2);
/*
+ * ----------------------------------------------------------------
* Splat - replicate a particular slot into all slots
* Altivec analogs...
+ * ----------------------------------------------------------------
*/
// replicate byte from slot s [0,15]
@@ -271,4 +287,94 @@
rotqbyi rt, ra, ra, (s) << 3; /* rotate double into preferred slot */ \
shufb rt, rt, rt, _gc_t0;
+/*
+ * ----------------------------------------------------------------
+ * lots of min/max variations...
+ *
+ * On a slot by slot basis, compute the min or max
+ *
+ * U - unsigned, else signed
+ * B,H,{} - byte, halfword, word
+ * F float
+ * ----------------------------------------------------------------
+ */
+
+#define MIN_SELB(rt, ra, rb, rc) selb rt, ra, rb, rc;
+#define MAX_SELB(rt, ra, rb, rc) selb rt, rb, ra, rc;
+
+ // words
+
+#define MIN(rt, ra, rb) \
+ cgt _gc_t0, ra, rb; \
+ MIN_SELB(rt, ra, rb, _gc_t0)
+
+#define MAX(rt, ra, rb) \
+ cgt _gc_t0, ra, rb; \
+ MAX_SELB(rt, ra, rb, _gc_t0)
+
+#define UMIN(rt, ra, rb) \
+ clgt _gc_t0, ra, rb; \
+ MIN_SELB(rt, ra, rb, _gc_t0)
+
+#define UMAX(rt, ra, rb) \
+ clgt _gc_t0, ra, rb; \
+ MAX_SELB(rt, ra, rb, _gc_t0)
+
+ // bytes
+
+#define MINB(rt, ra, rb) \
+ cgtb _gc_t0, ra, rb; \
+ MIN_SELB(rt, ra, rb, _gc_t0)
+
+#define MAXB(rt, ra, rb) \
+ cgtb _gc_t0, ra, rb; \
+ MAX_SELB(rt, ra, rb, _gc_t0)
+
+#define UMINB(rt, ra, rb) \
+ clgtb _gc_t0, ra, rb; \
+ MIN_SELB(rt, ra, rb, _gc_t0)
+
+#define UMAXB(rt, ra, rb) \
+ clgtb _gc_t0, ra, rb; \
+ MAX_SELB(rt, ra, rb, _gc_t0)
+
+ // halfwords
+
+#define MINH(rt, ra, rb) \
+ cgth _gc_t0, ra, rb; \
+ MIN_SELB(rt, ra, rb, _gc_t0)
+
+#define MAXH(rt, ra, rb) \
+ cgth _gc_t0, ra, rb; \
+ MAX_SELB(rt, ra, rb, _gc_t0)
+
+#define UMINH(rt, ra, rb) \
+ clgth _gc_t0, ra, rb; \
+ MIN_SELB(rt, ra, rb, _gc_t0)
+
+#define UMAXH(rt, ra, rb) \
+ clgth _gc_t0, ra, rb; \
+ MAX_SELB(rt, ra, rb, _gc_t0)
+
+ // floats
+
+#define FMIN(rt, ra, rb) \
+ fcgt _gc_t0, ra, rb; \
+ MIN_SELB(rt, ra, rb, _gc_t0)
+
+#define FMAX(rt, ra, rb) \
+ fcgt _gc_t0, ra, rb; \
+ MAX_SELB(rt, ra, rb, _gc_t0)
+
+// Ignoring the sign, select the values with the minimum magnitude
+#define FMINMAG(rt, ra, rb) \
+ fcmgt _gc_t0, ra, rb; \
+ MIN_SELB(rt, ra, rb, _gc_t0)
+
+// Ignoring the sign, select the values with the maximum magnitude
+#define FMAXMAG(rt, ra, rb) \
+ fcmgt _gc_t0, ra, rb; \
+ MAX_SELB(rt, ra, rb, _gc_t0)
+
+
#endif /* INCLUDED_GC_SPU_MACS_H */
Modified:
gnuradio/branches/developers/eb/gcell-wip/gcell/src/lib/general/spu/memset.S
===================================================================
---
gnuradio/branches/developers/eb/gcell-wip/gcell/src/lib/general/spu/memset.S
2008-05-03 05:42:53 UTC (rev 8305)
+++
gnuradio/branches/developers/eb/gcell-wip/gcell/src/lib/general/spu/memset.S
2008-05-03 19:25:32 UTC (rev 8306)
@@ -38,11 +38,6 @@
* }
*/
- .text
- .p2align 4
- .global memset_spe
- .type memset_spe, @function
-
#define p_arg arg1 // we're going to clobber arg1 w/ the return value
#define c arg2 // the constant we're writing
#define n arg3 // how many bytes to write
@@ -57,25 +52,22 @@
#define cond1 r20
#define cond2 r21
#define m r22
-
-memset_spe:
+#define r r23
+
+ PROC_ENTRY(memset_spe2)
+
+ // Hint the return from do_head, in case we head that way.
+ // There's pretty much nothing we can do to hint the branch to it.
+ hbrr do_head_br, head_complete
+
MR(p, p_arg) // leaves p, the return value, in the correct reg (r3)
BRZ_RETURN(n)
- VSPLTB(c, c, 3) // splat byte in preferred slot of c into all slots
+ MODULO(t0, p, 16) // is p%16 == 0?
+ VSPLTB(c, c, 3) // splat byte in preferred slot of c into all slots
+ brnz t0, do_head // no, handle it
+head_complete:
- // Is the modified region all within a single quad word?
-
- ROUND_DOWN(t0, p, 16)
- a t1, p, n
- ROUND_DOWN(t1, t1, 16)
- ceq t1, t0, t1
- brnz t1, single_quad_word // yes
-
- MODULO(t0, p, 16) // is p%16 == 0?
- brnz t0, do_head // no, handle it
-head_complete:
-
/*
* preconditions:
* p%16 == 0, n > 0
@@ -83,13 +75,13 @@
hbrr middle_loop_br, middle_loop
ROUND_DOWN(an, n, 16) // an is "aligned n"
+ MODULO(n, n, 16) // what's left over in the last quad
brz an, do_tail // no whole quad words; skip to tail
clgti t0, an, 127 // an >= 128?
- MODULO(n, n, 16) // what's left over in the last quad
brz t0, middle2 // nope, go handle the cases between 0 and 112
/*
- * start biting off 128-byte chunks!
+ * 128 bytes / iteration
*/
.p2align 4
middle_loop:
@@ -112,7 +104,7 @@
brnz cond1, middle_loop
/*
- * if an and n are both zero, return now
+ * if an and n are both zero, return now
*/
BRZ_RETURN(cond2)
@@ -136,24 +128,21 @@
LMR(p, next_p)
middle2_loop_br:
brnz an, middle2_loop
-
- /*
- * We're done with the full quadwords.
- */
+ /* We're done with the full quadwords. */
+
/*
* Handle the final partial quadword.
* We'll be modifying only the left hand portion of the quad.
*
- * an == 0, 0 <= n < 16, p%16 == 0
+ * preconditions:
+ * an == 0, 0 <= n < 16, p%16 == 0
*/
do_tail:
- BRZ_RETURN(n)
+ HINT_RETURN(do_tail_ret)
il mask, -1
- RETURN_HINT(do_tail_ret)
sfi t1, n, 16 // t1 = 16 - n
- lqd old, 0(p)
-
+ lqd old, 0(p)
shlqby mask, mask, t1
selb t0, old, c, mask
stqd t0, 0(p)
@@ -162,49 +151,35 @@
/*
* ----------------------------------------------------------------
- * Handle the right-hand side of the first quadword; align p.
+ * Handle the first partial quadword
*
* preconditions:
- * p%16 != 0, target area touches a mod 16 boundary
+ * p%16 != 0
*
- * postconditions:
- * p%16 == 0, n decremented
- * ----------------------------------------------------------------
+ * postconditions:
+ * p%16 == 0 or n == 0
+ *
+ * |-- m --|
+ * +----------------+----------------+
+ * | //////// | |
+ * +----------------+----------------+
+ * |----- r -----|
+ * p
+ * ----------------------------------------------------------------
*/
do_head:
- hbrr do_head_br, head_complete
- lqd t0, 0(p)
- MODULO_NEG(m, p, 16) // # of bytes to overwrite in right edge of t0
+ lqd old, 0(p)
+ MODULO_NEG(r, p, 16)
il mask, -1
+ UMIN(m, r, n)
shlqby mask, mask, m // 1's in the top, m*8 0's in the bottom
- selb t1, c, t0, mask
- stqd t1, 0(p)
- sf n, m, n // n -= m
+ MR(t1, p)
+ sf t0, m, r // t0 = r - m
a p, p, m // p += m
+ rotqby mask, mask, t0 // rotate 0's to the right place
+ sf n, m, n // n -= m
+ selb t0, c, old, mask // merge
+ stqd t0, 0(t1)
BRZ_RETURN(n)
do_head_br:
br head_complete
-
- /*
- * ----------------------------------------------------------------
- * 0 < n < 16 and all bytes are in a single quad word.
- *
- * We read the value pointed at by p (the old value)
- * then build a mask that selects which bits of
- * old and c we're going to use, then selb them and
- * store the new value back.
- * ----------------------------------------------------------------
- */
-single_quad_word:
- il mask, -1
- RETURN_HINT(sqw_return)
- MODULO(t0, p, 16)
- lqd old, 0(p)
- a t0, t0, n
- shlqby mask, mask, n // 1's in the top, n*8 0's in the bottom
- sfi t0, t0, 16 // t0 = 16 -(n + p%16)
- rotqby mask, mask, t0 // rotate the zero bits into the right place
- selb t0, c, old, mask // t0 has the combination of old and c
- stqd t0, 0(p) // store it
-sqw_return:
- RETURN()
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [Commit-gnuradio] r8306 - gnuradio/branches/developers/eb/gcell-wip/gcell/src/lib/general/spu,
eb <=