commit-gnuradio
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Commit-gnuradio] r7833 - in gnuradio/branches/developers/eb/gcell/src: include lib lib/spu


From: eb
Subject: [Commit-gnuradio] r7833 - in gnuradio/branches/developers/eb/gcell/src: include lib lib/spu
Date: Tue, 26 Feb 2008 10:10:27 -0700 (MST)

Author: eb
Date: 2008-02-26 10:10:26 -0700 (Tue, 26 Feb 2008)
New Revision: 7833

Modified:
   gnuradio/branches/developers/eb/gcell/src/include/gc_job_desc.h
   gnuradio/branches/developers/eb/gcell/src/include/gc_job_desc_private.h
   gnuradio/branches/developers/eb/gcell/src/lib/gc_job_manager_impl.cc
   gnuradio/branches/developers/eb/gcell/src/lib/spu/gc_spu_procs.c
   gnuradio/branches/developers/eb/gcell/src/lib/spu/gcell_spu_main.c
   gnuradio/branches/developers/eb/gcell/src/lib/spu/spu_buffers.c
   gnuradio/branches/developers/eb/gcell/src/lib/spu/spu_buffers.h
Log:
work-in-progress on DMA'ing args back to PPE at all alignments and sizes

Modified: gnuradio/branches/developers/eb/gcell/src/include/gc_job_desc.h
===================================================================
--- gnuradio/branches/developers/eb/gcell/src/include/gc_job_desc.h     2008-02-25 20:10:32 UTC (rev 7832)
+++ gnuradio/branches/developers/eb/gcell/src/include/gc_job_desc.h     2008-02-26 17:10:26 UTC (rev 7833)
@@ -138,27 +138,24 @@
  * These are DMA'd between EA and LS as specified.
  */
 typedef struct gc_job_ea_arg {
-  //! EA address of buffer (in)
+  //! EA address of buffer
   gc_eaddr_t     ea_addr;      
 
-  //! GC_JD_DMA_* get arg or put arg (in)
+  //! GC_JD_DMA_* get arg or put arg
   uint32_t      direction;
 
-  //! number of bytes to get (in)
+  //! number of bytes to get
   uint32_t      get_size;         
 
-  //! maximum number of bytes to put (in: GCJD_DMA_PUT)
-  uint32_t      max_put_size;
+  //! number of bytes to put
+  uint32_t      put_size;
 
-  //! actual number of bytes put (out: GCJD_DMA_PUT)
-  uint32_t      actual_put_size;       // must be <= max_put_size
-
 #if defined(__SPU__)
   //! local store address (filled in by SPU runtime)
   void         *ls_addr;
-  uint32_t      _pad[1];
+  uint32_t      _pad[2];
 #else
-  uint32_t       _pad[2];
+  uint32_t       _pad[3];
 #endif
 
 } _AL16 gc_job_ea_arg_t;
@@ -191,7 +188,7 @@
  */
 typedef void (*gc_spu_proc_t)(const gc_job_direct_args_t *input,
                              gc_job_direct_args_t *output,
-                             gc_job_ea_args_t *eaa);
+                             const gc_job_ea_args_t *eaa);
 
 #if !defined(__SPU__)
 

Modified: gnuradio/branches/developers/eb/gcell/src/include/gc_job_desc_private.h
===================================================================
--- gnuradio/branches/developers/eb/gcell/src/include/gc_job_desc_private.h     2008-02-25 20:10:32 UTC (rev 7832)
+++ gnuradio/branches/developers/eb/gcell/src/include/gc_job_desc_private.h     2008-02-26 17:10:26 UTC (rev 7833)
@@ -29,9 +29,10 @@
  */
 typedef struct gc_job_desc_private
 {
-  gc_eaddr_t   next;           // used to implement job queue and free list
+  gc_eaddr_t   next;               // used to implement job queue and free list
   uint16_t     job_id;
   uint16_t     client_id;
+  uint32_t     direction_union;    // union of all gc_job_ea_arg.direction fields
 } gc_job_desc_private_t;
 
 #endif /* INCLUDED_GC_JOB_PRIVATE_H */

Modified: gnuradio/branches/developers/eb/gcell/src/lib/gc_job_manager_impl.cc
===================================================================
--- gnuradio/branches/developers/eb/gcell/src/lib/gc_job_manager_impl.cc        2008-02-25 20:10:32 UTC (rev 7832)
+++ gnuradio/branches/developers/eb/gcell/src/lib/gc_job_manager_impl.cc        2008-02-26 17:10:26 UTC (rev 7833)
@@ -441,8 +441,10 @@
     return false;
   }
 
+  uint32_t dir_union = 0;
+
   for (unsigned int i = 0; i < p->nargs; i++){
-
+    dir_union |= p->arg[i].direction;
     switch(p->arg[i].direction){
     case GCJD_DMA_GET:
     case GCJD_DMA_PUT:
@@ -454,6 +456,17 @@
     }
   }
 
+  if (p->nargs > 1){
+    unsigned int common_eah = (p->arg[0].ea_addr) >> 32;
+    for (unsigned int i = 1; i < p->nargs; i++){
+      if ((p->arg[i].ea_addr >> 32) != common_eah){
+       jd->status = JS_BAD_EAH;
+       return false;
+      }
+    }
+  }
+
+  jd->sys.direction_union = dir_union;
   return true;
 }
 

Modified: gnuradio/branches/developers/eb/gcell/src/lib/spu/gc_spu_procs.c
===================================================================
--- gnuradio/branches/developers/eb/gcell/src/lib/spu/gc_spu_procs.c    2008-02-25 20:10:32 UTC (rev 7832)
+++ gnuradio/branches/developers/eb/gcell/src/lib/spu/gc_spu_procs.c    2008-02-26 17:10:26 UTC (rev 7833)
@@ -31,14 +31,14 @@
 void
 gcp_qa_nop(const gc_job_direct_args_t *input _UNUSED,
           gc_job_direct_args_t *output _UNUSED,
-          gc_job_ea_args_t *eaa _UNUSED)
+          const gc_job_ea_args_t *eaa _UNUSED)
 {
 }
 
 void
 gcp_qa_udelay(const gc_job_direct_args_t *input,
              gc_job_direct_args_t *output _UNUSED,
-             gc_job_ea_args_t *eaa _UNUSED)
+             const gc_job_ea_args_t *eaa _UNUSED)
 {
   gc_udelay(input->arg[0].u32);
 }
@@ -56,7 +56,7 @@
 void
 gcp_qa_sum_shorts(const gc_job_direct_args_t *input _UNUSED,
                  gc_job_direct_args_t *output,
-                 gc_job_ea_args_t *eaa)
+                 const gc_job_ea_args_t *eaa)
 {
   for (unsigned int i = 0; i < eaa->nargs; i++){
     short *p = eaa->arg[i].ls_addr;

Modified: gnuradio/branches/developers/eb/gcell/src/lib/spu/gcell_spu_main.c
===================================================================
--- gnuradio/branches/developers/eb/gcell/src/lib/spu/gcell_spu_main.c  2008-02-25 20:10:32 UTC (rev 7832)
+++ gnuradio/branches/developers/eb/gcell/src/lib/spu/gcell_spu_main.c  2008-02-26 17:10:26 UTC (rev 7833)
@@ -50,9 +50,19 @@
 
 static gc_spu_args_t   spu_args;
 
+// ------------------------------------------------------------------------
+
+// state for DMA'ing arguments in and out
+
 static int get_tag;            // 1 tag for job arg gets
 static int put_tags;           // 2 tags for job arg puts
 
+static int pbi = 0;            // current put buffer index (0 or 1)
+
+// bitmask (bit per put buffer): bit is set if DMA is started but not complete
+static int put_in_progress = 0;
+#define PBI_MASK(_pbi_) (1 << (_pbi_))
+
 // ------------------------------------------------------------------------
 
 // our working copy of the completion info
@@ -108,10 +118,21 @@
   // dma the comp_info out to PPE
   int tag = ci_tags + ci_idx;
   mfc_put(&comp_info, spu_args.comp_info[ci_idx], sizeof(gc_comp_info_t), tag, 0, 0);
-  mfc_write_tag_mask(1 << tag);                // the tag we're interested in
+
+  // we need to wait for the completion info to finish, as well as
+  // any EA argument puts.
+
+  int tag_mask = 1 << tag;             // the comp_info tag
+  if (put_in_progress & PBI_MASK(0))
+    tag_mask |= (1 << (put_tags + 0));
+  if (put_in_progress & PBI_MASK(1))
+    tag_mask |= (1 << (put_tags + 1));
+
+  mfc_write_tag_mask(tag_mask);                // the tags we're interested in
   mfc_read_tag_status_all();           // wait for DMA to complete
+  put_in_progress = 0;                 // mark them all complete
 
-  // send it a message
+  // send PPE a message
   spu_writech(SPU_WrOutIntrMbox, MK_MBOX_MSG(OP_JOBS_DONE, ci_idx));
 
   ci_idx ^= 0x1;       // switch buffers
@@ -199,21 +220,39 @@
        MAX(MAX_ARGS_EA,
            (GC_SPU_BUFSIZE + MFC_MAX_DMA_SIZE - 1) / MFC_MAX_DMA_SIZE);
 
-      mfc_list_element_t  dma_list[NELMS];
-      memset(dma_list, 0, sizeof(dma_list));
-      int li = 0;
+      mfc_list_element_t  dma_get_list[NELMS];
+      //mfc_list_element_t  dma_put_list[NELMS];
+      
+      memset(dma_get_list, 0, sizeof(dma_get_list));
+      //memset(dma_put_list, 0, sizeof(dma_put_list));
 
+      int gli = 0;     // get list index
+      //int pli = 0;   // put list index
+
+      unsigned char *get_base = _gci_getbuf[0];
+      unsigned char *get_t = get_base;
+      unsigned int   total_get_dma_len = 0;
+
+      unsigned char *put_base = _gci_putbuf[pbi];
+      unsigned char *put_t = put_base;
+      unsigned int   total_put_alloc = 0;
+      int           put_tag = put_tags + pbi;
+
+      // Do we have any "put" args?  If so ensure that previous
+      // dma from this buffer is complete
+
+      if ((jd->sys.direction_union & GCJD_DMA_PUT)
+         && (put_in_progress & PBI_MASK(pbi))){
+
+       mfc_write_tag_mask(1 << put_tag);       // the tag we're interested in
+       mfc_read_tag_status_all();              // wait for DMA to complete
+       put_in_progress &= ~(PBI_MASK(pbi));
+      }
+
+
       // for now, all EA's must have the same high 32-bits
       gc_eaddr_t common_ea = eaa->arg[0].ea_addr;
-      unsigned int common_eah = mfc_ea2h(common_ea);
 
-      unsigned char *gb_base = gc_getbuf[0];
-      unsigned char *p = gb_base;
-      unsigned int total_get_dma_len = 0;
-      
-      // unsigned int pbi = 0;                 // put buffer index
-      // unsigned char *pb_base = gc_putbuf[pbi];
-      
 
       // assign LS addresses for buffers
       
@@ -226,11 +265,6 @@
 
        if (eaa->arg[i].direction == GCJD_DMA_GET){
          ea_base = ROUND_DN(eaa->arg[i].ea_addr, (gc_eaddr_t) CACHE_LINE_SIZE);
-         if (mfc_ea2h(ea_base) != common_eah){
-           jd->status = JS_BAD_EAH;
-           goto wrap_up;
-         }
-
          offset = eaa->arg[i].ea_addr & (CACHE_LINE_SIZE-1);
          dma_len = ROUND_UP(eaa->arg[i].get_size + offset, CACHE_LINE_SIZE);
          total_get_dma_len += dma_len;
@@ -240,8 +274,8 @@
            goto wrap_up;
          }
 
-         ls_base = p;
-         p += dma_len;
+         ls_base = get_t;
+         get_t += dma_len;
          eaa->arg[i].ls_addr = ls_base + offset;
 
          if (0){
@@ -261,32 +295,179 @@
            printf("  ls_addr   = %p\n", eaa->arg[i].ls_addr);
          }
          
-         // add to dma list
-         // FIXME (someday) this is where the JS_BAD_EAH limitation comes from
+         // add to dma get list 
+         // FIXME (someday) the dma lists is where the JS_BAD_EAH limitation comes from
+
          while (dma_len != 0){
            int n = MIN(dma_len, MFC_MAX_DMA_SIZE);
-           dma_list[li].size = n;
-           dma_list[li].eal = mfc_ea2l(ea_base);
+           dma_get_list[gli].size = n;
+           dma_get_list[gli].eal = mfc_ea2l(ea_base);
            if (0){
-             printf("  dma_list[%d].size = %6d\n", li, dma_list[li].size);
-             printf("  dma_list[%d].eal  = 0x%08x\n", li, dma_list[li].eal);
+             printf("  dma_get_list[%d].size = %6d\n", gli, dma_get_list[gli].size);
+             printf("  dma_get_list[%d].eal  = 0x%08x\n", gli, dma_get_list[gli].eal);
            }
            dma_len -= n;
            ea_base += n;
-           li++;
+           gli++;
          }
        }
+
+       else if (eaa->arg[i].direction == GCJD_DMA_PUT){
+         //
+         // This case is trickier than the GET case since we can't
+         // write outside of the bounds of the user provided buffer.
+         // We still align the buffers to 128-bytes for good performance
+         // in the middle portion of the xfers.
+         //
+         ea_base = ROUND_DN(eaa->arg[i].ea_addr, (gc_eaddr_t) CACHE_LINE_SIZE);
+         offset = eaa->arg[i].ea_addr & (CACHE_LINE_SIZE-1);
+
+         uint32_t ls_alloc_len =
+           ROUND_UP(eaa->arg[i].put_size + offset, CACHE_LINE_SIZE);
+
+         total_put_alloc += ls_alloc_len;
+
+         if (total_put_alloc > GC_SPU_BUFSIZE){
+           jd->status = JS_ARGS_TOO_LONG;
+           goto wrap_up;
+         }
+
+         ls_base = put_t;
+         put_t += ls_alloc_len;
+         eaa->arg[i].ls_addr = ls_base + offset;
+
+         if (1){
+           assert((mfc_ea2l(eaa->arg[i].ea_addr) & 0x7f) == ((intptr_t)eaa->arg[i].ls_addr & 0x7f));
+           assert((ea_base & 0x7f) == 0);
+           assert(((intptr_t)ls_base & 0x7f) == 0);
+         }
+       }
+
+       else
+         assert(0);
       }
 
       // fire off the dma to fetch the args and wait for it to complete
-      mfc_getl(gb_base, common_ea, dma_list, li*sizeof(dma_list[0]), get_tag, 0, 0);
+      mfc_getl(get_base, common_ea, dma_get_list, gli*sizeof(dma_get_list[0]), get_tag, 0, 0);
       mfc_write_tag_mask(1 << get_tag);                // the tag we're interested in
       mfc_read_tag_status_all();               // wait for DMA to complete
 
       // do the work
       (*gc_proc_table[jd->proc_id])(&jd->input, &jd->output, eaa);
 
-      // FIXME copy EA args out
+      // Do we have any "put" args?  If so copy them out
+      if (jd->sys.direction_union & GCJD_DMA_PUT){
+
+       // Do the copy out using single DMA xfers.  The LS ranges
+       // aren't generally contiguous.
+       
+       bool started_dma = false;
+
+       for (unsigned int i = 0; i < eaa->nargs; i++){
+         if (eaa->arg[i].direction == GCJD_DMA_PUT && eaa->arg[i].put_size != 0){
+           
+           started_dma = true;
+
+           gc_eaddr_t       ea;
+           unsigned char   *ls;
+           int              len;
+
+           ea = eaa->arg[i].ea_addr;
+           ls = (unsigned char *) eaa->arg[i].ls_addr;
+           len = eaa->arg[i].put_size;
+
+           if ((ea & 0xf) != 0){
+
+             // handle the "pre-multiple-of-16" portion
+             // do 1, 2, 4, or 8 byte xfers as required
+
+             if ((ea & 0x1) && len >= 1){              // do a 1-byte xfer
+               mfc_put(ls, ea, 1, put_tag, 0, 0);
+               ea += 1;
+               ls += 1;
+               len -= 1;
+             }
+             if ((ea & 0x2) && len >= 2){              // do a 2-byte xfer
+               mfc_put(ls, ea, 2, put_tag, 0, 0);
+               ea += 2;
+               ls += 2;
+               len -= 2;
+             }
+             if ((ea & 0x4) && len >= 4){              // do a 4-byte xfer
+               mfc_put(ls, ea, 4, put_tag, 0, 0);
+               ea += 4;
+               ls += 4;
+               len -= 4;
+             }
+             if ((ea & 0x8) && len >= 8){              // do an 8-byte xfer
+               mfc_put(ls, ea, 8, put_tag, 0, 0);
+               ea += 8;
+               ls += 8;
+               len -= 8;
+             }
+           }
+
+           if (1){
+             assert((ea & 0xf) == 0);
+             assert((((intptr_t) ls) & 0xf) == 0);
+           }
+
+           // handle the "multiple-of-16" portion
+
+           int aligned_len = ROUND_DN(len, 16);
+           len = len & (16 - 1);
+
+           while (aligned_len != 0){
+             int dma_len = MIN(aligned_len, MFC_MAX_DMA_SIZE);
+             mfc_put(ls, ea, dma_len, put_tag, 0, 0);
+             ea += dma_len;
+             ls += dma_len;
+             aligned_len -= dma_len;
+           }
+
+           if (1){
+             assert((ea & 0xf) == 0);
+             assert((((intptr_t) ls) & 0xf) == 0);
+           }
+
+           // handle "post-multiple-of-16" portion
+
+           if (len != 0){
+
+             if (len >= 8){                            // do an 8-byte xfer
+               mfc_put(ls, ea, 8, put_tag, 0, 0);
+               ea += 8;
+               ls += 8;
+               len -= 8;
+             }
+             if (len >= 4){                            // do a 4-byte xfer
+               mfc_put(ls, ea, 4, put_tag, 0, 0);
+               ea += 4;
+               ls += 4;
+               len -= 4;
+             }
+             if (len >= 2){                            // do a 2-byte xfer
+               mfc_put(ls, ea, 2, put_tag, 0, 0);
+               ea += 2;
+               ls += 2;
+               len -= 2;
+             }
+             if (len >= 1){                            // do a 1-byte xfer
+               mfc_put(ls, ea, 1, put_tag, 0, 0);
+               ea += 1;
+               ls += 1;
+               len -= 1;
+             }
+             if (1)
+               assert(len == 0);
+           }
+         }
+       }
+       if (started_dma){
+         put_in_progress |= PBI_MASK(pbi);             // note it's running
+         pbi ^= 1;                                     // toggle current buffer
+       }
+      }
     }
   }
 

Modified: gnuradio/branches/developers/eb/gcell/src/lib/spu/spu_buffers.c
===================================================================
--- gnuradio/branches/developers/eb/gcell/src/lib/spu/spu_buffers.c     2008-02-25 20:10:32 UTC (rev 7832)
+++ gnuradio/branches/developers/eb/gcell/src/lib/spu/spu_buffers.c     2008-02-26 17:10:26 UTC (rev 7833)
@@ -25,23 +25,11 @@
 static unsigned char _getbuf[NGETBUFS][GC_SPU_BUFSIZE] _AL128;
 static unsigned char _putbuf[NPUTBUFS][GC_SPU_BUFSIZE] _AL128;
 
-unsigned char *gc_getbuf[NGETBUFS] = {
+unsigned char *_gci_getbuf[NGETBUFS] = {
   _getbuf[0]
 };
 
-unsigned char *gc_putbuf[NPUTBUFS] = {
+unsigned char *_gci_putbuf[NPUTBUFS] = {
   _putbuf[0],
   _putbuf[1]
 };
-
-int 
-gc_alloc_getbuf()
-{
-  return 0;
-}
-
-int
-gc_alloc_putbuf()
-{
-  return 0;
-}

Modified: gnuradio/branches/developers/eb/gcell/src/lib/spu/spu_buffers.h
===================================================================
--- gnuradio/branches/developers/eb/gcell/src/lib/spu/spu_buffers.h     2008-02-25 20:10:32 UTC (rev 7832)
+++ gnuradio/branches/developers/eb/gcell/src/lib/spu/spu_buffers.h     2008-02-26 17:10:26 UTC (rev 7833)
@@ -23,13 +23,10 @@
 
 #include "gc_spu_config.h"
 
-extern unsigned char *gc_getbuf[NGETBUFS];
-extern unsigned char *gc_putbuf[NPUTBUFS];
+//! pointer to input buffer
+extern unsigned char *_gci_getbuf[NGETBUFS];
 
-//! return the index of an available getbuf
-int gc_alloc_getbuf(void);
+//! pointers to output buffers
+extern unsigned char *_gci_putbuf[NPUTBUFS];
 
-//! return the index of an available putbuf
-int gc_alloc_putbuf(void);
-
 #endif /* INCLUDED_SPU_BUFFERS_H */





reply via email to

[Prev in Thread] Current Thread [Next in Thread]