gcl-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Gcl-devel] possible dotimes enhancement


From: Camm Maguire
Subject: [Gcl-devel] possible dotimes enhancement
Date: 31 Jul 2003 17:42:32 -0400
User-agent: Gnus/5.09 (Gnus v5.9.0) Emacs/21.2

Greetings!  I was just looking at how gcl compiles a matrix
multiplication today.  In short, once one makes sure all the
declarations are kicking in, it appears as if it can get as fast as
native C.

There does appear to be a current inefficiency in our dotimes
definition, as compared with do and do*.  The existing definition is:

;; Existing definition.  Expands into DO*: FORM is evaluated once into a
;; gensym'd variable TEMP, VAR starts at 0 and is stepped with 1+, and the
;; loop ends, returning VAL (default NIL), as soon as (>= var temp) holds.
;; TEMP is created by the macro, so user declarations in BODY cannot
;; name it.
(defmacro dotimes ((var form &optional (val nil)) &rest body
                                                  &aux (temp (gensym)))
  `(do* ((,temp ,form) (,var 0 (1+ ,var)))
        ((>= ,var ,temp) ,val)
        ,@body))

If one declares the type of var, optimizations still do not kick in
because the gensym'd temp has no declared type. 

I'm proposing the following:

;; DOTIMES variant that copies a type declaration the user made for VAR
;; onto the gensym'd limit variable, so that both sides of the
;; (>= var limit) end test carry a declared type.
;;
;;   VAR  - loop variable; bound to 0 and stepped with 1+
;;   FORM - evaluated once; its value is the limit
;;   VAL  - optional result form (default NIL)
;;   BODY - optional leading DECLARE forms followed by the loop body
;;
;; Both (declare (fixnum i)) and (declare (type fixnum i)) are recognised,
;; in any position among the leading DECLARE forms and among the
;; specifiers of one DECLARE.  Assumes the type declared for VAR also
;; admits the value of FORM; under this expansion VAR itself reaches that
;; value when the loop exits.
(defmacro dotimes ((var form &optional (val nil)) &rest body
                   &aux (temp (gensym)) limit-decl)
  ;; Only DECLARE forms at the head of BODY are declarations, so stop at
  ;; the first form that is not one.  The CONSP test keeps atoms in BODY
  ;; (e.g. go tags) from being handed to CAR.
  (dolist (item body)
    (unless (and (consp item) (eq (car item) 'declare))
      (return))
    ;; One DECLARE may carry several specifiers; look at each of them.
    (dolist (spec (cdr item))
      (when (consp spec)
        (let ((copy (cond ((eq (car spec) 'type)
                           ;; Long form: (type <typespec> var ...)
                           (when (member var (cddr spec))
                             `(type ,(cadr spec) ,temp)))
                          ;; These may mention VAR but say nothing
                          ;; about its type; never copy them.
                          ((member (car spec)
                                   '(special ignore ignorable dynamic-extent
                                     inline notinline optimize))
                           nil)
                          ;; Short form: (<typespec> var ...)
                          ((member var (cdr spec))
                           `(,(car spec) ,temp)))))
          (when copy
            (setq limit-decl `((declare ,copy))))))))
  `(do* ((,temp ,form) (,var 0 (1+ ,var)))
        ((>= ,var ,temp) ,val)
        ;; Spliced rather than inserted: when VAR carries no type
        ;; declaration nothing is emitted, so the declarations in BODY
        ;; stay in declaration position.
        ,@limit-decl
        ,@body))

This basically will extend the user's possible declaration of var to
that of the integer evaluation of form stored in temp, as would appear
to make sense, as var has to count up to this value in any case.

With this enhancement in my preliminary testing, all loops of the form
'(dotimes (i form) (declare (fixnum i)) ...) have the counting variable
incrementation and comparison fully optimized, i.e. avoiding the
generic number_compare, etc.

So what do the lisp experts think?  Am I getting into trouble?

I've included a matrix multiply compiler output with this enhancement
turned on for those interested.

Separately, at some point we have to figure out why the optimizer
cannot currently optimize

(declare (fixnum i)) (aref a (+ i 1))

but only

(let ((m 0)) (declare (fixnum m i)) (setq m (+ i 1)) (aref a m))

Take care,

=============================================================================
;; Test case whose compiled output is listed below it in this message.
;; For each i in [0,n) and j in [0,k) it sums, over l in [0,m), the
;; products of element (i*m + l) of A and element (l*k + j) of B, then
;; stores the sum into C.  A, B and C are declared (array long-float) and
;; are indexed with a single subscript; N, M and K are declared fixnum.
;; Returns C, the result form of the outer DOTIMES.
(defun matmul (a b c n m k)
  (declare (optimize (speed 3) (safety 0))
           (type (array long-float) a b c)
           (fixnum n m k))
  ;; i1 starts at -m so that the first (+ i1 m) in the i loop yields 0.
  (let ((sum 0.0)
        (i1 (- m))
        (k2 0))
    (declare (type long-float sum) (type fixnum i1 k2))
    (dotimes (i n c)
      (declare (fixnum i))
      (setf i1 (+ i1 m)) ;; i1=i*m
      (dotimes (j k)
        (declare (fixnum j))
        (setf sum 0.0)
        ;; Reset to -k so that the first (+ k2 k) in the l loop yields 0.
        (setf k2 (- k))
        (dotimes (l m)
          (declare (fixnum l))
          ;; mi1 and mk2 are declared fixnum temporaries that hold the two
          ;; subscripts before they are passed to AREF.
          (let ((mi1 0) (mk2 0))
;          (declare (fixnum  j))
;          (declare (fixnum  k))
;          (declare (fixnum  i1))
;          (declare (fixnum  k2))
            (declare (fixnum  mi1))
            (declare (fixnum  mk2))
          (setf k2 (+ k2 k)) ;; k2= l*k
          (setf mi1 (+ i1 l))
          (setf mk2 (+ k2 j))
          (setf sum (the long-float (+ (the long-float sum) 
                                               (the long-float (* (aref a mi1)
                                                                          (aref 
b mk2))))))))
        ;; The subscript is again computed into a declared fixnum temporary
        ;; first; i and j are named in the DECLARE without being bound by
        ;; this LET.
        ;; NOTE(review): the store index is i1 + j = i*m + j.  If C is meant
        ;; to be an n-by-k row-major result this would be i*k + j; the two
        ;; agree only when m = k -- confirm.
        (let ((mi1 0)) (declare (fixnum mi1 i j)) (setq mi1 (+ i1 j))
        (setf (aref c mi1) sum))))))
=============================================================================
/*
 * Compiler output for the MATMUL test case listed above it in this
 * message.  The six arguments are read from base[0..5]; the correspondence
 * to the Lisp variables, inferred from the initialisers and loop limits, is
 *   V1=a  V2=b  V3=c  V4=n  V5=m  V6=k
 *   V7=sum  V8=i1  V9=k2
 *   V10/V11 = limit and counter of the i loop
 *   V12/V13 = limit and counter of the j loop
 *   V14/V15 = limit and counter of the l loop
 *   V16=mi1  V17=mk2 (inner LET)   V18=mi1 (second LET)
 * All three loop tests and increments below are plain C operations on long.
 */
static void L1()
{register object *base=vs_base;
        register object *sup=base+VM1; VC1
        vs_check;
        {object V1;
        object V2;
        object V3;
        long V4;
        long V5;
        long V6;
        /* a, b, c are kept as objects; n, m, k go through fix() into C longs
           (presumably fix() unboxes a fixnum -- confirm against the runtime). */
        V1=(base[0]);
        V2=(base[1]);
        V3=(base[2]);
        V4=fix(base[3]);
        V5=fix(base[4]);
        V6=fix(base[5]);
        vs_top=sup;
        goto TTL;
TTL:;
        {register double V7;
        long V8;
        register long V9;
        /* sum = 0.0, i1 = -m, k2 = 0 */
        V7=     0.    ;
        V8= (long)-(V5);
        V9= 0;
        {long V10;
        long V11;
        /* i loop: limit = n, i = 0 */
        V10= V4;
        V11= 0;
        goto T7;
T7:;
        /* i loop end test */
        if(!((V11)>=(V10))){
        goto T8;}
        /* i loop finished: place c in base[6], adjust vs_base/vs_top around
           it, and return. */
        base[6]= (V3);
        vs_top=(vs_base=base+6)+1;
        return;
        goto T8;
T8:;
        /* i1 += m */
        V8= (long)(V8)+(V5);
        {long V12;
        register long V13;
        /* j loop: limit = k, j = 0 */
        V12= V6;
        V13= 0;
        goto T18;
T18:;
        /* j loop end test; on exit jump to T14, which increments i */
        if(!((V13)>=(V12))){
        goto T19;}
        goto T14;
        goto T19;
T19:;
        /* sum = 0.0, k2 = -k */
        V7=     0.    ;
        V9= (long)-(V6);
        {long V14;
        register long V15;
        /* l loop: limit = m, l = 0 */
        V14= V5;
        V15= 0;
        goto T31;
T31:;
        /* l loop end test; on exit jump to T27, which stores into c */
        if(!((V15)>=(V14))){
        goto T32;}
        goto T27;
        goto T32;
T32:;
        {register long V16;
        register long V17;
        V16= 0;
        V17= 0;
        /* k2 += k; mi1 = i1 + l; mk2 = k2 + j */
        V9= (long)(V9)+(V6);
        V16= (long)(V8)+(V15);
        V17= (long)(V9)+(V13);
        /* sum += a[mi1] * b[mk2], read through lfa.lfa_self
           (presumably the array's long-float element storage -- confirm). */
        V7= 
(double)(V7)+(double)((double)(((V1))->lfa.lfa_self[V16])*(double)(((V2))->lfa.lfa_self[V17]));}
        /* l++ */
        V15= (long)(V15)+1;
        goto T31;}
        goto T27;
T27:;
        {long V18;
        V18= 0;
        /* mi1 = i1 + j; c[mi1] = sum */
        V18= (long)(V8)+(V13);
        (void)(((V3))->lfa.lfa_self[V18]= (V7));}
        /* j++ */
        V13= (long)(V13)+1;
        goto T18;}
        goto T14;
T14:;
        /* i++ */
        V11= (long)(V11)+1;
        goto T7;}}
        }
}
=============================================================================

-- 
Camm Maguire                                            address@hidden
==========================================================================
"The earth is but one country, and mankind its citizens."  --  Baha'u'llah




reply via email to

[Prev in Thread] Current Thread [Next in Thread]