guile-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: Compiler memory consumption


From: Ludovic Courtès
Subject: Re: Compiler memory consumption
Date: Tue, 12 Sep 2017 11:06:14 +0200
User-agent: Gnus/5.13 (Gnus v5.13) Emacs/25.2 (gnu/linux)

Hello,

This is a followup to the compiler issue, this time focusing on
execution time, trying to see why the compiler takes so long to compile
gnu/packages/*.scm, which should be trivial.

As a test bed, I built this from the Guix tree:

  GUILE_LOAD_COMPILED_PATH=$PWD guild compile \
    -L $PWD -o t.cps --to=cps gnu/packages/python.scm

This results in a 428,185-line file, with that many CPS labels.

If we profile the ‘emit-bytecode’ procedure of (language cps
compile-bytecode), we get this:

--8<---------------cut here---------------start------------->8---
$ guile --debug profile-cps.scm

;;; (go!)
GC Warning: Repeated allocation of very large block (appr. size 268439552):
        May lead to memory leak and poor performance
%     cumulative   self             
time   seconds     seconds  procedure
 15.41     54.63     10.25  language/cps/intset.scm:468:5:visit-branch
 10.72      7.13      7.13  language/cps/intmap.scm:394:0:intmap-ref
  7.97     20.31      5.30  language/cps/intset.scm:547:2:union
  6.63      4.41      4.41  vector-copy
  5.96      3.96      3.96  language/cps/intmap.scm:184:0:intmap-add!
  4.91 236177.43      3.27  language/cps/intset.scm:470:5:visit-branch
  4.50     23.45      3.00  language/cps/intmap.scm:247:2:adjoin
  3.13      3.89      2.08  language/cps/intset.scm:269:0:intset-add
  2.72     12.75      1.81  language/cps/intmap.scm:246:0:intmap-add
  2.31    384.90      1.54  language/cps/intmap.scm:515:5:visit-branch
  2.12      1.41      1.41  language/cps/intset.scm:149:0:add-level
  1.94      5.67      1.29  language/cps/intmap.scm:104:0:clone-branch-and-set
  1.86      1.24      1.24  language/cps/intset.scm:349:0:intset-ref
  1.79      2.15      1.19  language/cps/intset.scm:270:2:adjoin
  1.79      1.24      1.19  language/cps/intset.scm:204:0:intset-add!
  1.64      1.41      1.09  ice-9/vlist.scm:539:0:vhash-assq
  1.60      1.06      1.06  language/cps/intset.scm:466:2:intset-fold
  1.45      4.23      0.97  language/cps/intset.scm:382:2:visit-node
  1.08      2.53      0.72  language/cps/intset.scm:316:2:remove
  1.01      6.76      0.67  language/cps/intset.scm:496:0:intset-union
  0.89      0.64      0.59  language/cps/intmap.scm:290:0:intmap-replace!
  0.82      0.54      0.54  system/vm/linker.scm:176:0:string-table-intern!
  0.78      0.59      0.52  language/cps/intset.scm:579:0:intset-intersect
  0.74      0.50      0.50  language/cps/intset.scm:155:0:make-intset/prune
  0.67      0.59      0.45  language/cps/intset.scm:725:2:subtract-nodes
  0.63      0.54      0.42  ice-9/vlist.scm:534:0:vhash-assoc
  0.56      8.82      0.37  language/cps/utils.scm:514:0:solve-flow-equations
  0.52      0.35      0.35  put-u8
  0.52      0.35      0.35  hashq
  0.41      6.31      0.27  srfi/srfi-1.scm:458:2:fold
  0.41      0.27      0.27  language/cps/utils.scm:381:15
  0.34 236148.90      0.22  language/cps/utils.scm:364:2:visit
  0.34      1.04      0.22  language/cps/intset.scm:315:0:intset-remove
  0.30      1.93      0.20  language/cps/utils.scm:394:4:visit-scc
  0.26      3.32      0.17  language/cps/utils.scm:241:3:visit-cont
  0.26      2.10      0.17  language/cps/compile-bytecode.scm:539:4:compile-cont
  0.26      0.82      0.17  language/cps/slot-allocation.scm:156:4
  0.26      0.35      0.17  language/cps/slot-allocation.scm:792:3
  0.26      0.17      0.17  language/cps/utils.scm:383:18
  0.22      3.44      0.15  
language/cps/slot-allocation.scm:1015:4:allocate-cont
  0.22      0.77      0.15  language/cps/intset.scm:694:0:intset-subtract
  0.22      0.64      0.15  language/cps/slot-allocation.scm:278:16
  0.22      0.45      0.15  language/cps/intset.scm:755:8:lp
  0.22      0.27      0.15  language/cps/slot-allocation.scm:325:3
  0.22      0.17      0.15  language/cps/slot-allocation.scm:400:16
  0.22      0.15      0.15  gensym
  0.22      0.15      0.15  assq-ref
  0.22      0.15      0.15  hash
  0.19      4.19      0.12  language/cps/utils.scm:127:16
  0.19      1.21      0.12  
language/cps/slot-allocation.scm:636:2:compute-shuffles
  0.19      1.09      0.12  language/cps/intset.scm:367:0:intset-next
  0.19      1.02      0.12  system/vm/assembler.scm:2019:2:write-arity
  0.19      0.69      0.12  language/cps/slot-allocation.scm:874:19
  0.19      0.17      0.12  ice-9/vlist.scm:449:0:vhash-cons
  0.15      8.84      0.10  language/cps/utils.scm:306:4:visit
  0.15      2.77      0.10  language/cps/intmap.scm:524:5:visit-branch
  0.15      1.63      0.10  system/vm/linker.scm:357:0:add-symbols
  0.15      0.89      0.10  language/cps/utils.scm:367:23
  0.15      0.45      0.10  system/vm/assembler.scm:2249:0:link-debug
  0.15      0.30      0.10  language/cps/slot-allocation.scm:677:2:measure-cont
  0.15      0.20      0.10  
language/cps/slot-allocation.scm:873:4:compute-live-slots*
  0.15      0.10      0.10  system/vm/assembler.scm:1722:18
  0.15      0.10      0.10  language/cps/intset.scm:114:0:leaf-empty?
  0.15      0.10      0.10  language/cps/intmap.scm:127:0:add-level
  0.11     16.02      0.07  language/cps/intmap.scm:517:5:visit-branch
  0.11      1.06      0.07  language/cps/slot-allocation.scm:415:17
  0.11      0.52      0.07  srfi/srfi-1.scm:634:2:for-each
  0.11      0.22      0.07  language/cps/intmap.scm:530:0:intmap-union
  0.11      0.15      0.07  language/cps/utils.scm:273:5
  0.11      0.12      0.07  system/vm/assembler.scm:1970:0:put-uleb128
  0.11      0.10      0.07  language/cps/intmap.scm:578:2:union
  0.11      0.07      0.07  language/cps/utils.scm:388:27
  0.11      0.07      0.07  language/cps/compile-bytecode.scm:72:16
  0.11      0.07      0.07  language/cps/utils.scm:425:17
  0.11      0.07      0.07  language/cps/intmap.scm:167:0:persistent-intmap
  0.11      0.07      0.07  list?
  0.11      0.07      0.07  length+
  0.11      0.07      0.07  hashq-set!
  0.11      0.07      0.07  system/vm/assembler.scm:1302:0:emit-definition
  0.11      0.07      0.07  language/cps/intset.scm:632:2:intersect
  0.11      0.07      0.07  system/vm/assembler.scm:957:0:immediate-bits
  0.07      2.28      0.05  language/cps/utils.scm:432:2:component-successors
  0.07      2.25      0.05  language/cps/utils.scm:509:0:intset-pop
  0.07      0.72      0.05  language/cps/slot-allocation.scm:904:4:allocate
  0.07      0.62      0.05  language/cps/slot-allocation.scm:768:2:allocate-lazy
  0.07      0.35      0.05  language/cps/utils.scm:395:6:visit
  0.07      0.35      0.05  language/cps/utils.scm:402:5
  0.07      0.25      0.05  language/cps/utils.scm:434:18
  0.07      0.17      0.05  language/cps/slot-allocation.scm:726:17
  0.07      0.07      0.05  
language/cps/slot-allocation.scm:555:2:compute-live-slots
  0.07      0.07      0.05  ice-9/boot-9.scm:1038:0:compose
  0.07      0.05      0.05  language/cps/intset.scm:178:0:transient-intset
  0.07      0.05      0.05  language/cps/slot-allocation.scm:436:4:add
  0.07      0.05      0.05  system/vm/assembler.scm:1299:0:emit-source
  0.07      0.05      0.05  max
  0.07      0.05      0.05  logbit?
  0.07      0.05      0.05  reverse
  0.07      0.05      0.05  language/cps/slot-allocation.scm:303:35
  0.07      0.05      0.05  language/cps/utils.scm:206:6
  0.07      0.05      0.05  language/cps/utils.scm:231:19
  0.04  33421.25      0.02  srfi/srfi-1.scm:590:5:map1
  0.04     26.55      0.02  
language/cps/slot-allocation.scm:195:0:compute-reverse-control-flow-order
  0.04      3.19      0.02  system/vm/assembler.scm:1024:0:intern-constant
  0.04      1.54      0.02  language/cps/slot-allocation.scm:289:16
  0.04      0.89      0.02  language/cps/utils.scm:531:31
  0.04      0.40      0.02  language/cps/slot-allocation.scm:454:21
  0.04      0.25      0.02  language/cps/slot-allocation.scm:918:4:allocate*
  0.04      0.25      0.02  language/cps/slot-allocation.scm:202:25
  0.04      0.22      0.02  language/cps/compile-bytecode.scm:567:19
  0.04      0.22      0.02  language/cps/utils.scm:436:21
  0.04      0.17      0.02  language/cps/slot-allocation.scm:284:15
  0.04      0.17      0.02  language/cps/slot-allocation.scm:787:2:get-defs
  0.04      0.15      0.02  language/cps/slot-allocation.scm:556:17
  0.04      0.15      0.02  language/cps/utils.scm:191:16
  0.04      0.12      0.02  language/cps/slot-allocation.scm:868:6:lp
  0.04      0.10      0.02  language/cps/slot-allocation.scm:547:4:lp
  0.04      0.10      0.02  ice-9/boot-9.scm:261:2:for-each
  0.04      0.07      0.02  system/vm/assembler.scm:1387:0:link-data
  0.04      0.07      0.02  language/cps/slot-allocation.scm:482:0:integers
  0.04      0.07      0.02  
language/cps/slot-allocation.scm:487:0:solve-parallel-move
  0.04      0.05      0.02  
language/cps/slot-allocation.scm:725:2:compute-live-slots
  0.04      0.05      0.02  
language/cps/slot-allocation.scm:463:0:find-first-zero
  0.04      0.05      0.02  language/cps/intmap.scm:431:2:visit-node
  0.04      0.05      0.02  language/cps/slot-allocation.scm:823:20
  0.04      0.05      0.02  
language/cps/compile-bytecode.scm:498:4:skip-elided-conts
  0.04      0.02      0.02  
language/cps/slot-allocation.scm:130:0:lookup-call-proc-slot
  0.04      0.02      0.02  language/cps/slot-allocation.scm:283:0:rename-intset
  0.04      0.02      0.02  language/cps/intmap.scm:366:0:intmap-remove
  0.04      0.02      0.02  language/cps/compile-bytecode.scm:92:49
  0.04      0.02      0.02  language/cps/utils.scm:314:34
  0.04      0.02      0.02  language/cps/slot-allocation.scm:765:23
  0.04      0.02      0.02  language/cps/intset.scm:138:0:branch-empty?
  0.04      0.02      0.02  
system/vm/assembler.scm:701:0:encode-X8_S24-R32-N32-N32-B1_X31
  0.04      0.02      0.02  language/cps/utils.scm:458:10
  0.04      0.02      0.02  system/vm/linker.scm:418:0:process-reloc
  0.04      0.02      0.02  
system/vm/assembler.scm:701:0:encode-X8_F24-B1_X7_C24
  0.04      0.02      0.02  language/cps/utils.scm:134:16
  0.04      0.02      0.02  language/cps/slot-allocation.scm:696:15
  0.04      0.02      0.02  system/vm/assembler.scm:1361:0:write-immediate
  0.04      0.02      0.02  language/cps/slot-allocation.scm:267:3
  0.04      0.02      0.02  %after-gc-thunk
  0.04      0.02      0.02  make-bytevector
  0.04      0.02      0.02  
language/cps/slot-allocation.scm:108:0:lookup-representation
  0.04      0.02      0.02  system/vm/assembler.scm:936:12
  0.04      0.02      0.02  language/cps/compile-bytecode.scm:105:4:from-sp
  0.04      0.02      0.02  language/cps/intset.scm:187:0:persistent-intset
  0.04      0.02      0.02  min
  0.04      0.02      0.02  system/vm/assembler.scm:1689:3
  0.04      0.02      0.02  language/cps/compile-bytecode.scm:463:31
  0.04      0.02      0.02  system/vm/elf.scm:929:0:write-elf64-symbol
  0.04      0.02      0.02  
language/cps/slot-allocation.scm:470:0:find-first-trailing-zero
  0.04      0.02      0.02  srfi/srfi-1.scm:687:0:filter-map
  0.04      0.02      0.02  bytevector-u64-set!
  0.04      0.02      0.02  
language/cps/slot-allocation.scm:102:0:lookup-maybe-slot
  0.04      0.02      0.02  system/vm/assembler.scm:2000:2:write-header
  0.04      0.02      0.02  language/cps/intmap.scm:513:2:intmap-fold
  0.04      0.02      0.02  language/cps/slot-allocation.scm:153:10
  0.04      0.02      0.02  language/cps/utils.scm:426:32
  0.04      0.02      0.02  system/vm/assembler.scm:1112:0:intern-cache-cell
  0.04      0.02      0.02  cons
  0.04      0.02      0.02  language/cps/intmap.scm:290:53
  0.04      0.02      0.02  system/vm/assembler.scm:1296:0:emit-label
  0.00     66.52      0.00  
language/cps/compile-bytecode.scm:585:0:emit-bytecode
  0.00     64.56      0.00  language/cps/compile-bytecode.scm:50:15
  0.00     60.03      0.00  
language/cps/compile-bytecode.scm:84:0:compile-function
  0.00     59.66      0.00  
language/cps/slot-allocation.scm:841:0:allocate-slots
  0.00     25.11      0.00  
language/cps/utils.scm:408:0:compute-sorted-strongly-connected-components
  0.00     23.45      0.00  
language/cps/slot-allocation.scm:386:0:compute-lazy-vars
  0.00     23.10      0.00  
language/cps/slot-allocation.scm:296:0:compute-live-variables
  0.00     13.32      0.00  
language/cps/utils.scm:390:0:compute-strongly-connected-components
  0.00      5.65      0.00  language/cps/utils.scm:125:0:intmap-map
  0.00      1.81      0.00  language/cps/slot-allocation.scm:287:0:rename-graph
  0.00      1.78      0.00  language/cps/slot-allocation.scm:276:0:rename-keys
  0.00      1.73      0.00  system/vm/assembler.scm:2578:0:link-objects
  0.00      1.73      0.00  system/vm/assembler.scm:2609:0:link-assembly
  0.00      1.71      0.00  system/vm/linker.scm:706:0:link-elf
  0.00      1.63      0.00  system/vm/linker.scm:635:0:allocate-elf
  0.00      1.63      0.00  system/vm/linker.scm:371:0:allocate-segment
  0.00      1.26      0.00  
language/cps/slot-allocation.scm:536:0:compute-shuffles
  0.00      1.09      0.00  language/cps/utils.scm:378:0:invert-graph
  0.00      1.04      0.00  language/cps/utils.scm:304:0:compute-successors
  0.00      1.02      0.00  system/vm/assembler.scm:2086:0:link-arities
  0.00      1.02      0.00  system/vm/assembler.scm:1999:0:write-arities
  0.00      0.84      0.00  
language/cps/slot-allocation.scm:149:0:compute-defs-and-uses
  0.00      0.72      0.00  
language/cps/slot-allocation.scm:724:0:allocate-lazy-vars
  0.00      0.67      0.00  language/cps/utils.scm:131:0:intmap-keys
  0.00      0.62      0.00  anon #x7f790ba74034
  0.00      0.62      0.00  language/cps/utils.scm:295:17
  0.00      0.54      0.00  language/cps/utils.scm:202:0:compute-constant-values
  0.00      0.52      0.00  system/vm/assembler.scm:1144:0:emit-load-constant
  0.00      0.42      0.00  language/cps/utils.scm:239:0:compute-function-body
  0.00      0.35      0.00  
language/cps/utils.scm:183:0:compute-defining-expressions
  0.00      0.30      0.00  language/cps/compile-bytecode.scm:587:21
  0.00      0.30      0.00  language/cps/compile-bytecode.scm:52:0:intmap-select
  0.00      0.27      0.00  
language/cps/compile-bytecode.scm:62:0:compute-forwarding-labels
  0.00      0.12      0.00  language/cps/compile-bytecode.scm:55:4
  0.00      0.12      0.00  system/vm/assembler.scm:1726:0:link-text-object
  0.00      0.12      0.00  language/cps/slot-allocation.scm:157:6:get-defs
  0.00      0.12      0.00  system/vm/assembler.scm:1645:0:link-constants
  0.00      0.10      0.00  language/cps/slot-allocation.scm:663:2:max-size
  0.00      0.10      0.00  language/cps/utils.scm:170:2:fixpoint
  0.00      0.10      0.00  hash-map->list
  0.00      0.07      0.00  
system/vm/assembler.scm:1318:0:emit-cached-module-box
  0.00      0.05      0.00  system/vm/assembler.scm:2533:4:write-die
  0.00      0.05      0.00  
system/vm/assembler.scm:1312:0:emit-cached-toplevel-box
  0.00      0.05      0.00  
system/vm/assembler.scm:1160:0:emit-load-static-procedure
  0.00      0.05      0.00  srfi/srfi-1.scm:586:2:map
  0.00      0.05      0.00  language/cps/compile-bytecode.scm:65:18
  0.00      0.05      0.00  language/cps/intmap.scm:336:2:adjoin
  0.00      0.02      0.00  system/vm/assembler.scm:1371:0:emit-init-constants
  0.00      0.02      0.00  system/vm/assembler.scm:701:0:encode-X8_S8_I16
  0.00      0.02      0.00  anon #x7f790ba74000
  0.00      0.02      0.00  language/cps/slot-allocation.scm:894:7
  0.00      0.02      0.00  
system/vm/assembler.scm:1308:0:emit-cache-current-module!
  0.00      0.02      0.00  language/cps/intmap.scm:424:0:intmap-next
  0.00      0.02      0.00  srfi/srfi-1.scm:606:7:map2
  0.00      0.02      0.00  system/vm/assembler.scm:1868:0:link-symtab
  0.00      0.02      0.00  
language/cps/slot-allocation.scm:579:2:compute-receive-shuffles
  0.00      0.02      0.00  language/cps/intmap.scm:331:0:intmap-replace
---
Sample count: 2686
Total time: 66.515425859 seconds (31.859444991 seconds in GC)
--8<---------------cut here---------------end--------------->8---

The intmap ‘big-cps’ has those 428K elements distributed in 13,814
33-element vectors (per ‘intmap-vector-count’ in the attached file.)

I fail to see why we’re GC’ing so much (‘visit-branch’ is also first in
the GC profile).

It looks like (1) something’s not working as intended GC-wise
(unintended vector retention somewhere?), and (2) the O(log N)
complexity, IIUC, of intmap/intset lookups may be penalizing on large
input programs.

Andy, WDYT?

Thanks,
Ludo’.

(use-modules (ice-9 match)
             (statprof)
             (charting prof)
             (ice-9 time)
             (srfi srfi-43)
             (system base target)
             (system base message))


(use-modules (language tree-il optimize)
             (language cps optimize)
             (language cps renumber)
             (language cps split-rec)
             (language cps compile-bytecode))

(define %default-optimizations
  ;; Default optimization options (equivalent to -O2 on Guile 2.2).
  (append (tree-il-default-optimization-options)
          (cps-default-optimization-options)))

(define %lightweight-optimizations
  ;; Lightweight optimizations (like -O0, but with partial evaluation).
  (let loop ((opts %default-optimizations)
             (result '()))
    (match opts
      (() (reverse result))
      ((#:partial-eval? _ rest ...)
       (loop rest `(#t #:partial-eval? ,@result)))
      ((kw _ rest ...)
       (loop rest `(#f ,kw ,@result))))))

(define big-cps
  (call-with-input-file "t.cps"
    (lambda (port)
      ((@@ (language cps spec) read-cps) port '()))))

(define-syntax-rule (intmap-min m) (struct-ref m 0))
(define-syntax-rule (intmap-shift m) (struct-ref m 1))
(define-syntax-rule (intmap-root m) (struct-ref m 2))

(define (intmap-vector-count m)
  (let loop ((vector (intmap-root m))
             (total 0))
    (if (vector? vector)
        (vector-fold (lambda (idx result item)
                       (if (vector? item)
                           (loop item (+ 1 result))
                           result))
                     total
                     vector)
        total)))

(define (intmap-depth m)
  (let loop ((vector (intmap-root m))
             (depth 0))
    (if (vector? vector)
        (vector-fold (lambda (idx result item)
                       (if (vector? item)
                           (max result
                                (loop item (+ 1 depth)))
                           result))
                     depth
                     vector)
        depth)))

(pk 'go!)

;; (statprof-start)
(let ((cps ((@@ (language cps compile-bytecode) lower-cps)
            big-cps %lightweight-optimizations)))
  (statprof
   (lambda ()
     ((@@ (language cps compile-bytecode) emit-bytecode)
      cps '() %lightweight-optimizations)
     ;; (compile-bytecode big-cps '() %lightweight-optimizations)
     )
   #:count-calls? #t))
;; (statprof-stop)

;; (chartprof "t.png")

reply via email to

[Prev in Thread] Current Thread [Next in Thread]