guile-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: Serious GC problem in HEAD


From: Ludovic Courtès
Subject: Re: Serious GC problem in HEAD
Date: Wed, 08 Nov 2006 11:00:46 +0100
User-agent: Gnus/5.110006 (No Gnus v0.6) Emacs/21.4 (gnu/linux)

Hi,

address@hidden (Ludovic Courtès) writes:

> Actually, I'm suspecting my gc-stats patch [0] to cause some GC problems
> (although this is quite unexpected since it touches only stat
> information).  For instance, consider the following program:
>
>   $ guile -e '(let loop ((a 0)) (loop a))'
>
> If `guile' is HEAD, then the memory consumption of that process (as
> shown by `top') continuously increases and at a pretty high rate (!).
> If `guile' is 1.8, then that does not happen.

I'm glad to announce that I just committed (in HEAD) a fix for this bug
(that I had introduced earlier, hmm...).

The issue is that in `scm_gc_for_newcell ()' (which is the main user of
GC sweeping), the global sweep statistics (`scm_gc_cells_collected' and
`scm_gc_cells_collected_1') did not get updated.  They only got updated
by full GCs (in `gc_end_stats ()') and consequently remained equal to
zero (since most of the collection took place in `scm_gc_for_newcell ()').
Thus, the GC was assuming that sweeping did not yield any collection and
consequently kept allocating new segments.

The patch (reproduced below) fixes this and further clarifies the flow
of GC sweep statistics.  In particular, it removes globals (most
importantly `scm_gc_cells_collected_1' and `scm_gc_cells_collected') so
that any function that needs to access this information must be
explicitly provided with it (see, for instance, `scm_i_adjust_min_yield'
and `scm_i_get_new_heap_segment').  As a result, I believe the code is
easier to audit now.

It all seems to work fine, but you may want to check it by yourself.  ;-)
As for merging it into 1.8, we should probably wait for some time (or
even not merge it)?

Thanks,
Ludovic.


2006-11-08  Ludovic Courtès  <address@hidden>
        
        * libguile/gc-freelist.c (scm_i_adjust_min_yield): Take two
        "sweep_stats" arguments; use them instead of accessing the global
        variables `scm_gc_cells_collected' and `scm_gc_cells_collected_1'.

        * libguile/gc-segment.c (scm_i_sweep_some_cards): Reset SWEEP
        before each iteration of the loop.
        (scm_i_sweep_some_segments): Reset SWEEP at each iteration.
        (scm_i_get_new_heap_segment): Take an additional argument
        SWEEP_STATS.  Compute MIN_CELLS as a function of it.

        * libguile/gc.c (scm_gc_cells_collected,
        scm_gc_cells_collected_1): Removed.
        (scm_i_gc_sweep_stats, scm_i_gc_sweep_stats_1): New.
        (scm_gc_cells_marked_acc, scm_gc_cells_swept_acc,
        scm_gc_time_taken, scm_gc_mark_time_taken, scm_gc_times,
        scm_gc_cell_yield_percentage, protected_obj_count): Made `static'.
        (scm_gc_stats): Use `scm_i_gc_sweep_stats' instead of
        `scm_gc_cells_(collected|swept)'.
        (gc_update_stats): New.
        (gc_end_stats): Use `scm_i_gc_sweep_stats' and
        `scm_i_gc_sweep_stats_1' instead of the former globals.
        (scm_gc_for_newcell): Invoke `gc_update_stats ()' after each
        `scm_i_sweep_some_segments' call.  This fixes a bug where the GC
        would keep allocating new segments instead of re-using collected
        cells (because `scm_gc_cells_collected' would remain zero).

        * libguile/gc.h (scm_gc_cells_swept, scm_gc_cells_collected,
        scm_gc_cell_yield_percentage): Removed.

        * libguile/private-gc.h (scm_gc_cells_collected_1): Removed.
        (scm_i_adjust_min_yield): Updated.
        (scm_i_get_new_heap_segment): Updated.

--- orig/configure.in
+++ mod/configure.in
@@ -28,7 +28,8 @@
 AC_PREREQ(2.53)
 
 AC_INIT(m4_esyscmd(. ./GUILE-VERSION && echo -n ${PACKAGE}),
-        m4_esyscmd(. ./GUILE-VERSION && echo -n ${GUILE_VERSION}))
+        m4_esyscmd(. ./GUILE-VERSION && echo -n ${GUILE_VERSION}),
+       address@hidden)
 AC_CONFIG_AUX_DIR([.])
 AC_CONFIG_SRCDIR(GUILE-VERSION)
 AM_INIT_AUTOMAKE([no-define])


--- orig/libguile/gc-freelist.c
+++ mod/libguile/gc-freelist.c
@@ -72,14 +72,13 @@
 
 
 
-/*
-  This adjust FREELIST variables to decide wether or not to allocate
-  more heap in the next GC run. It uses scm_gc_cells_collected and 
scm_gc_cells_collected1
- */
-
+/* Adjust FREELIST variables to decide wether or not to allocate more heap in
+   the next GC run based on SWEEP_STATS on SWEEP_STATS_1 (statistics
+   collected after the two last full GC).  */
 void
 scm_i_adjust_min_yield (scm_t_cell_type_statistics *freelist,
-                       scm_t_sweep_statistics sweep_stats)
+                       scm_t_sweep_statistics sweep_stats,
+                       scm_t_sweep_statistics sweep_stats_1)
 {
   /* min yield is adjusted upwards so that next predicted total yield
    * (allocated cells actually freed by GC) becomes
@@ -99,7 +98,8 @@
     {
       /* Pick largest of last two yields. */
       long delta = ((SCM_HEAP_SIZE * freelist->min_yield_fraction / 100)
-                  - (long) sweep_stats.collected);
+                  - (long) SCM_MAX (sweep_stats.collected,
+                                    sweep_stats_1.collected));
 #ifdef DEBUGINFO
       fprintf (stderr, " after GC = %lu, delta = %ld\n",
               (unsigned long) scm_cells_allocated,


--- orig/libguile/gc-segment.c
+++ mod/libguile/gc-segment.c
@@ -141,9 +141,8 @@
 }
 
 /* Sweep cards from SEG until we've gathered THRESHOLD cells.  On return,
-   *CELLS_SWEPT contains the number of cells that have been visited and
-   *CELLS_COLLECTED contains the number of cells actually collected.  A
-   freelist is returned, potentially empty.  */
+   SWEEP_STATS contains the number of cells that have been visited and
+   collected.  A freelist is returned, potentially empty.  */
 SCM
 scm_i_sweep_some_cards (scm_t_heap_segment *seg,
                        scm_t_sweep_statistics *sweep_stats)
@@ -206,8 +205,12 @@
 
   scm_i_sweep_statistics_init (sweep_stats);
 
+  scm_i_sweep_statistics_init (&sweep);
   while (scm_i_sweep_some_cards (seg, &sweep) != SCM_EOL)
-    scm_i_sweep_statistics_sum (sweep_stats, sweep);
+    {
+      scm_i_sweep_statistics_sum (sweep_stats, sweep);
+      scm_i_sweep_statistics_init (&sweep);
+    }
 
   seg->next_free_card =p;
 }
@@ -339,6 +342,7 @@
       if (scm_i_heap_segment_table[i]->freelist != fl)
        continue;
 
+      scm_i_sweep_statistics_init (&sweep);
       collected = scm_i_sweep_some_cards (scm_i_heap_segment_table[i],
                                          &sweep);
 
@@ -461,14 +465,12 @@
 }
 
 
-/*
-  Important entry point: try to grab some memory, and make it into a
-  segment.
-
-  RETURN: the index of the segment.
- */
-int 
+/* Important entry point: try to grab some memory, and make it into a
+   segment; return the index of the segment.  SWEEP_STATS should contain
+   global GC sweep statistics collected since the last full GC.  */
+int
 scm_i_get_new_heap_segment (scm_t_cell_type_statistics *freelist,
+                           scm_t_sweep_statistics sweep_stats,
                            policy_on_error error_policy)
 {
   size_t len;
@@ -489,7 +491,7 @@
      */
     float f = freelist->min_yield_fraction / 100.0;
     float h = SCM_HEAP_SIZE;
-    float min_cells = (f * h - scm_gc_cells_collected) / (1.0 - f);
+    float min_cells = (f * h - sweep_stats.collected) / (1.0 - f);
 
     /* Make heap grow with factor 1.5 */
     len =  freelist->heap_size / 2;


--- orig/libguile/gc.c
+++ mod/libguile/gc.c
@@ -209,20 +209,28 @@
  */
 unsigned long scm_cells_allocated = 0;
 unsigned long scm_mallocated = 0;
-unsigned long scm_gc_cells_collected;
-unsigned long scm_gc_cells_collected_1 = 0; /* previous GC yield */
-unsigned long scm_gc_malloc_collected;
-unsigned long scm_gc_ports_collected;
-unsigned long scm_gc_time_taken = 0;
+
+/* Global GC sweep statistics since the last full GC.  */
+static scm_t_sweep_statistics scm_i_gc_sweep_stats = { 0, 0 };
+static scm_t_sweep_statistics scm_i_gc_sweep_stats_1 = { 0, 0 };
+
+/* Total count of cells marked/swept.  */
+static double scm_gc_cells_marked_acc = 0.;
+static double scm_gc_cells_swept_acc = 0.;
+
+static unsigned long scm_gc_time_taken = 0;
 static unsigned long t_before_gc;
-unsigned long scm_gc_mark_time_taken = 0;
-unsigned long scm_gc_times = 0;
-unsigned long scm_gc_cells_swept = 0;
-double scm_gc_cells_marked_acc = 0.;
-double scm_gc_cells_swept_acc = 0.;
-int scm_gc_cell_yield_percentage =0;
+static unsigned long scm_gc_mark_time_taken = 0;
+
+static unsigned long scm_gc_times = 0;
+
+static int scm_gc_cell_yield_percentage = 0;
+static unsigned long protected_obj_count = 0;
+
+/* The following are accessed from `gc-malloc.c' and `gc-card.c'.  */
 int scm_gc_malloc_yield_percentage = 0;
-unsigned long protected_obj_count = 0;
+unsigned long scm_gc_malloc_collected = 0;
+unsigned long scm_gc_ports_collected = 0;
 
 
 SCM_SYMBOL (sym_cells_allocated, "cells-allocated");
@@ -346,10 +354,10 @@
   local_protected_obj_count = protected_obj_count;
   local_scm_gc_cells_swept =
     (double) scm_gc_cells_swept_acc
-    + (double) scm_gc_cells_swept;
+    + (double) scm_i_gc_sweep_stats.swept;
   local_scm_gc_cells_marked = scm_gc_cells_marked_acc 
-    +(double) scm_gc_cells_swept 
-    -(double) scm_gc_cells_collected;
+    +(double) scm_i_gc_sweep_stats.swept
+    -(double) scm_i_gc_sweep_stats.collected;
 
   for (i = table_size; i--;)
     {
@@ -393,6 +401,30 @@
 }
 #undef FUNC_NAME
 
+/* Update the global sweeping/collection statistics by adding SWEEP_STATS to
+   SCM_I_GC_SWEEP_STATS and updating related variables.  */
+static inline void
+gc_update_stats (scm_t_sweep_statistics sweep_stats)
+{
+  /* CELLS SWEPT is another word for the number of cells that were examined
+     during GC. YIELD is the number that we cleaned out. MARKED is the number
+     that weren't cleaned.  */
+
+  scm_gc_cell_yield_percentage = (sweep_stats.collected * 100) / SCM_HEAP_SIZE;
+
+  scm_i_sweep_statistics_sum (&scm_i_gc_sweep_stats, sweep_stats);
+
+  if ((scm_i_gc_sweep_stats.collected > scm_i_gc_sweep_stats.swept)
+      || (scm_cells_allocated < sweep_stats.collected))
+    {
+      printf ("internal GC error, please report to `"
+             PACKAGE_BUGREPORT "'\n");
+      abort ();
+    }
+
+  scm_cells_allocated -= sweep_stats.collected;
+}
+
 static void
 gc_start_stats (const char *what SCM_UNUSED)
 {
@@ -406,23 +438,18 @@
 gc_end_stats (scm_t_sweep_statistics sweep_stats)
 {
   unsigned long t = scm_c_get_internal_run_time ();
-  scm_gc_time_taken += (t - t_before_gc);
-
-  /*
-    CELLS SWEPT is another word for the number of cells that were
-    examined during GC. YIELD is the number that we cleaned
-    out. MARKED is the number that weren't cleaned.
-   */
-  scm_gc_cells_marked_acc += (double) sweep_stats.swept
-    - (double) scm_gc_cells_collected;
-  scm_gc_cells_swept_acc += (double) sweep_stats.swept;
 
-  scm_gc_cell_yield_percentage = (sweep_stats.collected * 100) / SCM_HEAP_SIZE;
+  scm_gc_time_taken += (t - t_before_gc);
 
-  scm_gc_cells_swept = sweep_stats.swept;
-  scm_gc_cells_collected_1 = scm_gc_cells_collected;
-  scm_gc_cells_collected = sweep_stats.collected;
-  scm_cells_allocated -= sweep_stats.collected;
+  /* Reset the number of cells swept/collected since the last full GC.  */
+  scm_i_gc_sweep_stats_1 = scm_i_gc_sweep_stats;
+  scm_i_gc_sweep_stats.collected = scm_i_gc_sweep_stats.swept = 0;
+
+  gc_update_stats (sweep_stats);
+
+  scm_gc_cells_marked_acc += (double) scm_i_gc_sweep_stats.swept
+    - (double) scm_i_gc_sweep_stats.collected;
+  scm_gc_cells_swept_acc += (double) scm_i_gc_sweep_stats.swept;
 
   ++scm_gc_times;
 }
@@ -480,13 +507,17 @@
   scm_gc_running_p = 1;
 
   *free_cells = scm_i_sweep_some_segments (freelist, &sweep_stats);
-  scm_cells_allocated -= sweep_stats.collected;
+  gc_update_stats (sweep_stats);
 
   if (*free_cells == SCM_EOL && scm_i_gc_grow_heap_p (freelist))
     {
-      freelist->heap_segment_idx = scm_i_get_new_heap_segment (freelist, 
abort_on_error);
+      freelist->heap_segment_idx =
+       scm_i_get_new_heap_segment (freelist,
+                                   scm_i_gc_sweep_stats,
+                                   abort_on_error);
+
       *free_cells = scm_i_sweep_some_segments (freelist, &sweep_stats);
-      scm_cells_allocated -= sweep_stats.collected;
+      gc_update_stats (sweep_stats);
     }
 
   if (*free_cells == SCM_EOL)
@@ -495,7 +526,9 @@
        with the advent of lazy sweep, GC yield is only known just
        before doing the GC.
       */
-      scm_i_adjust_min_yield (freelist, sweep_stats);
+      scm_i_adjust_min_yield (freelist,
+                             scm_i_gc_sweep_stats,
+                             scm_i_gc_sweep_stats_1);
 
       /*
        out of fresh cells. Try to get some new ones.
@@ -505,7 +538,7 @@
       scm_i_gc ("cells");
 
       *free_cells = scm_i_sweep_some_segments (freelist, &sweep_stats);
-      scm_cells_allocated -= sweep_stats.collected;
+      gc_update_stats (sweep_stats);
     }
   
   if (*free_cells == SCM_EOL)
@@ -513,9 +546,13 @@
       /*
        failed getting new cells. Get new juice or die.
        */
-      freelist->heap_segment_idx = scm_i_get_new_heap_segment (freelist, 
abort_on_error);
+      freelist->heap_segment_idx =
+       scm_i_get_new_heap_segment (freelist,
+                                   scm_i_gc_sweep_stats,
+                                   abort_on_error);
+
       *free_cells = scm_i_sweep_some_segments (freelist, &sweep_stats);
-      scm_cells_allocated -= sweep_stats.collected;
+      gc_update_stats (sweep_stats);
     }
   
   if (*free_cells == SCM_EOL)


--- orig/libguile/gc.h
+++ mod/libguile/gc.h
@@ -277,13 +277,9 @@
 SCM_API struct scm_t_cell_type_statistics scm_i_master_freelist;
 SCM_API struct scm_t_cell_type_statistics scm_i_master_freelist2;
 
-
-SCM_API unsigned long scm_gc_cells_swept;
-SCM_API unsigned long scm_gc_cells_collected;
 SCM_API unsigned long scm_gc_malloc_collected;
 SCM_API unsigned long scm_gc_ports_collected;
 SCM_API unsigned long scm_cells_allocated;
-SCM_API int scm_gc_cell_yield_percentage;
 SCM_API int scm_gc_malloc_yield_percentage;
 SCM_API unsigned long scm_mallocated;
 SCM_API unsigned long scm_mtrigger;


--- orig/libguile/private-gc.h
+++ mod/libguile/private-gc.h
@@ -144,14 +144,13 @@
    }                                                   \
   while (0)
 
-
 
 extern scm_t_cell_type_statistics scm_i_master_freelist;
 extern scm_t_cell_type_statistics scm_i_master_freelist2;
-extern unsigned long scm_gc_cells_collected_1;
 
 void scm_i_adjust_min_yield (scm_t_cell_type_statistics *freelist,
-                            scm_t_sweep_statistics sweep_stats);
+                            scm_t_sweep_statistics sweep_stats,
+                            scm_t_sweep_statistics sweep_stats_1);
 void scm_i_gc_sweep_freelist_reset (scm_t_cell_type_statistics *freelist);
 int scm_i_gc_grow_heap_p (scm_t_cell_type_statistics * freelist);
 
@@ -270,7 +269,9 @@
      
 int scm_i_insert_segment (scm_t_heap_segment * seg);
 long int scm_i_find_heap_segment_containing_object (SCM obj);
-int scm_i_get_new_heap_segment (scm_t_cell_type_statistics *, policy_on_error);
+int scm_i_get_new_heap_segment (scm_t_cell_type_statistics *,
+                               scm_t_sweep_statistics,
+                               policy_on_error);
 void scm_i_clear_mark_space (void);
 void scm_i_sweep_segments (void);
 SCM scm_i_sweep_some_segments (scm_t_cell_type_statistics *fl,


reply via email to

[Prev in Thread] Current Thread [Next in Thread]