bug-guix
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

bug#37757: Kernel panic upon shutdown


From: Ludovic Courtès
Subject: bug#37757: Kernel panic upon shutdown
Date: Thu, 28 Nov 2019 12:45:00 +0100
User-agent: Gnus/5.13 (Gnus v5.13) Emacs/26.3 (gnu/linux)

Hello!

The attached patch should allow shepherd (PID 1) to dump core when it
crashes (systemd does something similar).

Jesse (and anyone else experiencing this!), could you try to (1)
reconfigure with this patch, (2) reboot, (3) try to halt the system to
reproduce the crash, and (4) retrieve a backtrace from the ‘core’ file?

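For step (4), once you’ve rebooted after the crash, you’ll have to do
something along these lines (the crash handler changes directory to ‘/’
before dumping core, so the file should end up at ‘/core’):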

  sudo gdb /run/current-system/profile/bin/guile /core

and then type “thread apply all bt” at the GDB prompt to print a
backtrace of every thread; that output is what we need.

I’ll also try to do that on another machine where I’ve seen it happen.

Thanks in advance!

Ludo’.

diff --git a/gnu/services/shepherd.scm b/gnu/services/shepherd.scm
index 08bb33039c..ec49244cf6 100644
--- a/gnu/services/shepherd.scm
+++ b/gnu/services/shepherd.scm
@@ -277,45 +277,87 @@ and return the resulting '.go' file."
 
   (let ((files (map shepherd-service-file services)))
     (define config
-      #~(begin
-          (use-modules (srfi srfi-34)
-                       (system repl error-handling))
+      (with-imported-modules '((guix build syscalls))
+        #~(begin
+            (use-modules (srfi srfi-34)
+                         (system repl error-handling)
+                         (guix build syscalls)
+                         (system foreign))
 
-          ;; Arrange to spawn a REPL if something goes wrong.  This is better
-          ;; than a kernel panic.
-          (call-with-error-handling
-            (lambda ()
-              (apply register-services
-                     (map load-compiled '#$(map scm->go files)))))
+            (define signal
+              (let ((proc (pointer->procedure int
+                                              (dynamic-func "signal"
+                                                            (dynamic-link))
+                                              (list int '*))))
+                (lambda (signum handler)
+                  (proc signum
+                        (if (integer? handler)                ;SIG_DFL, etc.
+                            (make-pointer handler)
+                            (procedure->pointer void handler (list int)))))))
 
-          ;; guix-daemon 0.6 aborts if 'PATH' is undefined, so work around
-          ;; it.
-          (setenv "PATH" "/run/current-system/profile/bin")
+            (define (handle-crash sig)
+              (dynamic-wind
+                (const #t)
+                (lambda ()
+                  (gc-disable)
+                  (pk 'crash! sig)
+                  ;; Fork and have the child dump core at the root.
+                  (match (clone SIGCHLD)
+                    (0
+                     (setrlimit 'core #f #f)
+                     (chdir "/")
+                     (signal sig SIG_DFL)
+                     ;; Note: 'getpid' would return 1, hence this hack.
+                     (kill (string->number (readlink "/proc/self"))
+                           sig)
+                     (primitive-_exit 253))
+                    (child
+                     (waitpid child)
+                     (sync)
+                     ;; Hopefully at this point core has been dumped.
+                     (pk 'done)
+                     (sleep 3)
+                     (primitive-_exit 255))))
+                (lambda ()
+                  (primitive-_exit 254))))
 
-          (format #t "starting services...~%")
-          (for-each (lambda (service)
-                      ;; In the Shepherd 0.3 the 'start' method can raise
-                      ;; '&action-runtime-error' if it fails, so protect
-                      ;; against it.  (XXX: 'action-runtime-error?' is not
-                      ;; exported is 0.3, hence 'service-error?'.)
-                      (guard (c ((service-error? c)
-                                 (format (current-error-port)
-                                         "failed to start service '~a'~%"
-                                         service)))
-                        (start service)))
-                    '#$(append-map shepherd-service-provision
-                                   (filter shepherd-service-auto-start?
-                                           services)))
+            (signal SIGSEGV handle-crash)
 
-          ;; Hang up stdin.  At this point, we assume that 'start' methods
-          ;; that required user interaction on the console (e.g.,
-          ;; 'cryptsetup open' invocations, post-fsck emergency REPL) have
-          ;; completed.  User interaction becomes impossible after this
-          ;; call; this avoids situations where services wrongfully lead
-          ;; PID 1 to read from stdin (the console), which users may not
-          ;; have access to (see <https://bugs.gnu.org/23697>).
-          (redirect-port (open-input-file "/dev/null")
-                         (current-input-port))))
+            ;; Arrange to spawn a REPL if something goes wrong.  This is better
+            ;; than a kernel panic.
+            (call-with-error-handling
+              (lambda ()
+                (apply register-services
+                       (map load-compiled '#$(map scm->go files)))))
+
+            ;; guix-daemon 0.6 aborts if 'PATH' is undefined, so work around
+            ;; it.
+            (setenv "PATH" "/run/current-system/profile/bin")
+
+            (format #t "starting services...~%")
+            (for-each (lambda (service)
+                        ;; In the Shepherd 0.3 the 'start' method can raise
+                        ;; '&action-runtime-error' if it fails, so protect
+                        ;; against it.  (XXX: 'action-runtime-error?' is not
+                        ;; exported is 0.3, hence 'service-error?'.)
+                        (guard (c ((service-error? c)
+                                   (format (current-error-port)
+                                           "failed to start service '~a'~%"
+                                           service)))
+                          (start service)))
+                      '#$(append-map shepherd-service-provision
+                                     (filter shepherd-service-auto-start?
+                                             services)))
+
+            ;; Hang up stdin.  At this point, we assume that 'start' methods
+            ;; that required user interaction on the console (e.g.,
+            ;; 'cryptsetup open' invocations, post-fsck emergency REPL) have
+            ;; completed.  User interaction becomes impossible after this
+            ;; call; this avoids situations where services wrongfully lead
+            ;; PID 1 to read from stdin (the console), which users may not
+            ;; have access to (see <https://bugs.gnu.org/23697>).
+            (redirect-port (open-input-file "/dev/null")
+                           (current-input-port)))))
 
     (scheme-file "shepherd.conf" config)))
 

reply via email to

[Prev in Thread] Current Thread [Next in Thread]