[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[shepherd] 03/05: timer: Log the start time and end time of each action.
From: |
Ludovic Courtès |
Subject: |
[shepherd] 03/05: timer: Log the start time and end time of each action. |
Date: |
Fri, 10 May 2024 19:04:30 -0400 (EDT) |
civodul pushed a commit to branch devel
in repository shepherd.
commit b4ca2abbfa072a8aeba3ef5483a6fc1a09bfc207
Author: Ludovic Courtès <ludo@gnu.org>
AuthorDate: Fri May 10 21:29:40 2024 +0200
timer: Log the start time and end time of each action.
So far we’d only record the end time of processes by reusing the
‘record-process-exit-status’ mechanism. With this change, timers
preserve that information internally and with additional details: the
start time, end time, and status, including for actions implemented as
thunks rather than processes.
* modules/shepherd/service/timer.scm (serializer-timer): Rename to…
(serialize-timer): … this. Add ‘past-runs’ entry.
(timer-request): New procedure.
(timer-processes): Rewrite in terms of ‘timer-request’.
(timer-past-runs): New procedure.
(%past-run-log-size): New variable.
(make-timer-constructor): Use ‘let-loop’. Change ‘processes’ to a list
of PID/start time pairs. In 'process-terminated handler, replace
‘record-process-exit-status’ by an addition to ‘past-runs’. Add
'past-runs handler. In 'timeout handler, capture start and end time, as
well as any exception thrown.
* modules/shepherd/scripts/herd.scm (seconds->string): New procedure.
(display-process-exit-status): Add ‘duration’ parameter and honor it.
(display-service-status): Adjust timer handling for child processes and
for past runs.
* modules/shepherd/service.scm (record-process-exit-status): Do not
export.
* tests/services/timer.sh: Test logging of completion for thunks.
---
modules/shepherd/scripts/herd.scm | 110 ++++++++++++++++++++++++++++---------
modules/shepherd/service.scm | 1 -
modules/shepherd/service/timer.scm | 103 +++++++++++++++++++++++-----------
tests/services/timer.sh | 13 +++++
4 files changed, 168 insertions(+), 59 deletions(-)
diff --git a/modules/shepherd/scripts/herd.scm
b/modules/shepherd/scripts/herd.scm
index d8db6f7..503fb4a 100644
--- a/modules/shepherd/scripts/herd.scm
+++ b/modules/shepherd/scripts/herd.scm
@@ -265,6 +265,24 @@ transient status for too long."
;; relative date string like "2 hours ago".
(format #f (l10n "~a (~a)") absolute relative))
+(define (seconds->string seconds)
+ "Return a string representing @var{seconds} as a duration in a
+human-friendly way."
+ (cond ((< seconds 180)
+ ;; TRANSLATORS: This string and the following ones denote a duration.
+ ;; It ends up being inserted in a sentence like "Process terminated
+ ;; after 10 seconds". (Arguably not ideal.)
+ (format #f (l10n "~h second" "~h seconds" seconds)
+ seconds))
+ ((< seconds (* 180 60))
+ (let ((minutes (quotient seconds 60)))
+ (format #f (l10n "~h minute" "~h minutes" minutes)
+ minutes)))
+ (else
+ (let ((hours (quotient seconds 3600)))
+ (format #f (l10n "~h hour" "~h hours" hours)
+ hours)))))
+
(define %default-log-history-size
;; Number of log lines displayed by default.
10)
@@ -290,29 +308,48 @@ relevant bits quoted according to POSIX shell rules."
str))
command)))
-(define (display-process-exit-status status)
+(define* (display-process-exit-status status #:optional duration)
"Display @var{status}, a process status as returned by @code{waitpid}, in a
-human-friendly way."
+human-friendly way. When @var{duration} is provided, it is the number of
+seconds during which the process ran."
(cond ((zero? status)
- (format #t (l10n "Process exited successfully.~%")))
+ (if duration
+ (format #t (l10n "Process exited successfully after ~a.~%")
+ (seconds->string duration))
+ (format #t (l10n "Process exited successfully.~%"))))
((status:exit-val status)
=>
(lambda (code)
- (format #t (highlight/error
- (l10n "Process exited with code ~a.~%"))
- code)))
+ (if duration
+ (format #t
+ (highlight/error
+ (l10n "Process exited with code ~a after ~a.~%"))
+ code (seconds->string duration))
+ (format #t (highlight/error
+ (l10n "Process exited with code ~a.~%"))
+ code))))
((status:term-sig status)
=>
(lambda (signal)
- (format #t (highlight/error
- (l10n "Process terminated with signal ~a.~%"))
- signal)))
+ (if duration
+ (format #t
+ (highlight/error
+ (l10n "Process terminated with signal ~a after ~a.~%"))
+ signal (seconds->string duration))
+ (format #t (highlight/error
+ (l10n "Process terminated with signal ~a.~%"))
+ signal))))
((status:stop-sig status)
=>
(lambda (signal)
- (format #t (highlight/error
- (l10n "Process stopped with signal ~a.~%"))
- signal)))))
+ (if duration
+ (format #t
+ (highlight/error
+ (l10n "Process stopped with signal ~a after ~a.~%"))
+ signal (seconds->string duration))
+ (format #t (highlight/error
+ (l10n "Process stopped with signal ~a.~%"))
+ signal))))))
(define* (display-timer-events event #:optional (count 5))
"Display the @var{count} upcoming timer alarms that match @var{event}, a
@@ -409,7 +446,9 @@ calendar event."
(format #t (l10n " Child process:~{ ~a~}~%"
" Child processes:~{ ~a~}~%"
(length processes))
- processes)))
+ (match processes
+ (((pids . start-times) ...)
+ pids)))))
('procedure
(format #t (l10n " Periodically running Scheme code.~%")))
(_ #f)))
@@ -479,20 +518,37 @@ to upgrade).~%"))))
(when show-recent-messages?
(match (live-service-running-value service)
- (('timer . _)
- (match (live-service-process-exit-statuses service)
- (()
- #t)
- (statuses
- (newline)
- (format #t (highlight (l10n "Recent runs:~%")))
- (for-each (match-lambda
- ((status . time)
- (format #t " ~a"
- (strftime default-logfile-date-format
- (localtime time)))
- (display-process-exit-status status)))
- (reverse (at-most timer-history-size statuses))))))
+ (('timer ('version 0) properties ...)
+ (alist-let* properties (past-runs)
+ (match past-runs
+ ((or () #f)
+ #t)
+ (statuses
+ (newline)
+ (format #t (highlight (l10n "Recent runs:~%")))
+ (for-each (match-lambda
+ ((status end start)
+ (format #t " ~a"
+ (strftime default-logfile-date-format
+ (localtime end)))
+ (match status
+ ((? integer?)
+ (display-process-exit-status status
+ (- end start)))
+ ('success
+ (format #t (l10n "Completed in ~a.~%")
+ (seconds->string (- end start))))
+ (('exception key args ...)
+ (format #t (highlight/error
+ (l10n "Exception thrown after ~a:
~a~%"))
+ (seconds->string (- end start))
+ (string-trim-right
+ (call-with-output-string
+ (lambda (port)
+ (print-exception port #f
+ key args))))))
+ (_ #f))))
+ (reverse (at-most timer-history-size statuses)))))))
(_ #f))
(match (live-service-recent-messages service)
diff --git a/modules/shepherd/service.scm b/modules/shepherd/service.scm
index e3b7486..ce9cbb9 100644
--- a/modules/shepherd/service.scm
+++ b/modules/shepherd/service.scm
@@ -105,7 +105,6 @@
spawn-command
spawn-shell-command
terminate-process
- record-process-exit-status
%precious-signals
register-services
unregister-services
diff --git a/modules/shepherd/service/timer.scm
b/modules/shepherd/service/timer.scm
index e6837fe..343dd77 100644
--- a/modules/shepherd/service/timer.scm
+++ b/modules/shepherd/service/timer.scm
@@ -535,7 +535,7 @@ list, to be executed as @var{user} and @var{group}, with
the given
#:resource-limits resource-limits))
(_ #f)))
-(define-record-type-serializer (serializer-timer (timer <timer>))
+(define-record-type-serializer (serialize-timer (timer <timer>))
;; Serialize TIMER to clients can inspect it.
`(timer (version 0)
(event ,(match (timer-event timer)
@@ -545,17 +545,35 @@ list, to be executed as @var{user} and @var{group}, with
the given
(action ,(match (timer-action timer)
((? command? command) (command->sexp command))
(_ 'procedure)))
- (processes ,(timer-processes timer))))
+ (processes ,(timer-processes timer))
+ (past-runs ,(ring-buffer->list (timer-past-runs timer)))))
-(define (timer-processes timer)
- "Return the list of PIDs of the currently running processes started by
-@var{timer}."
- (let ((reply (make-channel)))
- (put-message (timer-channel timer) `(processes ,reply))
- (get-message reply)))
+(define (timer-request message)
+ (lambda (timer)
+ "Send @var{message} to @var{timer} and return its reply."
+ (let ((reply (make-channel)))
+ (put-message (timer-channel timer) `(,message ,reply))
+ (get-message reply))))
+
+(define timer-processes
+ ;; Return the list of PID/start time pairs of the currently running
+ ;; processes started by the given timer.
+ (timer-request 'processes))
+
+(define timer-past-runs
+ ;; Return the list of past runs as a ring buffer. Each run has the form
+ ;; (STATUS END START). When the timer's action is a command, STATUS is an
+ ;; integer, its exit status; otherwise, STATUS is either 'success or
+ ;; '(exception ...). END and START are the completion and start times,
+ ;; respectively, as integers (seconds since the Epoch).
+ (timer-request 'past-runs))
(define sleep (@ (fibers) sleep))
+(define %past-run-log-size
+ ;; Maximum number of entries the log of timer runs.
+ 50)
+
(define* (make-timer-constructor event action
#:key log-file
wait-for-termination?)
@@ -577,8 +595,9 @@ instances running concurrently."
(name (service-canonical-name (current-service))))
(spawn-fiber
(lambda ()
- (let loop ((processes '())
- (termination #f))
+ (let-loop loop ((processes '()) ;PID/start time
+ (past-runs (ring-buffer %past-run-log-size))
+ (termination #f))
(match (if (or termination
(and (pair? processes) wait-for-termination?))
(get-message channel)
@@ -592,26 +611,37 @@ instances running concurrently."
"Terminating timer '~a' with ~a processes running."
(length processes))
name (length processes))
- (for-each (lambda (pid)
- (terminate-process pid SIGHUP))
+ (for-each (match-lambda
+ ((pid . _)
+ (terminate-process pid SIGHUP)))
processes)
;; If there are processes left, keep going until they're gone.
(if (pair? processes)
- (loop processes reply)
+ (loop (termination reply))
(put-message reply #t)))
(('process-terminated pid status)
;; Process PID completed.
- (local-output
- (l10n "Process ~a of timer '~a' terminated with status ~a.")
- pid name status)
- (record-process-exit-status pid status)
- (let ((processes (delv pid processes)))
- (if (and termination (null? processes))
+ (let ((start-time (assoc-ref processes pid))
+ (end-time ((@ (guile) current-time)))
+ (remaining (alist-delete pid processes)))
+ (local-output
+ (l10n "Process ~a of timer '~a' terminated with status ~a \
+after ~a seconds.")
+ pid name status
+ (- end-time start-time))
+ (if (and termination (null? remaining))
(put-message termination #t) ;done
- (loop processes termination))))
+ (loop (processes remaining)
+ (past-runs
+ (ring-buffer-insert
+ (list status end-time start-time)
+ past-runs))))))
(('processes reply)
(put-message reply processes)
- (loop processes termination))
+ (loop))
+ (('past-runs reply)
+ (put-message reply past-runs)
+ (loop))
('timeout
;; Time to perform ACTION.
(if (command? action)
@@ -633,16 +663,27 @@ instances running concurrently."
(local-output (l10n "Timer '~a' spawned process ~a.")
name pid)
- (loop (cons pid processes) termination))
- (begin
- (catch #t
- action
- (lambda (key . args)
- (local-output
- (l10n "Exception caught while calling action of \
+ (loop (processes
+ (alist-cons pid ((@ (guile) current-time))
+ processes))))
+ (let ((start-time ((@ (guile) current-time))))
+ (define result
+ (catch #t
+ (lambda ()
+ (action)
+ 'success)
+ (lambda (key . args)
+ (local-output
+ (l10n "Exception caught while calling action of \
timer '~a': ~s")
- name (cons key args))))
- (loop processes termination))))
+ name (cons key args))
+ `(exception ,key ,@args))))
+
+ (loop (past-runs
+ (ring-buffer-insert
+ (list result ((@ (guile) current-time)) start-time)
+ past-runs))))))
+
('overslept
;; Reached when resuming from sleep state: we slept
;; significantly more than the requested number of seconds. To
@@ -651,7 +692,7 @@ timer '~a': ~s")
(local-output (l10n "Waiting anew for timer '~a' (resuming \
from sleep state?).")
name)
- (loop processes termination))))))
+ (loop))))))
(timer channel event action))))
diff --git a/tests/services/timer.sh b/tests/services/timer.sh
index a4bb165..2158a43 100644
--- a/tests/services/timer.sh
+++ b/tests/services/timer.sh
@@ -49,6 +49,12 @@ cat > "$conf" <<EOF
(calendar-event #:seconds (iota 60))
(lambda () (display "Hello from procedure.\n")))
#:stop (make-timer-destructor))
+ (service '(timer-that-throws)
+ #:start (make-timer-constructor
+ (calendar-event #:seconds (iota 60))
+ (lambda () (display "Throwing!\n") (mkdir "/")))
+ #:stop (make-timer-destructor)
+ #:actions (list timer-trigger-action))
(service '(endless-timer)
#:start (make-timer-constructor
(calendar-event #:seconds (iota 60))
@@ -61,6 +67,7 @@ cat > "$conf" <<EOF
#:months
(if (<= (date-month (current-date)) 6)
'(12)
+
'(1)))
(command (quote ("sh" "-c" "echo Triggered from
\$PWD."))
#:directory "$PWD"))
@@ -88,8 +95,14 @@ $herd stop timer-with-command
$herd start timer-with-procedure
sleep 2
+$herd status timer-with-procedure | grep "Completed in" # recent runs
grep "Hello from procedure" "$log"
+$herd start timer-that-throws
+$herd trigger timer-that-throws
+grep "Throwing" "$log"
+$herd status timer-that-throws | grep "Exception thrown.*mkdir" # recent runs
+
rm -f "$service_pid"
$herd start endless-timer
sleep 2