gnunet-svn
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[taler-grid5k] 125/189: periodic prometheus backup which can be viewed locally with additional/recover/run.sh


From: gnunet
Subject: [taler-grid5k] 125/189: periodic prometheus backup which can be viewed locally with additional/recover/run.sh
Date: Thu, 28 Apr 2022 10:48:15 +0200

This is an automated email from the git hooks/post-receive script.

marco-boss pushed a commit to branch master
in repository grid5k.

commit a1397845447a09684239e88201833c4b3501cbac
Author: Boss Marco <bossm8@bfh.ch>
AuthorDate: Thu Apr 7 15:30:15 2022 +0200

    periodic prometheus backup which can be viewed locally with additional/recover/run.sh
---
 additional/persist.sh                              |  4 +-
 additional/recover/.gitignore                      |  4 ++
 .../provisioning/dashboards/taler-perf.yaml        |  9 ++++
 .../provisioning/datasources/prometheus.yaml       | 12 +++++
 .../recover/assets/prometheus/prometheus.yaml      |  0
 additional/recover/docker-compose.yaml             | 52 ++++++++++++++++++++
 additional/recover/run.sh                          | 55 ++++++++++++++++++++++
 .../systemd/system/taler-prometheus-backup.service | 11 +++++
 .../systemd/system/taler-prometheus-backup.timer   | 10 ++++
 experiment/scripts/dns.sh                          |  0
 experiment/scripts/monitor.sh                      |  3 +-
 experiment/scripts/prometheus-backup.sh            | 33 +++++++++++++
 experiment/scripts/setup.sh                        |  2 +-
 13 files changed, 191 insertions(+), 4 deletions(-)

diff --git a/additional/persist.sh b/additional/persist.sh
index a3c3882..9e72e9d 100755
--- a/additional/persist.sh
+++ b/additional/persist.sh
@@ -51,9 +51,9 @@ BACKUP_DIR="archives"
 
 G5K_BACKUP="g5k-$(date +%s).tar.gz"
 
-ssh g5k 'tar --ignore-failed-read -czvf grenoble/g5k.tar.gz $(find . -type l -exec echo "{}/taler-logs/" \;)'
+ssh g5k 'tar --ignore-failed-read -czvf grenoble/g5k.tar.gz $(find . -type l -exec echo "{}/taler-perf/" \;)'
 scp g5k:~/grenoble/g5k.tar.gz ${G5K_BACKUP}
-ssh g5k 'rm grenoble/g5k.tar.gz && $(find . -type l -exec rm -rf "{}/taler-logs/" \;)'
+ssh g5k 'rm grenoble/g5k.tar.gz && $(find . -type l -exec rm -rf "{}/taler-perf/" \;)'
 
 test -d ${BACKUP_DIR} || mkdir ${BACKUP_DIR}
 
diff --git a/additional/recover/.gitignore b/additional/recover/.gitignore
new file mode 100644
index 0000000..22a7683
--- /dev/null
+++ b/additional/recover/.gitignore
@@ -0,0 +1,4 @@
+!assets
+!*.yaml
+!.gitignore
+*
diff --git a/additional/recover/assets/grafana/provisioning/dashboards/taler-perf.yaml b/additional/recover/assets/grafana/provisioning/dashboards/taler-perf.yaml
new file mode 100644
index 0000000..63f8ab0
--- /dev/null
+++ b/additional/recover/assets/grafana/provisioning/dashboards/taler-perf.yaml
@@ -0,0 +1,9 @@
+apiVersion: 1
+
+providers:
+  - name: Taler
+    orgId: 1
+    type: file
+    options:
+      path: /etc/taler/dashboards
+      foldersFromFilesStructure: false
diff --git a/additional/recover/assets/grafana/provisioning/datasources/prometheus.yaml b/additional/recover/assets/grafana/provisioning/datasources/prometheus.yaml
new file mode 100644
index 0000000..0900450
--- /dev/null
+++ b/additional/recover/assets/grafana/provisioning/datasources/prometheus.yaml
@@ -0,0 +1,12 @@
+apiVersion: 1
+
+datasources:
+  - name: Prometheus
+    type: prometheus
+    access: proxy
+    orgId: 1
+    uid: default-prometheus
+    url: http://prometheus:9090
+    isDefault: true
+    editable: false
+    version: 1
diff --git a/additional/recover/assets/prometheus/prometheus.yaml b/additional/recover/assets/prometheus/prometheus.yaml
new file mode 100644
index 0000000..e69de29
diff --git a/additional/recover/docker-compose.yaml b/additional/recover/docker-compose.yaml
new file mode 100644
index 0000000..16b6f62
--- /dev/null
+++ b/additional/recover/docker-compose.yaml
@@ -0,0 +1,52 @@
+services:
+
+  prometheus:
+    restart: always
+    hostname: prometheus
+    image: prom/prometheus:latest
+    restart: 'no'
+    container_name: prometheus
+    user: "${U_ID}:${G_ID}"
+    ports:
+      - "9090:9090"
+    command: 
+      - '--storage.tsdb.retention.size=1TB'
+    volumes:
+      - ${PWD}/assets/prometheus/prometheus.yaml:/prometheus/prometheus.yml
+      - /tmp/taler-perf/prometheus-data/:/prometheus/data
+
+  grafana:
+    restart: always
+    hostname: grafana
+    image: grafana/grafana:latest
+    restart: 'no'
+    container_name: grafana
+    user: "${U_ID}:${G_ID}"
+    environment:
+      GF_SECURITY_ADMIN_USER: taler
+      GF_SECURITY_ADMIN_PASSWORD: taler
+      GF_AUTH_ANONYMOUS_ENABLED: 'true'
+      GF_AUTH_ANONYMOUS_ORG_ROLE: Admin
+      GF_SESSION_COOKIE_SECURE: 'false'
+      GF_AUTH_BASIC_ENABLED: 'false'
+      GF_SERVER_DOMAIN: 'localhost'
+      GF_ANALYTICS_REPORTING_ENABLED: 'false'
+      GF_ALERTING_ENABLED: 'false'
+      GF_SECRET_KEY: super-safe-secret-key
+      GF_USERS_ALLOW_SIGNUPS: 'false'
+      GF_DASHBOARDS_DEFAULT_HOME_DASHBOARD_PATH: /etc/taler/dashboards/taler-performance.json
+      GF_RENDERING_SERVER_URL: 'http://grafana-renderer:8081/render'
+      GF_RENDERING_CALLBACK_URL: 'http://grafana:3000/'
+      GF_LIVE_ALLOWED_ORIGINS: 'http://grafana:3000'
+    ports:
+      - "80:3000"
+    volumes:
+      - ${PWD}/assets/grafana/provisioning:/etc/grafana/provisioning
+      - /tmp/taler-perf/dashboards:/etc/taler/dashboards
+
+  renderer:
+    image: grafana/grafana-image-renderer:latest
+    restart: 'no'
+    hostname: grafana-renderer
+    container_name: grafana-renderer
+
diff --git a/additional/recover/run.sh b/additional/recover/run.sh
new file mode 100755
index 0000000..da407f6
--- /dev/null
+++ b/additional/recover/run.sh
@@ -0,0 +1,55 @@
+#!/bin/bash
+
+# Script to run a local instance of grafana and prometheus
+# with a prometheus snapshot of an experiment
+
+if [[ -z ${1} ]]; then
+  echo "Usage: run.sh PROMETHEUS_SNAPSHOT_DIR"
+  exit 1
+fi
+
+# use a backup as it gets overriden by Prometheus at start
+PROMETHEUS_DATA=/tmp/taler-perf/prometheus-data
+cp -r ${1} ${PROMETHEUS_DATA}
+
+# load the times to adjust the grafana dashboards
+source "${PROMETHEUS_DATA}/times.env"
+
+START=$(date --date=@${EXPERIMENT_START} +"%F %T")
+END=$(date --date=@${SNAPSHOT_TIME} +"%F %T")
+
+# to get the right permissions on the Prometheus data
+export U_ID=$(id -u)
+export G_ID=$(id -g)
+
+function startup() {
+
+  # prepare the dashboards for grafana
+  # need to replace the DS_PROMETHEUS placeholder which is created when
+  # exporting and set the timestamps that it works out of the box without
+  # having to search for the experiment time in the browser
+  for FILE in $(find ../grafana -iname "*json" ! -iname "logs.json") 
+  do
+    OUTPUT="/tmp/taler-perf/dashboards/$(basename $FILE)"
+    sed 's/${DS_PROMETHEUS}/default-prometheus/g' $FILE | \
+    jq --arg f "${START}" --arg t "${END}" '.time.from=$f | .time.to=$t' > $OUTPUT
+  done
+
+  docker-compose down
+  docker-compose up &
+}
+
+function cleanup() {
+  docker-compose down -v
+  rm -rf ${PROMETHEUS_DATA}
+}
+
+function ctrl_c() {
+  echo "Shutting down"
+  cleanup
+}
+
+trap ctrl_c INT
+
+startup
+wait
diff --git a/configs/usr/lib/systemd/system/taler-prometheus-backup.service b/configs/usr/lib/systemd/system/taler-prometheus-backup.service
new file mode 100644
index 0000000..dab9b9c
--- /dev/null
+++ b/configs/usr/lib/systemd/system/taler-prometheus-backup.service
@@ -0,0 +1,11 @@
+[Unit]
+Description=Create periodic snapshots of the Prometheus TSB in the experiments
+Wants=taler-prometheus-backup.service
+
+[Service]
+EnvironmentFile=/etc/environment
+Type=oneshot
+ExecStart=/root/scripts/prometheus-backup.sh
+
+[Install]
+WantedBy=multi-user.target
diff --git a/configs/usr/lib/systemd/system/taler-prometheus-backup.timer b/configs/usr/lib/systemd/system/taler-prometheus-backup.timer
new file mode 100644
index 0000000..cbb6c60
--- /dev/null
+++ b/configs/usr/lib/systemd/system/taler-prometheus-backup.timer
@@ -0,0 +1,10 @@
+[Unit]
+Description=Create periodic snapshots of the Prometheus TSB in the experiments
+Requires=taler-prometheus-backup.service
+
+[Timer]
+Unit=taler-prometheus-backup.service
+OnCalendar=*:0/2
+
+[Install]
+WantedBy=timers.target
diff --git a/experiment/scripts/dns.sh b/experiment/scripts/dns.sh
old mode 100644
new mode 100755
diff --git a/experiment/scripts/monitor.sh b/experiment/scripts/monitor.sh
index b29bb33..30ec6cd 100755
--- a/experiment/scripts/monitor.sh
+++ b/experiment/scripts/monitor.sh
@@ -222,7 +222,8 @@ function init_monitor() {
   configure_prometheus_and_exporters
 
   systemctl restart prometheus-postgres-exporter \
-                    prometheus
+                    prometheus \
+                   taler-prometheus-backup.timer
 
   exit 0
 }
diff --git a/experiment/scripts/prometheus-backup.sh b/experiment/scripts/prometheus-backup.sh
new file mode 100755
index 0000000..f8c056c
--- /dev/null
+++ b/experiment/scripts/prometheus-backup.sh
@@ -0,0 +1,33 @@
+#!/bin/bash
+
+# Script which creates a snapshot of a running Prometheus
+# TSDB on the same host (port 8080)
+
+# This will copy the snapshot to the configured LOG_DIR
+# (setup.sh) hopefully persistent on the Grid5000 NFS
+
+set -eu
+
+PROMETHEUS_DATA_DIR=/var/lib/prometheus/metrics2/
+SNAPSHOT_COPY_PATH=${LOG_DIR}/tmp-prom-snapshot
+SNAPSHOT_PERS_PATH=${LOG_DIR}/prometheus-snapshot
+
+END_TIME=$(date +%s)
+
+SNAPSHOT="${PROMETHEUS_DATA_DIR}/$(\
+  curl \
+    -X POST \
+    http://localhost:8080/api/v1/admin/tsdb/snapshot \
+  | jq -r '.data.name'
+)"
+
+# export timestamps for the recover script in g5k-repo/additional
+echo "
+EXPERIMENT_START=${START_TIME}
+SNAPSHOT_TIME=${END_TIME}
+" >> "${SNAPSHOT}/times.env"
+
+# copy first because its nfs, when copied remove the previous one
+cp -r "${SNAPSHOT}" "${SNAPSHOT_COPY_PATH}"
+rm -rf "${SNAPSHOT_PERS_PATH}"
+mv "${SNAPSHOT_COPY_PATH}" "${SNAPSHOT_PERS_PATH}" 
diff --git a/experiment/scripts/setup.sh b/experiment/scripts/setup.sh
index 4f6ee14..857f52a 100644
--- a/experiment/scripts/setup.sh
+++ b/experiment/scripts/setup.sh
@@ -58,7 +58,7 @@ function parse_experiment_nodes() {
 # Determine and create the base log directory
 # If NFS exists, it will be created in the users home dir on the NFS
 function setup_log_dir() {
-  LOG_DIR=/home/${G5K_USER}/taler-logs
+  LOG_DIR=/home/${G5K_USER}/taler-perf
   
   if [ -d ${LOG_DIR} ]; then
     # If multiple nodes want to delete the same dir we run into errors

-- 
To stop receiving notification emails like this one, please contact
gnunet@gnunet.org.



reply via email to

[Prev in Thread] Current Thread [Next in Thread]