[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[taler-grid5k] 135/189: add loki to recover setup
From: gnunet
Subject: [taler-grid5k] 135/189: add loki to recover setup
Date: Thu, 28 Apr 2022 10:48:25 +0200
This is an automated email from the git hooks/post-receive script.
marco-boss pushed a commit to branch master
in repository grid5k.
commit a0501668bfc4b60afe875bf62ef34ca468f4b4a3
Author: Boss Marco <bossm8@bfh.ch>
AuthorDate: Fri Apr 8 19:18:00 2022 +0200
add loki to recover setup
---
additional/recover/docker-compose.yaml | 20 ++++++++++++++----
additional/recover/run.sh | 24 ++++++++++++----------
configs/etc/monitor/promtail.yaml | 7 -------
.../lib/systemd/system/taler-data-backup.service | 11 ++++++++++
.../usr/lib/systemd/system/taler-data-backup.timer | 10 +++++++++
.../systemd/system/taler-prometheus-backup.service | 11 ----------
.../systemd/system/taler-prometheus-backup.timer | 10 ---------
.../{prometheus-backup.sh => data-backup.sh} | 23 +++++++++++++--------
8 files changed, 64 insertions(+), 52 deletions(-)
diff --git a/additional/recover/docker-compose.yaml b/additional/recover/docker-compose.yaml
index 16b6f62..018f84c 100644
--- a/additional/recover/docker-compose.yaml
+++ b/additional/recover/docker-compose.yaml
@@ -1,7 +1,6 @@
services:
prometheus:
- restart: always
hostname: prometheus
image: prom/prometheus:latest
restart: 'no'
@@ -13,10 +12,9 @@ services:
- '--storage.tsdb.retention.size=1TB'
volumes:
- ${PWD}/assets/prometheus/prometheus.yaml:/prometheus/prometheus.yml
- - /tmp/taler-perf/prometheus-data/:/prometheus/data
+ - ${PROMETHEUS_DATA}:/prometheus/data
grafana:
- restart: always
hostname: grafana
image: grafana/grafana:latest
restart: 'no'
@@ -42,7 +40,7 @@ services:
- "80:3000"
volumes:
- ${PWD}/assets/grafana/provisioning:/etc/grafana/provisioning
- - /tmp/taler-perf/dashboards:/etc/taler/dashboards
+ - ${DASHBOARDS_DIR}:/etc/taler/dashboards
renderer:
image: grafana/grafana-image-renderer:latest
@@ -50,3 +48,17 @@ services:
hostname: grafana-renderer
container_name: grafana-renderer
+ loki:
+ image: grafana/loki:latest
+ restart: 'no'
+ hostname: loki
+ container_name: loki
+ user: "${U_ID}:${G_ID}"
+ ports:
+ - "3100:3100"
+ command:
+ - '-config.file=/etc/loki/loki.yml'
+ volumes:
+ - ${PWD}/assets/loki/loki.yaml:/etc/loki/loki.yml
+ - ${LOKI_DATA}:/tmp/loki
+
diff --git a/additional/recover/run.sh b/additional/recover/run.sh
index 4a62dc1..f14afd2 100755
--- a/additional/recover/run.sh
+++ b/additional/recover/run.sh
@@ -1,24 +1,25 @@
#!/bin/bash
-# Script to run a local instance of grafana and prometheus
-# with a prometheus snapshot of an experiment
+# Script to run a local instance of grafana, loki and prometheus
+# with a data (prometheus and loki) snapshot of an experiment
if [[ -z ${1} ]]; then
- echo "Usage: run.sh PROMETHEUS_SNAPSHOT_DIR"
+ echo "Usage: run.sh DATA_SNAPSHOT_ARCHIVE"
exit 1
fi
TMP_BASEDIR=/tmp/taler-perf
-DASHBOARDS_DIR="${TMP_BASEDIR}/dashboards"
+export LOKI_DATA="${TMP_BASEDIR}/loki"
+export PROMETHEUS_DATA="${TMP_BASEDIR}/prometheus"
+export DASHBOARDS_DIR="${TMP_BASEDIR}/dashboards"
+
mkdir -p ${DASHBOARDS_DIR} || true
-# use a backup as it gets overriden by Prometheus at start
-PROMETHEUS_DATA="${TMP_BASEDIR}/prometheus-data"
-cp -r ${1} ${PROMETHEUS_DATA}
+tar -xvf ${1} -C ${TMP_BASEDIR}
# load the times to adjust the grafana dashboards
-source "${PROMETHEUS_DATA}/times.env"
+source "${TMP_BASEDIR}/times.env"
START=$(date --date=@${EXPERIMENT_START} +"%F %T")
END=$(date --date=@${SNAPSHOT_TIME} +"%F %T")
@@ -33,10 +34,11 @@ function startup() {
# need to replace the DS_PROMETHEUS placeholder which is created when
# exporting and set the timestamps that it works out of the box without
# having to search for the experiment time in the browser
- for FILE in $(find ../grafana -iname "*json" ! -iname "logs.json")
+ for FILE in $(find ../grafana -iname "*json")
do
OUTPUT="${DASHBOARDS_DIR}/$(basename $FILE)"
- sed 's/${DS_PROMETHEUS}/default-prometheus/g' $FILE | \
+ sed -e 's/${DS_PROMETHEUS}/default-prometheus/g' \
+ -e 's/${DS_LOKI}/default-loki/g' $FILE | \
jq --arg f "${START}" --arg t "${END}" '.time.from=$f | .time.to=$t' > $OUTPUT
done
@@ -46,7 +48,7 @@ function startup() {
function cleanup() {
docker-compose down -v
- rm -rf ${PROMETHEUS_DATA}
+ rm -rf ${TMP_BASEDIR}
}
function ctrl_c() {
diff --git a/configs/etc/monitor/promtail.yaml b/configs/etc/monitor/promtail.yaml
index 4e37b75..f4b8524 100644
--- a/configs/etc/monitor/promtail.yaml
+++ b/configs/etc/monitor/promtail.yaml
@@ -38,13 +38,6 @@ scrape_configs:
- regex:
expression: 'duration: (?P<time>\d+\.*\d*) ms'
- metrics:
- num_slow_queries:
- prefix: 'taler_database_'
- type: Counter
- source: time
- description: 'Exchange database number of slow queries'
- config:
- action: inc
slow_query_duration_hist:
prefix: 'taler_database_'
type: Histogram
diff --git a/configs/usr/lib/systemd/system/taler-data-backup.service b/configs/usr/lib/systemd/system/taler-data-backup.service
new file mode 100644
index 0000000..0ede02c
--- /dev/null
+++ b/configs/usr/lib/systemd/system/taler-data-backup.service
@@ -0,0 +1,11 @@
+[Unit]
+Description=Create periodic snapshots of the Prometheus and Loki data in the experiments
+Wants=taler-data-backup.service
+
+[Service]
+EnvironmentFile=/etc/environment
+Type=oneshot
+ExecStart=/root/scripts/data-backup.sh
+
+[Install]
+WantedBy=multi-user.target
diff --git a/configs/usr/lib/systemd/system/taler-data-backup.timer b/configs/usr/lib/systemd/system/taler-data-backup.timer
new file mode 100644
index 0000000..b49a4a1
--- /dev/null
+++ b/configs/usr/lib/systemd/system/taler-data-backup.timer
@@ -0,0 +1,10 @@
+[Unit]
+Description=Create periodic snapshots of the Prometheus and Loki data in the experiments
+Requires=taler-data-backup.service
+
+[Timer]
+Unit=taler-data-backup.service
+OnCalendar=*:0/2
+
+[Install]
+WantedBy=timers.target
diff --git a/configs/usr/lib/systemd/system/taler-prometheus-backup.service b/configs/usr/lib/systemd/system/taler-prometheus-backup.service
deleted file mode 100644
index dab9b9c..0000000
--- a/configs/usr/lib/systemd/system/taler-prometheus-backup.service
+++ /dev/null
@@ -1,11 +0,0 @@
-[Unit]
-Description=Create periodic snapshots of the Prometheus TSB in the experiments
-Wants=taler-prometheus-backup.service
-
-[Service]
-EnvironmentFile=/etc/environment
-Type=oneshot
-ExecStart=/root/scripts/prometheus-backup.sh
-
-[Install]
-WantedBy=multi-user.target
diff --git a/configs/usr/lib/systemd/system/taler-prometheus-backup.timer b/configs/usr/lib/systemd/system/taler-prometheus-backup.timer
deleted file mode 100644
index cbb6c60..0000000
--- a/configs/usr/lib/systemd/system/taler-prometheus-backup.timer
+++ /dev/null
@@ -1,10 +0,0 @@
-[Unit]
-Description=Create periodic snapshots of the Prometheus TSB in the experiments
-Requires=taler-prometheus-backup.service
-
-[Timer]
-Unit=taler-prometheus-backup.service
-OnCalendar=*:0/2
-
-[Install]
-WantedBy=timers.target
diff --git a/experiment/scripts/prometheus-backup.sh b/experiment/scripts/data-backup.sh
similarity index 52%
rename from experiment/scripts/prometheus-backup.sh
rename to experiment/scripts/data-backup.sh
index 7a285bc..bc84ac0 100755
--- a/experiment/scripts/prometheus-backup.sh
+++ b/experiment/scripts/data-backup.sh
@@ -1,7 +1,7 @@
#!/bin/bash
-# Script which creates a snapshot of a running Prometheus
-# TSDB on the same host (port 8080)
+# Script which creates a snapshot of a running Prometheu
+# and Loki instance
# This will copy the snapshot to the configured LOG_DIR
# (setup.sh) hopefully persistent on the Grid5000 NFS
@@ -9,8 +9,10 @@
set -eu
PROMETHEUS_DATA_DIR=/var/lib/prometheus/metrics2/snapshots
-SNAPSHOT_COPY_PATH=${LOG_DIR}/tmp-prom-snapshot
-SNAPSHOT_PERS_PATH=${LOG_DIR}/prometheus-snapshot
+LOKI_DATA_DIR=/tmp/loki
+
+SNAPSHOT_COPY_PATH=${LOG_DIR}/tmp-data-snapshot
+SNAPSHOT_PERS_FILE=${LOG_DIR}/data-snapshot.tar.gz
END_TIME=$(date +%s)
@@ -21,13 +23,16 @@ SNAPSHOT="${PROMETHEUS_DATA_DIR}/$(\
| jq -r '.data.name'
)"
+# copy first because its nfs, when copied remove the previous one
+cp -r "${SNAPSHOT}" "${SNAPSHOT_COPY_PATH}/prometheus"
+cp -r "${LOKI_DATA_DIR}" "${SNAPSHOT_COPY_PATH}/loki"
+
# export timestamps for the recover script in g5k-repo/additional
echo "
EXPERIMENT_START=${START_TIME}
SNAPSHOT_TIME=${END_TIME}
-" >> "${SNAPSHOT}/times.env"
+" >> "${SNAPSHOT_COPY_PATH}/times.env"
-# copy first because its nfs, when copied remove the previous one
-cp -r "${SNAPSHOT}" "${SNAPSHOT_COPY_PATH}"
-rm -rf "${SNAPSHOT_PERS_PATH}"
-mv "${SNAPSHOT_COPY_PATH}" "${SNAPSHOT_PERS_PATH}"
+rm -rf "${SNAPSHOT_PERS_FILE}"
+tar -czvf "${SNAPSHOT_PERS_FILE}" -C "${SNAPSHOT_COPY_PATH}" "${SNAPSHOT_COPY_PATH}"
+rm -rf "${SNAPSHOT_COPY_PATH}"
--
To stop receiving notification emails like this one, please contact
gnunet@gnunet.org.
- [taler-grid5k] 189/189: Commented and Documented, (continued)
- [taler-grid5k] 189/189: Commented and Documented, gnunet, 2022/04/28
- [taler-grid5k] 144/189: possible to provide the complete tar archive for recovery, gnunet, 2022/04/28
- [taler-grid5k] 149/189: update proxy configuration for merchant and bank, gnunet, 2022/04/28
- [taler-grid5k] 145/189: add some doc, gnunet, 2022/04/28
- [taler-grid5k] 162/189: add centos image, gnunet, 2022/04/28
- [taler-grid5k] 186/189: rebuild on all nodes, gnunet, 2022/04/28
- [taler-grid5k] 176/189: fix node-exporter, gnunet, 2022/04/28
- [taler-grid5k] 155/189: experiment with two postgresql instances, gnunet, 2022/04/28
- [taler-grid5k] 157/189: fix persist script, delete manually, gnunet, 2022/04/28
- [taler-grid5k] 151/189: add progress for archiving, gnunet, 2022/04/28
- [taler-grid5k] 135/189: add loki to recover setup, gnunet <=
- [taler-grid5k] 175/189: postgresql not loaded per default, gnunet, 2022/04/28
- [taler-grid5k] 159/189: 'cluster' (db) scripts added, gnunet, 2022/04/28
- [taler-grid5k] 132/189: fix regex, gnunet, 2022/04/28
- [taler-grid5k] 128/189: fix yaml, gnunet, 2022/04/28
- [taler-grid5k] 147/189: add proxy for fakebank, gnunet, 2022/04/28
- [taler-grid5k] 188/189: add more output to explain.py, gnunet, 2022/04/28
- [taler-grid5k] 125/189: periodic prometheus backup which can be viewed locally with additional/recover/run.sh, gnunet, 2022/04/28
- [taler-grid5k] 168/189: change grub - disable mitigations, gnunet, 2022/04/28
- [taler-grid5k] 148/189: add version to docker-compose, gnunet, 2022/04/28
- [taler-grid5k] 182/189: add centos note in readme, gnunet, 2022/04/28