[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[taler-grid5k] 140/189: move loki data to nfs directly as lost in copy
From: gnunet
Subject: [taler-grid5k] 140/189: move loki data to nfs directly as lost in copy
Date: Thu, 28 Apr 2022 10:48:30 +0200
This is an automated email from the git hooks/post-receive script.
marco-boss pushed a commit to branch master
in repository grid5k.
commit 9ca13042b916ed5bfafbb77ffdc1b76c3b1abf0f
Author: Boss Marco <bossm8@bfh.ch>
AuthorDate: Tue Apr 12 15:03:43 2022 +0200
move loki data to nfs directly as lost in copy
---
additional/grafana/exchange.json | 258 +++++++++++++++++++--
configs/etc/monitor/loki.yaml | 10 +-
.../usr/lib/systemd/system/taler-data-backup.timer | 2 +-
experiment/scripts/data-backup.sh | 14 +-
experiment/scripts/monitor.sh | 3 +
experiment/scripts/setup.sh | 12 +
6 files changed, 262 insertions(+), 37 deletions(-)
diff --git a/additional/grafana/exchange.json b/additional/grafana/exchange.json
index 29ed2a7..93ce003 100644
--- a/additional/grafana/exchange.json
+++ b/additional/grafana/exchange.json
@@ -767,7 +767,7 @@
},
"gridPos": {
"h": 8,
- "w": 14,
+ "w": 12,
"x": 0,
"y": 36
},
@@ -803,6 +803,10 @@
"type": "bargauge"
},
{
+ "datasource": {
+ "type": "loki",
+ "uid": "${DS_LOKI}"
+ },
"description": "",
"fieldConfig": {
"defaults": {
@@ -837,7 +841,75 @@
"gridPos": {
"h": 4,
"w": 3,
- "x": 14,
+ "x": 12,
+ "y": 36
+ },
+ "id": 33,
+ "options": {
+ "colorMode": "none",
+ "graphMode": "none",
+ "justifyMode": "auto",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "textMode": "auto"
+ },
+ "pluginVersion": "8.4.3",
+ "targets": [
+ {
+ "datasource": {
+ "type": "loki",
+ "uid": "${DS_LOKI}"
+ },
+ "expr": "min_over_time(({app=\"taler-database\"} |~ \"duration:\" | regexp \"duration: (?P<time>\\\\d*.*\\\\d*) ms\" | unwrap time) [5m])",
+ "queryType": "instant",
+ "refId": "A"
+ }
+ ],
+ "title": "Min",
+ "type": "stat"
+ },
+ {
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "mappings": [
+ {
+ "options": {
+ "match": "null+nan",
+ "result": {
+ "index": 0,
+ "text": "0"
+ }
+ },
+ "type": "special"
+ }
+ ],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ }
+ ]
+ },
+ "unit": "ms"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 4,
+ "w": 3,
+ "x": 15,
"y": 36
},
"id": 25,
@@ -863,7 +935,7 @@
"uid": "${DS_PROMETHEUS}"
},
"exemplar": false,
- "expr": "rate(taler_database_slow_query_duration_hist_sum{}[$__range]) / rate(taler_database_slow_query_duration_hist_count{} [$__range])",
+ "expr": "rate(taler_database_slow_query_duration_hist_sum{}[5m]) / rate(taler_database_slow_query_duration_hist_count{} [5m])",
"instant": true,
"interval": "",
"legendFormat": "",
@@ -874,6 +946,10 @@
"type": "stat"
},
{
+ "datasource": {
+ "type": "loki",
+ "uid": "${DS_LOKI}"
+ },
"description": "",
"fieldConfig": {
"defaults": {
@@ -908,10 +984,10 @@
"gridPos": {
"h": 4,
"w": 3,
- "x": 17,
+ "x": 18,
"y": 36
},
- "id": 27,
+ "id": 34,
"options": {
"colorMode": "none",
"graphMode": "none",
@@ -930,18 +1006,15 @@
"targets": [
{
"datasource": {
- "type": "prometheus",
- "uid": "${DS_PROMETHEUS}"
+ "type": "loki",
+ "uid": "${DS_LOKI}"
},
- "exemplar": false,
- "expr": "histogram_quantile(0.9, rate(taler_database_slow_query_duration_hist_bucket{}[$__range]))",
- "instant": true,
- "interval": "",
- "legendFormat": "",
+ "expr": "max_over_time(({app=\"taler-database\"} |~ \"duration:\" | regexp \"duration: (?P<time>\\\\d*.*\\\\d*) ms\" | unwrap time) [5m])",
+ "queryType": "instant",
"refId": "A"
}
],
- "title": "90th Percentile",
+ "title": "Max",
"type": "stat"
},
{
@@ -972,13 +1045,13 @@
},
"gridPos": {
"h": 4,
- "w": 4,
- "x": 20,
+ "w": 3,
+ "x": 21,
"y": 36
},
"id": 10,
"options": {
- "colorMode": "value",
+ "colorMode": "none",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "auto",
@@ -1043,7 +1116,7 @@
"gridPos": {
"h": 4,
"w": 3,
- "x": 14,
+ "x": 12,
"y": 40
},
"id": 26,
@@ -1079,6 +1152,78 @@
"title": "Median",
"type": "stat"
},
+ {
+ "datasource": {
+ "type": "loki",
+ "uid": "${DS_LOKI}"
+ },
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "mappings": [
+ {
+ "options": {
+ "match": "null+nan",
+ "result": {
+ "index": 0,
+ "text": "0"
+ }
+ },
+ "type": "special"
+ }
+ ],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ }
+ ]
+ },
+ "unit": "ms"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 4,
+ "w": 3,
+ "x": 15,
+ "y": 40
+ },
+ "id": 35,
+ "options": {
+ "colorMode": "none",
+ "graphMode": "none",
+ "justifyMode": "auto",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "textMode": "auto"
+ },
+ "pluginVersion": "8.4.3",
+ "targets": [
+ {
+ "datasource": {
+ "type": "loki",
+ "uid": "${DS_LOKI}"
+ },
+ "expr": "stddev_over_time(({app=\"taler-database\"} |~ \"duration:\" | regexp \"duration: (?P<time>\\\\d*.*\\\\d*) ms\" | unwrap time) [5m])",
+ "queryType": "instant",
+ "refId": "A"
+ }
+ ],
+ "title": "Stddev",
+ "type": "stat"
+ },
{
"description": "",
"fieldConfig": {
@@ -1114,7 +1259,78 @@
"gridPos": {
"h": 4,
"w": 3,
- "x": 17,
+ "x": 18,
+ "y": 40
+ },
+ "id": 27,
+ "options": {
+ "colorMode": "none",
+ "graphMode": "none",
+ "justifyMode": "auto",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "textMode": "auto"
+ },
+ "pluginVersion": "8.4.3",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "exemplar": false,
+ "expr": "histogram_quantile(0.9, rate(taler_database_slow_query_duration_hist_bucket{}[5m]))",
+ "instant": true,
+ "interval": "",
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "title": "90th Percentile",
+ "type": "stat"
+ },
+ {
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "mappings": [
+ {
+ "options": {
+ "match": "null+nan",
+ "result": {
+ "index": 0,
+ "text": "0"
+ }
+ },
+ "type": "special"
+ }
+ ],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ }
+ ]
+ },
+ "unit": "ms"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 4,
+ "w": 3,
+ "x": 21,
"y": 40
},
"id": 28,
@@ -1140,7 +1356,7 @@
"uid": "${DS_PROMETHEUS}"
},
"exemplar": false,
- "expr": "histogram_quantile(0.99, rate(taler_database_slow_query_duration_hist_bucket{}[$__range]))",
+ "expr": "histogram_quantile(0.99, rate(taler_database_slow_query_duration_hist_bucket{}[5m]))",
"instant": true,
"interval": "",
"legendFormat": "",
@@ -1193,7 +1409,7 @@
]
},
"gridPos": {
- "h": 7,
+ "h": 8,
"w": 24,
"x": 0,
"y": 44
@@ -1308,6 +1524,6 @@
"timezone": "",
"title": "Exchange",
"uid": "NYO0EIE7k",
- "version": 33,
+ "version": 35,
"weekStart": ""
}
\ No newline at end of file
diff --git a/configs/etc/monitor/loki.yaml b/configs/etc/monitor/loki.yaml
index 461ea19..fe8e0db 100644
--- a/configs/etc/monitor/loki.yaml
+++ b/configs/etc/monitor/loki.yaml
@@ -10,7 +10,7 @@ querier:
ingester:
wal:
enabled: true
- dir: /tmp/loki/wal
+ dir: <EXPERIMENT_DATA_PATH_HERE>/loki/wal
lifecycler:
address: 127.0.0.1
ring:
@@ -36,15 +36,15 @@ schema_config:
storage_config:
boltdb_shipper:
- active_index_directory: /tmp/loki/boltdb-shipper-active
- cache_location: /tmp/loki/boltdb-shipper-cache
+ active_index_directory: <EXPERIMENT_DATA_PATH_HERE>/loki/boltdb-shipper-active
+ cache_location: <EXPERIMENT_DATA_PATH_HERE>/loki/boltdb-shipper-cache
cache_ttl: 24h
shared_store: filesystem
filesystem:
- directory: /tmp/loki/chunks
+ directory: <EXPERIMENT_DATA_PATH_HERE>/loki/chunks
compactor:
- working_directory: /tmp/loki/boltdb-shipper-compactor
+ working_directory: <EXPERIMENT_DATA_PATH_HERE>/loki/boltdb-shipper-compactor
shared_store: filesystem
limits_config:
diff --git a/configs/usr/lib/systemd/system/taler-data-backup.timer b/configs/usr/lib/systemd/system/taler-data-backup.timer
index 58f1aee..b49a4a1 100644
--- a/configs/usr/lib/systemd/system/taler-data-backup.timer
+++ b/configs/usr/lib/systemd/system/taler-data-backup.timer
@@ -4,7 +4,7 @@ Requires=taler-data-backup.service
[Timer]
Unit=taler-data-backup.service
-OnCalendar=*:0/5
+OnCalendar=*:0/2
[Install]
WantedBy=timers.target
diff --git a/experiment/scripts/data-backup.sh b/experiment/scripts/data-backup.sh
index e5b60c2..cf497a4 100755
--- a/experiment/scripts/data-backup.sh
+++ b/experiment/scripts/data-backup.sh
@@ -14,12 +14,9 @@ if [[ $(ps -aux | grep "[data]-backup.sh" | wc -l) -eq 1 ]]; then
fi
PROMETHEUS_DATA_DIR=/var/lib/prometheus/metrics2/snapshots
-LOKI_DATA_DIR=/tmp/loki
SNAPSHOT_COPY_PATH=${LOG_DIR}/tmp-data-snapshot
-SNAPSHOT_PERS_FILE=${LOG_DIR}/data-snapshot.tar.gz
-
-mkdir "${SNAPSHOT_COPY_PATH}" || true
+SNAPSHOT_PATH=${EXPERIMENT_DATA_DIR}/prometheus
END_TIME=$(date +%s)
@@ -31,15 +28,12 @@ SNAPSHOT="${PROMETHEUS_DATA_DIR}/$(\
)"
# copy first because its nfs, when copied remove the previous one
-cp -r "${SNAPSHOT}" "${SNAPSHOT_COPY_PATH}/prometheus"
-cp -r "${LOKI_DATA_DIR}" "${SNAPSHOT_COPY_PATH}/loki"
+cp -r "${SNAPSHOT}" "${SNAPSHOT_COPY_PATH}"
+mv "${SNAPSHOT_COPY_PATH}" "${SNAPSHOT_PATH}"
# export timestamps for the recover script in g5k-repo/additional
echo "
EXPERIMENT_START=${START_TIME}
SNAPSHOT_TIME=${END_TIME}
-" >> "${SNAPSHOT_COPY_PATH}/times.env"
+" > "${EXPERIMENT_DATA_DIR}/times.env"
-rm -rf "${SNAPSHOT_PERS_FILE}"
-tar -C "${SNAPSHOT_COPY_PATH}" -czvf "${SNAPSHOT_PERS_FILE}" .
-rm -rf "${SNAPSHOT_COPY_PATH}"
diff --git a/experiment/scripts/monitor.sh b/experiment/scripts/monitor.sh
index 3e86a05..80719ee 100755
--- a/experiment/scripts/monitor.sh
+++ b/experiment/scripts/monitor.sh
@@ -189,6 +189,9 @@ function remove_exchanges_from_prometheus() {
function init_monitor() {
update_grafana
+
+ sed -i "s|<EXPERIMENT_DATA_DIR_HERE>|${EXPERIMENT_DATA_DIR}|g" \
+ /etc/monitor/loki.yaml
systemctl restart loki \
promtail
diff --git a/experiment/scripts/setup.sh b/experiment/scripts/setup.sh
index 857f52a..f1d9d13 100644
--- a/experiment/scripts/setup.sh
+++ b/experiment/scripts/setup.sh
@@ -73,6 +73,17 @@ function setup_log_dir() {
echo "LOG_DIR=${LOG_DIR}" >> ~/.env
}
+# Create the directory which will be used to store recovery data
+# for the experiment (prometheus + loki)
+function setup_data_dir() {
+ EXPERIMENT_DATA_DIR="${LOG_DIR}/exp-data"
+ if [ -f ${EXPERIMENT_DATA_DIR} ]; then
+ rm -rf ${EXPERIMENT_DATA_DIR}
+ fi
+ mkdir ${EXPERIMENT_DATA_DIR}
+ echo "EXPERIMENT_DATA_DIR=${EXPERIMENT_DATA_DIR}" >> ~/.env
+}
+
# Setup the environment configuration
function setup_environment() {
set +x
@@ -184,6 +195,7 @@ function clean_previous_setup() {
clean_previous_setup
parse_experiment_nodes
setup_log_dir
+setup_data_dir
setup_environment
setup_config
setup_dns
--
To stop receiving notification emails like this one, please contact
gnunet@gnunet.org.
- [taler-grid5k] 82/189: include secondary exchanges in prometheus, (continued)
- [taler-grid5k] 82/189: include secondary exchanges in prometheus, gnunet, 2022/04/28
- [taler-grid5k] 115/189: fix sharding setup, finish explain script summary, gnunet, 2022/04/28
- [taler-grid5k] 107/189: fix most time consuming, gnunet, 2022/04/28
- [taler-grid5k] 73/189: better endpoint pipeline definition, gnunet, 2022/04/28
- [taler-grid5k] 68/189: add possibility to run multiple exchange-httpd hosts - services must be run as root, since the nfs does allow to change permissions, gnunet, 2022/04/28
- [taler-grid5k] 94/189: merchant behind nginx, gnunet, 2022/04/28
- [taler-grid5k] 85/189: fixes, gnunet, 2022/04/28
- [taler-grid5k] 146/189: update recovery, gnunet, 2022/04/28
- [taler-grid5k] 179/189: fix tar path of centos, gnunet, 2022/04/28
- [taler-grid5k] 181/189: centos database working, gnunet, 2022/04/28
- [taler-grid5k] 140/189: move loki data to nfs directly as lost in copy, gnunet <=
- [taler-grid5k] 126/189: try further with promtail metrics, gnunet, 2022/04/28
- [taler-grid5k] 27/189: include shards in monitoring, gnunet, 2022/04/28
- [taler-grid5k] 24/189: fix missing with tar, gnunet, 2022/04/28
- [taler-grid5k] 42/189: hopefully better logging of slow queries with pg_stat_statements, gnunet, 2022/04/28
- [taler-grid5k] 39/189: test do not partition wire_targets, gnunet, 2022/04/28
- [taler-grid5k] 14/189: aggregation tracking cant be sharded, gnunet, 2022/04/28
- [taler-grid5k] 30/189: log nested statements to hopefully get function debug output, gnunet, 2022/04/28
- [taler-grid5k] 16/189: re-enable sharding of aggregation tracking tables, gnunet, 2022/04/28
- [taler-grid5k] 43/189: again without analyze, gnunet, 2022/04/28
- [taler-grid5k] 47/189: add monitor to dahu because of link speed, gnunet, 2022/04/28