[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[taler-grid5k] 111/189: explain analyze now working with correct postgres config
From: |
gnunet |
Subject: |
[taler-grid5k] 111/189: explain analyze now working with correct postgres config |
Date: |
Thu, 28 Apr 2022 10:48:01 +0200 |
This is an automated email from the git hooks/post-receive script.
marco-boss pushed a commit to branch master
in repository grid5k.
commit 0d055ac451c178c032eebf8c5573d37abbfc0d07
Author: Boss Marco <bossm8@bfh.ch>
AuthorDate: Thu Mar 24 11:45:55 2022 +0100
explain analyze now working with correct postgres config
---
additional/explain-visualizer/explain.py | 183 ++++++++++++++++++++-----------
experiment/scripts/database.sh | 10 +-
2 files changed, 125 insertions(+), 68 deletions(-)
diff --git a/additional/explain-visualizer/explain.py
b/additional/explain-visualizer/explain.py
index 3d91640..f5eb57a 100755
--- a/additional/explain-visualizer/explain.py
+++ b/additional/explain-visualizer/explain.py
@@ -1,4 +1,5 @@
#!/usr/bin/env python3
+# Script to analyze the queries executed against the taler-exchange db
from json import loads
from re import compile, sub, escape
@@ -83,7 +84,11 @@ log_get_params_re = None
args = None
-def get_explain(sql) -> dict:
+
+def print_query(sql):
+ """
+ Print an sql query to stdout
+ """
print("\n" + "=" * 80 + "\n")
if sqlparse:
print(sqlparse.format(sql,
@@ -93,6 +98,12 @@ def get_explain(sql) -> dict:
else:
print(fill(text=sql, width=80))
print("\n" + "=" * 80 + "\n")
+
+
+def get_explain(sql) -> dict:
+ """
+ Execute EXPLAIN ANALYZE against the database, returns the plan as JSON
dictionary
+ """
sql = sql.replace("\n", " ").replace('"', "")
sql = "EXPLAIN (ANALYZE, VERBOSE, BUFFERS, FORMAT JSON) " + sql + ";"
try:
@@ -114,23 +125,35 @@ def get_explain(sql) -> dict:
def setup_log_regex():
+ """
+ Compile the regular expressions used to identify statements in the logs
+ Must be called only when log_id is configured in args
+ """
global log_statement_re, log_execute_re, log_get_params_re
log_statement_re = compile(f'.*{args.log_id} LOG: statement:
((?!COMMIT)(?!BEGIN)(?!SET)(?!START).*);')
- log_execute_re = compile(f'.*\[(\d+)-(\d+)\].*\[(\d+)\] {args.log_id} LOG:
execute [a-zA-Z0-9_]+: ((?!COMMIT)(?!BEGIN)(?!SET)(?!START).*)')
+ log_execute_re = compile(f'.*{args.log_id} LOG: execute [a-zA-Z0-9_]+:
((?!COMMIT)(?!BEGIN)(?!SET)(?!START)[^#]+)(?:#.*DETAIL: parameters: (.*))?')
log_get_params_re = compile('(\$\d+)+ = ([^, ]+)')
-def ensure_partition_added(relation, indent):
+def ensure_relation_added(relation, indent):
+ """
+ Ensure that a relation is added in the plans dictionary
+ """
if relation not in plans:
plans[relation] = Plan(relation, Type.RELATION, indent)
-
def ensure_plan_added(plan, indent):
+ """
+ Ensure an uncategorized plan is added in the plans dictionary
+ """
idx = plan + str(indent)
plans[idx] = Plan(plan, Type.OTHER, indent)
def ensure_modify_added(plan, indent):
+ """
+ Ensure a plan with type 'ModifyTable' is in the plans dictionary
+ """
idx = plan['Relation Name']
p = Plan(plan['Relation Name'], Type.MODIFY, indent)
p.operation = plan['Operation']
@@ -142,20 +165,27 @@ def ensure_modify_added(plan, indent):
def check_most_expensive_time(plan: Plan):
+ """
+ Check and update the most expensive relation scan
+ """
global most_time_consuming
if most_time_consuming.time_spent < plan.time_spent:
most_time_consuming = plan
def parse_scan(plan, indent):
+ """
+ Parse a JSON plan which scans a relation (not CTE Scans)
+ """
relation = plan['Relation Name']
- # Remove partition suffix
+ # Remove partition suffix as we do not want each partition,
+ # but only the relation shown
if partitioned_re.match(relation):
relation = relation.split('_')[0:-1]
relation = "_".join(relation)
- ensure_partition_added(relation, indent)
+ ensure_relation_added(relation, indent)
_plan = plans[relation]
if plan['Actual Loops'] > 0:
@@ -174,11 +204,17 @@ def parse_scan(plan, indent):
def iterate_plans():
+ """
+ Iterate the plans dict and return each plan
+ """
for plan in plans:
yield plans[plan]
def parse_plan(plan, indent=2):
+ """
+ Parse the JSON plan returned from EXPLAIN (ANALYZE, BUFFERS, VERBOSE,
FORMAT JSON)
+ """
if plan['Node Type'] == 'Function Scan':
ensure_plan_added(plan['Node Type'], indent)
elif 'Scan' in plan['Node Type'] and 'CTE' not in plan['Node Type']:
@@ -196,6 +232,9 @@ def parse_plan(plan, indent=2):
def print_summary(attr_name):
+ """
+ Print a summary of the attribute with attr_name of the Plan class
+ """
total = 0
for plan in iterate_plans():
if plan.type == Type.RELATION:
@@ -206,6 +245,9 @@ def print_summary(attr_name):
def print_non_indexed():
+ """
+ Print all non indexed scans
+ """
total = 0
for plan in iterate_plans():
if plan.scan_type == 'Seq Scan':
@@ -215,6 +257,9 @@ def print_non_indexed():
def visualize(analyze_json):
+ """
+ Parse and visualize a json plan returned from EXPLAIN (ANALYZE, BUFFERS,
VERBOSE, FORMAT JSON)
+ """
if analyze_json is None:
return
@@ -249,89 +294,95 @@ def visualize(analyze_json):
print("")
-def analyze_from_query():
+def handle_query():
+ """
+ Handle and explain analyze a query passed as argument
+ """
+ print_query(args.sql)
explain = get_explain(args.sql)
visualize(explain)
-def handle_log_statement(sql):
-
+def analyze_log_query(sql):
+ """
+ Analyze a statement extracted from the logs
+ """
# reset global plans
global plans, most_time_consuming
plans = {}
most_time_consuming = Plan("default", Type.RELATION, 0)
+ print_query(sql)
explain = get_explain(sql)
visualize(explain)
-def analyze_from_logs():
+def handle_log_statement(match):
+ """
+ Handle a log line which begins with statement:
+ """
+ statement = match.group(1)
+ analyze_log_query(statement)
+
+
+def handle_log_execute(match):
+ """
+ Handle a log line which begins with execute:
+ This method will replace all parameters in the statement which are found
in `parameters:'
+ """
+ params = None
+ statement = match.group(1)
+
+ if '$' in statement:
+ _parameters = match.group(2)
+
+ if not _parameters:
+ print_query(statement)
+ print("ERROR: Could not find query parameters\n")
+ print("Make sure to set the following in postgres.conf before
collecting logs:\n")
+ print(" 'log_statement=all'")
+ print(" 'syslog_split_messages=off'")
+ print("\n")
+ exit(1)
+
+ params = log_get_params_re.findall(_parameters)
+
+ for param in params:
+ statement = sub(escape(param[0]) + r'\b',
+ escape(param[1]),
+ statement)
+
+ analyze_log_query(statement)
+
+
+def handle_logs():
+ """
+ Parse a log file and explain analyze all statements found matching the
log_id.
+ The script expects all parameters and the whole statement in one log line.
+ Thus the following parameters must be set in postgres.conf:
+ log_statement=all
+ syslog_split_messages=off
+ (Syslog must be able to handle large messages)
+ """
logs = None
+ # Needs to be called here since log_id is not set previously
setup_log_regex()
try:
logs = open(args.file, 'r')
except IOError as e:
- print("Error: cannot open file {args.file}")
+ print(f"Error: cannot open file {args.file}")
print(e.strerror)
exit(1)
- lines = logs.readlines()
- n_lines = len(lines)
-
- for idx, line in enumerate(lines):
+ for line in logs.readlines():
- statement = None
- params = None
-
- if match := log_statement_re.match(line.strip()):
- statement = match.group(1)
-
- elif match := log_execute_re.match(line.strip()):
-
- id = match.group(1)
- n = int(match.group(2)) + 1
-
- pid = match.group(3)
- statement = match.group(4)
-
- # optimistically assume that the two line statement's second
- # line is in between the next then lines
- match = None
- if statement[-1] != ';':
- idx_curr = idx + 1
- multi_line_re = compile(f'.*\[{id}-{n}] (.*)')
- while idx_curr < idx + 10:
- if match := multi_line_re.match(lines[idx_curr]):
- break
- idx_curr += 1
-
- if match is not None:
- n += 1
- statement = statement + " " + match.group(1)
-
- if '$' in statement:
- params_re = compile(f'.*\[{id}-{n}].*\[{pid}\] {args.log_id}
DETAIL: parameters: (\$\d+ = .*)')
-
- idx_curr = idx + 1
- match = None
- while idx_curr < n_lines:
- if match := params_re.match(lines[idx_curr]):
- break
- idx_curr += 1
-
- if match is None:
- print(f"ERROR: Could not find params for
query\n\n{statement}\n")
- continue
-
- params = log_get_params_re.findall(match.group(1))
- for param in params:
- statement = sub(escape(param[0]) + r'\b',
escape(param[1]), statement)
+ if match := log_statement_re.match(line):
+ handle_log_statement(match)
- if statement is not None:
- handle_log_statement(statement)
- statement = None
+ elif match := log_execute_re.match(line):
+ handle_log_execute(match)
logs.close()
@@ -384,7 +435,7 @@ def main():
dest='file',
type=str,
help='Log file to parse and execute queries')
- log_p.set_defaults(func=analyze_from_logs)
+ log_p.set_defaults(func=handle_logs)
sql_p = s_p.add_parser('query',
parents=[global_args],
@@ -393,7 +444,7 @@ def main():
dest='sql',
type=str,
help='The query to run and analyze')
- sql_p.set_defaults(func=analyze_from_query)
+ sql_p.set_defaults(func=handle_query)
global args
args = parser.parse_args()
diff --git a/experiment/scripts/database.sh b/experiment/scripts/database.sh
index 1e0d4da..09e6e05 100755
--- a/experiment/scripts/database.sh
+++ b/experiment/scripts/database.sh
@@ -68,6 +68,9 @@ function setup_config() {
sysctl -p
fi
+ # Allow longer lines to be logged by postgres
+ sed -i '1 i\$MaxMessageSize 4k' /etc/rsyslog.conf
+
# Configure postgres with an additional file and include this
# in the main configuration
echo "
@@ -77,7 +80,7 @@ function setup_config() {
syslog_ident='taler-database'
log_error_verbosity=terse
- # for pgbadger
+ # For pgbadger
# log_checkpoints=on
# log_connections=on
# log_disconnections=on
@@ -86,7 +89,10 @@ function setup_config() {
# log_autovacuum_min_duration=0
# log_error_verbosity=default
# log_duration=on
-
+ # log_statement=all
+
+ # For explain.py
+ # syslog_split_messages=off
# log_statement=all
log_min_duration_statement=${DB_MIN_DURATION_STATEMENT}
--
To stop receiving notification emails like this one, please contact
gnunet@gnunet.org.
- [taler-grid5k] 117/189: some db config, (continued)
- [taler-grid5k] 117/189: some db config, gnunet, 2022/04/28
- [taler-grid5k] 64/189: fix, gnunet, 2022/04/28
- [taler-grid5k] 96/189: fixes, add lookahead sign as param, gnunet, 2022/04/28
- [taler-grid5k] 70/189: fix, gnunet, 2022/04/28
- [taler-grid5k] 120/189: update postgres exporter to work when there is no merchant, update grafana dashboards, gnunet, 2022/04/28
- [taler-grid5k] 74/189: reduce amount of logging wallets, gnunet, 2022/04/28
- [taler-grid5k] 93/189: possibility to create multiple instances, gnunet, 2022/04/28
- [taler-grid5k] 92/189: configure merchant instance, gnunet, 2022/04/28
- [taler-grid5k] 97/189: fix, gnunet, 2022/04/28
- [taler-grid5k] 114/189: test with sharding, gnunet, 2022/04/28
- [taler-grid5k] 111/189: explain analyze now working with correct postgres config,
gnunet <=
- [taler-grid5k] 98/189: fix, gnunet, 2022/04/28
- [taler-grid5k] 102/189: fix, gnunet, 2022/04/28
- [taler-grid5k] 130/189: fix slow query amount, gnunet, 2022/04/28
- [taler-grid5k] 158/189: additional scripts improved, gnunet, 2022/04/28
- [taler-grid5k] 133/189: cleaner regex, gnunet, 2022/04/28
- [taler-grid5k] 156/189: fix, gnunet, 2022/04/28
- [taler-grid5k] 129/189: slow query time, gnunet, 2022/04/28
- [taler-grid5k] 152/189: deduplicate promtail, gnunet, 2022/04/28
- [taler-grid5k] 169/189: centos add node-exporter, gnunet, 2022/04/28
- [taler-grid5k] 185/189: update taler-perf, gnunet, 2022/04/28