osmith has uploaded this change for review. (
https://gerrit.osmocom.org/c/osmo-ttcn3-hacks/+/38185?usp=email )
Change subject: testenv: get coredump + backtrace on crash
......................................................................
testenv: get coredump + backtrace on crash
If the SUT or another test component crashes, check if a matching
coredump was registered in coredumpctl. If that is the case, then copy
it into the testdir and print + store the backtrace.
This solves the problem that it is especially tricky to get a good
backtrace when a component crashes inside a container. One needs to
grab the coredump from the host (usually handled by systemd-coredump,
we cannot override /proc/sys/kernel/core_pattern for containers so it
can't be handled in the container), then put the coredump into the
container and finally run gdb to get the backtrace inside the container
(where proper libraries and debug symbols are). This patch automates all
of these steps.
Pau requested this feature.
Related: OS#6494
Change-Id: I743c20968bda9b6d6fb9c2d23bef70ee11950761
---
M _testenv/data/podman/Dockerfile
A _testenv/testenv/coredump.py
M _testenv/testenv/daemons.py
M _testenv/testenv/podman.py
4 files changed, 100 insertions(+), 0 deletions(-)
git pull ssh://gerrit.osmocom.org:29418/osmo-ttcn3-hacks refs/changes/85/38185/1
diff --git a/_testenv/data/podman/Dockerfile b/_testenv/data/podman/Dockerfile
index 95a8c66..42b0635 100644
--- a/_testenv/data/podman/Dockerfile
+++ b/_testenv/data/podman/Dockerfile
@@ -22,6 +22,7 @@
ccache \
cmake \
flex \
+ gdb \
git \
iproute2 \
iputils-ping \
diff --git a/_testenv/testenv/coredump.py b/_testenv/testenv/coredump.py
new file mode 100644
index 0000000..0d56328
--- /dev/null
+++ b/_testenv/testenv/coredump.py
@@ -0,0 +1,89 @@
+# Copyright 2024 sysmocom - s.f.m.c. GmbH
+# SPDX-License-Identifier: GPL-3.0-or-later
+import datetime
+import fnmatch
+import json
+import logging
+import os
+import shlex
+import shutil
+import subprocess
+import testenv
+import testenv.daemons
+import testenv.testdir
+
+executable_path = None
+
+
+def executable_is_relevant(exe):
+ if testenv.args.binary_repo:
+ patterns = [
+ "*/usr/bin/open5gs-*",
+ "*/usr/bin/osmo-*",
+ ]
+
+ for pattern in patterns:
+ if fnmatch.fnmatch(exe, pattern):
+ return True
+ else:
+ if exe.startswith(testenv.args.cache):
+ return True
+
+ return False
+
+
+def get_from_coredumpctl():
+ global executable_path
+
+ logging.info("Looking for a coredump")
+
+ if not shutil.which("coredumpctl"):
+ logging.debug("coredumpctl is not available, won't try to get
coredump")
+ return
+
+ # Check for any coredump within last 3 seconds
+ since = (datetime.datetime.now() -
datetime.timedelta(seconds=3)).strftime("%Y-%m-%d %H:%M:%S")
+ cmd = ["coredumpctl", "-q", "-S", since,
"--json=short", "-n1"]
+ logging.debug(f"+ {cmd}")
+
+ p = subprocess.run(cmd, capture_output=True, text=True)
+ if p.returncode != 0:
+ logging.debug("No coredump found")
+ return
+
+ # Check if the coredump executable is from osmo-*, open5gs-*, etc.
+ coredump = json.loads(p.stdout)[0]
+ if not executable_is_relevant(coredump["exe"]):
+ logging.debug("Found an unrelated coredump, ignoring")
+ return
+
+ logging.debug("Coredump found, copying to log dir")
+ core_path = f"{testenv.testdir.testdir}/core"
+ testenv.cmd.run(
+ ["coredumpctl", "dump", "-q", "-S",
since, "-o", core_path, str(coredump["pid"]),
coredump["exe"]],
+ stdout=subprocess.DEVNULL,
+ no_podman=True,
+ )
+
+ executable_path = coredump["exe"]
+
+
+def get_backtrace():
+ global executable_path
+
+ core_path = f"{testenv.testdir.testdir}/core"
+ if not executable_path or not os.path.exists(core_path):
+ return
+
+ logging.info("Running gdb to get a backtrace")
+
+ cmd = "gdb"
+ cmd += " --batch"
+ cmd += f" {shlex.quote(executable_path)}"
+ cmd += f" {shlex.quote(core_path)}"
+ cmd += " -ex bt"
+ cmd += f" | tee {shlex.quote(core_path)}.backtrace"
+
+ testenv.cmd.run(cmd)
+
+ executable_path = None
diff --git a/_testenv/testenv/daemons.py b/_testenv/testenv/daemons.py
index c7e8722..0c9c0dd 100644
--- a/_testenv/testenv/daemons.py
+++ b/_testenv/testenv/daemons.py
@@ -7,6 +7,7 @@
import shlex
import subprocess
import testenv
+import testenv.coredump
import testenv.testdir
import time
import sys
@@ -18,6 +19,8 @@
def init():
global run_shell_on_stop
+ atexit.register(testenv.coredump.get_backtrace)
+
if not testenv.args.podman:
atexit.register(stop)
if testenv.args.shell:
@@ -111,6 +114,8 @@
return
current_test = testenv.testsuite.get_current_test()
+ testenv.coredump.get_from_coredumpctl()
+
if current_test:
logging.error(f"{daemon_name} crashed during {current_test}!")
testenv.testsuite.wait_until_test_stopped()
diff --git a/_testenv/testenv/podman.py b/_testenv/testenv/podman.py
index 2668b0d..6d801fa 100644
--- a/_testenv/testenv/podman.py
+++ b/_testenv/testenv/podman.py
@@ -10,6 +10,7 @@
import subprocess
import testenv.cmd
import testenv.testdir
+import testenv.coredump
import time
image_name = None
@@ -276,6 +277,10 @@
if not is_running():
return
+ # If we have a coredump, we must get the backtrace by running gdb inside
+ # the container. So do it before stopping the container.
+ testenv.coredump.get_backtrace()
+
if not restart and run_shell_on_stop:
logging.info("Running interactive shell before stopping container
(--shell)")
--
To view, visit
https://gerrit.osmocom.org/c/osmo-ttcn3-hacks/+/38185?usp=email
To unsubscribe, or for help writing mail filters, visit
https://gerrit.osmocom.org/settings?usp=email
Gerrit-MessageType: newchange
Gerrit-Project: osmo-ttcn3-hacks
Gerrit-Branch: master
Gerrit-Change-Id: I743c20968bda9b6d6fb9c2d23bef70ee11950761
Gerrit-Change-Number: 38185
Gerrit-PatchSet: 1
Gerrit-Owner: osmith <osmith(a)sysmocom.de>