osmith has uploaded this change for review. ( https://gerrit.osmocom.org/c/osmo-ci/+/40138?usp=email )
Change subject: ansible: build-hosts: add testenv-coredump-helper
......................................................................
ansible: build-hosts: add testenv-coredump-helper
The Osmocom jenkins nodes run inside LXCs. When we get a coredump it appears on the host. Add a helper script to the hosts so the jenkins jobs can fetch the coredumps in case an Osmocom program crashes while running a ttcn3 testsuite.
The helper script has the following safety features to ensure jenkins can't just fetch any coredump:
* Only fetch coredumps within the last 3 seconds and only if the executable matches osmo-* or open5gs-*
* Only listen on the lxc IP
Related: OS#6769
Change-Id: I7e66c98106b7028a393e3b873e96ae2dcb412c48
---
A ansible/roles/testenv-coredump-helper/files/testenv-coredump-helper.py
A ansible/roles/testenv-coredump-helper/files/testenv-coredump-helper.service
A ansible/roles/testenv-coredump-helper/handlers/main.yml
A ansible/roles/testenv-coredump-helper/tasks/main.yml
M ansible/setup-build-host.yml
5 files changed, 160 insertions(+), 0 deletions(-)
git pull ssh://gerrit.osmocom.org:29418/osmo-ci refs/changes/38/40138/1
diff --git a/ansible/roles/testenv-coredump-helper/files/testenv-coredump-helper.py b/ansible/roles/testenv-coredump-helper/files/testenv-coredump-helper.py
new file mode 100644
index 0000000..acbed17
--- /dev/null
+++ b/ansible/roles/testenv-coredump-helper/files/testenv-coredump-helper.py
@@ -0,0 +1,111 @@
+#!/usr/bin/env python3
+# Copyright 2025 sysmocom - s.f.m.c. GmbH
+# SPDX-License-Identifier: GPL-3.0-or-later
+# OS#6769: simple webserver to make Osmocom related coredumps available in LXCs
+import datetime
+import fnmatch
+import http.server
+import json
+import os
+import shutil
+import signal
+import socket
+import socketserver
+import subprocess
+import sys
+import tempfile
+
+
+NETDEV = "lxcbr0"
+IP_PATTERN = "10.0.*"
+PORT = 8042
+
+
+def find_lxc_ip():
+    cmd = ["ip", "-j", "-o", "-4", "addr", "show", "dev", NETDEV]
+    p = subprocess.run(cmd, capture_output=True, text=True, check=True)
+    ret = json.loads(p.stdout)[0]["addr_info"][0]["local"]
+    if not fnmatch.fnmatch(ret, IP_PATTERN):
+        print(f"ERROR: IP doesn't match pattern {IP_PATTERN}: {ret}")
+        sys.exit(1)
+    return ret
+
+
+def executable_is_relevant(exe):
+    basename = os.path.basename(exe)
+    patterns = [
+        "open5gs-*",
+        "osmo-*",
+    ]
+
+    for pattern in patterns:
+        if fnmatch.fnmatch(basename, pattern):
+            return True
+
+    return False
+
+
+class CustomRequestHandler(http.server.SimpleHTTPRequestHandler):
+    def do_GET(self):
+        if self.path == "/core":
+            # Check for any coredump within last 3 seconds
+            since = (datetime.datetime.now() - datetime.timedelta(seconds=3)).strftime("%Y-%m-%d %H:%M:%S")
+            cmd = ["coredumpctl", "-q", "-S", since, "--json=short", "-n1"]
+
+            p = subprocess.run(cmd, capture_output=True, text=True)
+            if p.returncode != 0:
+                self.send_error(404, "No coredump found")
+                return None
+
+            # Check if the coredump executable is from osmo-*, open5gs-*, etc.
+            coredump = json.loads(p.stdout)[0]
+            if not executable_is_relevant(coredump["exe"]):
+                self.send_error(404, "No coredump found")
+                return None
+
+            # Put coredump into a temporary file and return it
+            with tempfile.TemporaryDirectory() as tmpdirname:
+                core_path = os.path.join(tmpdirname, "core")
+                cmd = [
+                    "coredumpctl",
+                    "dump",
+                    "-q",
+                    "-S",
+                    since,
+                    "-o",
+                    core_path,
+                    str(coredump["pid"]),
+                    coredump["exe"],
+                ]
+                subprocess.run(cmd, stdout=subprocess.DEVNULL, check=True)
+
+                with open(core_path, "rb") as f:
+                    self.send_response(200)
+                    self.send_header("X-Executable-Path", coredump["exe"])
+                    self.end_headers()
+                    self.wfile.write(f.read())
+        else:
+            self.send_error(404, "File Not Found")
+
+
+def signal_handler(sig, frame):
+    sys.exit(0)
+
+
+def main():
+    if not shutil.which("coredumpctl"):
+        print("ERROR: coredumpctl not found!")
+        sys.exit(1)
+
+    ip = os.environ.get("LXC_HOST_IP") or find_lxc_ip()
+    print(f"Listening on {ip}:{PORT}")
+    signal.signal(signal.SIGINT, signal_handler)
+    with socketserver.TCPServer((ip, PORT), CustomRequestHandler, False) as httpd:
+        httpd.socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
+        httpd.server_bind()
+        httpd.server_activate()
+        httpd.serve_forever()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/ansible/roles/testenv-coredump-helper/files/testenv-coredump-helper.service b/ansible/roles/testenv-coredump-helper/files/testenv-coredump-helper.service
new file mode 100644
index 0000000..ef5a851
--- /dev/null
+++ b/ansible/roles/testenv-coredump-helper/files/testenv-coredump-helper.service
@@ -0,0 +1,12 @@
+[Unit]
+Description=testenv coredump helper
+After=lxc.service
+
+[Service]
+Environment="PYTHONUNBUFFERED=1"
+Type=simple
+Restart=always
+ExecStart=/opt/testenv-coredump-helper/testenv-coredump-helper
+
+[Install]
+WantedBy=multi-user.target
diff --git a/ansible/roles/testenv-coredump-helper/handlers/main.yml b/ansible/roles/testenv-coredump-helper/handlers/main.yml
new file mode 100644
index 0000000..0f7ef57
--- /dev/null
+++ b/ansible/roles/testenv-coredump-helper/handlers/main.yml
@@ -0,0 +1,5 @@
+---
+- name: restart testenv-coredump-helper
+  service:
+    name: testenv-coredump-helper
+    state: restarted
diff --git a/ansible/roles/testenv-coredump-helper/tasks/main.yml b/ansible/roles/testenv-coredump-helper/tasks/main.yml
new file mode 100644
index 0000000..9ff2769
--- /dev/null
+++ b/ansible/roles/testenv-coredump-helper/tasks/main.yml
@@ -0,0 +1,31 @@
+---
+- name: install coredumpctl
+  apt:
+    name:
+      - systemd-coredump
+    cache_valid_time: 3600
+    update_cache: yes
+
+- name: mkdir /opt/testenv-coredump-helper
+  ansible.builtin.file:
+    path: /opt/testenv-coredump-helper
+    state: directory
+
+- name: install testenv-coredump-helper
+  ansible.builtin.copy:
+    src: testenv-coredump-helper.py
+    dest: /opt/testenv-coredump-helper/testenv-coredump-helper
+    mode: '0755'
+  notify: restart testenv-coredump-helper
+
+- name: install testenv-coredump-helper service
+  ansible.builtin.copy:
+    src: testenv-coredump-helper.service
+    dest: /etc/systemd/system/testenv-coredump-helper.service
+    mode: '0644'
+  notify: restart testenv-coredump-helper
+
+- name: enable testenv-coredump-helper service
+  ansible.builtin.systemd_service:
+    name: testenv-coredump-helper
+    enabled: true
diff --git a/ansible/setup-build-host.yml b/ansible/setup-build-host.yml
index ed8def5..d1d9874 100644
--- a/ansible/setup-build-host.yml
+++ b/ansible/setup-build-host.yml
@@ -18,3 +18,4 @@
         update_cache: yes
   roles:
     - name: apt-allow-relinfo-change
+    - name: testenv-coredump-helper