laforge has submitted this change. ( https://gerrit.osmocom.org/c/osmo-remsim/+/42300?usp=email )
Change subject: bankd: Avoid osmocom logging mutex deadlock in signal handling
......................................................................
bankd: Avoid osmocom logging mutex deadlock in signal handling
The main thread communicates slotmap add + delete via POSIX signals
to the worker threads. As those signals interrupt the normal
processing of the worker thread, they might get delivered while the
thread is already logging something, causing a deadlock. This has
been observed in the real world in the following stack trace (where it's
actually two nested signals):
Thread 45 (Thread 0x7fa014ff96c0 (LWP 620753) "osmo-remsim-ban"):
#0 futex_wait (private=0, expected=2, futex_word=0x7fa0a992b360 <osmo_log_tgt_mutex>) at ../sysdeps/nptl/futex-internal.h:146
#1 __GI___lll_lock_wait (futex=futex@entry=0x7fa0a992b360 <osmo_log_tgt_mutex>, private=0) at ./nptl/lowlevellock.c:49
#2 0x00007fa0a9730482 in lll_mutex_lock_optimized (mutex=0x7fa0a992b360 <osmo_log_tgt_mutex>) at ./nptl/pthread_mutex_lock.c:48
#3 ___pthread_mutex_lock (mutex=0x7fa0a992b360 <osmo_log_tgt_mutex>) at ./nptl/pthread_mutex_lock.c:93
#4 0x00007fa0a98d878d in log_tgt_mutex_lock_impl () from /usr/local/lib/libosmocore.so.22
#5 0x00007fa0a98db9a1 in log_check_level () from /usr/local/lib/libosmocore.so.22
#6 0x0000559db6278e31 in handle_sig_mapadd (sig=<optimized out>) at bankd_main.c:558
#7 <signal handler called>
#8 0x00007fa0a9701548 in __vfprintf_internal (s=s@entry=0x7fa014fe3a50, format=format@entry=0x7fa0a991905a "%ld ", ap=ap@entry=0x7fa014fe3bd0, mode_flags=mode_flags@entry=0) at ./stdio-common/vfprintf-internal.c:983
#9 0x00007fa0a9722758 in __vsnprintf_internal (string=0x7fa014fe3eb0 "", maxlen=<optimized out>, format=0x7fa0a991905a "%ld ", args=args@entry=0x7fa014fe3bd0, mode_flags=mode_flags@entry=0) at ./libio/vsnprintf.c:114
#10 0x00007fa0a96fcca2 in __GI___snprintf (s=<optimized out>, maxlen=<optimized out>, format=<optimized out>) at ./stdio-common/snprintf.c:31
#11 0x00007fa0a98d91b4 in _output_buf () from /usr/local/lib/libosmocore.so.22
#12 0x00007fa0a98d9def in _output () from /usr/local/lib/libosmocore.so.22
#13 0x00007fa0a98da0ef in osmo_vlogp () from /usr/local/lib/libosmocore.so.22
#14 0x00007fa0a98da2bb in logp2 () from /usr/local/lib/libosmocore.so.22
#15 0x0000559db62790ba in handle_sig_mapdel (sig=35) at bankd_main.c:546
#16 <signal handler called>
#17 0x00007fa0a97a025f in __GI___poll (fds=0x7fa014fe5650, nfds=1, timeout=-1) at ../sysdeps/unix/sysv/linux/poll.c:29
#18 0x00007fa0a9892d53 in ?? () from /lib/x86_64-linux-gnu/libpcsclite.so.1
#19 0x00007fa0a988fd9e in SCardTransmit () from /lib/x86_64-linux-gnu/libpcsclite.so.1
#20 0x0000559db627c9ef in pcsc_transceive (worker=0x559dbd5c5750, out=<optimized out>, out_len=<optimized out>, in=<optimized out>, in_len=0x7fa014fe57b8) at bankd_pcsc.c:319
#21 0x0000559db627a479 in worker_handle_tpduModemToCard (pdu=0x7fa050003a50, worker=0x559dbd5c5750) at bankd_main.c:826
#22 worker_handle_rspro (pdu=0x7fa050003a50, worker=0x559dbd5c5750) at bankd_main.c:892
#23 worker_transceive_loop (worker=0x559dbd5c5750) at bankd_main.c:1005
#24 0x0000559db627aff2 in worker_main (arg=<optimized out>) at bankd_main.c:1079
#25 0x00007fa0a972d1f5 in start_thread (arg=<optimized out>) at ./nptl/pthread_create.c:442
#26 0x00007fa0a97ad8dc in clone3 () at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:81
As a hot-fix, let's avoid logging from the handle_sig_map{del,add}()
functions at all, making them safe against a deadlock around this mutex.
We should decide how to proceed in general with potentially some
architectural changes later on; any such changes are not suitable as a
hot fix due to their potential of introducing other regressions.
Change-Id: I5ea32886dfaf624b4dc5ad7924941c7b904c1d36
Related: SYS#7930
---
M src/bankd/bankd_main.c
1 file changed, 12 insertions(+), 10 deletions(-)
Approvals:
Jenkins Builder: Verified
laforge: Looks good to me, approved
diff --git a/src/bankd/bankd_main.c b/src/bankd/bankd_main.c
index 1adc34c..5e33b41 100644
--- a/src/bankd/bankd_main.c
+++ b/src/bankd/bankd_main.c
@@ -521,9 +521,10 @@
static int worker_send_rspro(struct bankd_worker *worker, RsproPDU_t *pdu);
-static void worker_set_state(struct bankd_worker *worker, enum bankd_worker_state new_state)
+static void worker_set_state(struct bankd_worker *worker, enum bankd_worker_state new_state, bool quiet)
{
- LOGW(worker, "Changing state to %s\n", get_value_string(worker_state_names, new_state));
+ if (!quiet)
+ LOGW(worker, "Changing state to %s\n", get_value_string(worker_state_names, new_state));
worker->state = new_state;
worker->timeout = 0;
}
@@ -540,25 +541,26 @@
/* signal handler for receiving SIGMAPDEL from main thread */
static void handle_sig_mapdel(int sig)
{
- LOGW(g_worker, "SIGMAPDEL received: Main thread informs us our map is gone\n");
+ /* DO NOT LOG ANYTHING HERE, IT WILL DEADLOCK WITH THE osmo_log_tgt_mutex */
OSMO_ASSERT(sig == SIGMAPDEL);
if (g_worker->state >= BW_ST_CONN_CLIENT_MAPPED) {
g_worker->slot.bank_id = 0xffff;
g_worker->slot.slot_nr = 0xffff;
- worker_set_state(g_worker, BW_ST_CONN_CLIENT_UNMAPPED);
+ worker_set_state(g_worker, BW_ST_CONN_CLIENT_UNMAPPED, true);
}
}
/* signal handler for receiving SIGMAPADD from main thread */
static void handle_sig_mapadd(int sig)
{
- LOGW(g_worker, "SIGMAPADD received\n");
+ /* DO NOT LOG ANYTHING HERE, IT WILL DEADLOCK WITH THE osmo_log_tgt_mutex */
/* do nothing */
}
static void handle_sig_usr1(int sig)
{
OSMO_ASSERT(sig == SIGUSR1);
+ /* FIXME: WE SHOULD NOT LOG ANYTHING HERE, IT WILL DEADLOCK WITH THE osmo_log_tgt_mutex */
if (pthread_equal(g_bankd->main, pthread_self())) {
struct bankd_worker *worker;
@@ -612,7 +614,7 @@
if (rc < 0)
return rc;
- worker_set_state(worker, BW_ST_CONN_CLIENT_MAPPED_CARD);
+ worker_set_state(worker, BW_ST_CONN_CLIENT_MAPPED_CARD, false);
/* FIXME: notify client about this state change */
return 0;
@@ -763,7 +765,7 @@
}
worker->client.clslot.client_id = pdu->msg.choice.connectClientReq.clientSlot->clientId;
worker->client.clslot.slot_nr = pdu->msg.choice.connectClientReq.clientSlot->slotNr;
- worker_set_state(worker, BW_ST_CONN_CLIENT);
+ worker_set_state(worker, BW_ST_CONN_CLIENT, false);
if (worker_try_slotmap(worker) >= 0)
res = ResultCode_ok;
@@ -1041,7 +1043,7 @@
g_worker = (struct bankd_worker *) arg;
- worker_set_state(g_worker, BW_ST_INIT);
+ worker_set_state(g_worker, BW_ST_INIT, false);
/* not permitted in multithreaded environment */
talloc_disable_null_tracking();
@@ -1065,7 +1067,7 @@
g_worker->client.peer_addr_len = sizeof(g_worker->client.peer_addr);
- worker_set_state(g_worker, BW_ST_ACCEPTING);
+ worker_set_state(g_worker, BW_ST_ACCEPTING, false);
/* first wait for an incoming TCP connection */
rc = accept(g_worker->bankd->accept_fd, (struct sockaddr *) &g_worker->client.peer_addr,
&g_worker->client.peer_addr_len);
@@ -1075,7 +1077,7 @@
g_worker->client.fd = rc;
worker_client_addrstr(buf, sizeof(buf), g_worker);
LOGW(g_worker, "Accepted connection from %s\n", buf);
- worker_set_state(g_worker, BW_ST_CONN_WAIT_ID);
+ worker_set_state(g_worker, BW_ST_CONN_WAIT_ID, false);
/* run the main worker transceive loop body until there was some error */
while (1) {
--
To view, visit https://gerrit.osmocom.org/c/osmo-remsim/+/42300?usp=email
To unsubscribe, or for help writing mail filters, visit https://gerrit.osmocom.org/settings?usp=email
Gerrit-MessageType: merged
Gerrit-Project: osmo-remsim
Gerrit-Branch: master
Gerrit-Change-Id: I5ea32886dfaf624b4dc5ad7924941c7b904c1d36
Gerrit-Change-Number: 42300
Gerrit-PatchSet: 2
Gerrit-Owner: laforge <laforge(a)osmocom.org>
Gerrit-Reviewer: Jenkins Builder
Gerrit-Reviewer: fixeria <vyanitskiy(a)sysmocom.de>
Gerrit-Reviewer: laforge <laforge(a)osmocom.org>
Gerrit-Reviewer: lynxis lazus <lynxis(a)fe80.eu>
Gerrit-Reviewer: pespin <pespin(a)sysmocom.de>
Attention is currently required from: fixeria, lynxis lazus, pespin.
laforge has posted comments on this change by laforge. ( https://gerrit.osmocom.org/c/osmo-remsim/+/42300?usp=email )
Change subject: bankd: Avoid osmocom logging mutex deadlock in signal handling
......................................................................
Patch Set 2: Code-Review+2
(1 comment)
Commit Message:
https://gerrit.osmocom.org/c/osmo-remsim/+/42300/comment/af528f29_1f2fb8e2?… :
PS1, Line 13: the following stack trace
> the stack trace is missing?
mh, I guess git has removed it due to it containing hash marks. IT was definitely in my local editor...
--
To view, visit https://gerrit.osmocom.org/c/osmo-remsim/+/42300?usp=email
To unsubscribe, or for help writing mail filters, visit https://gerrit.osmocom.org/settings?usp=email
Gerrit-MessageType: comment
Gerrit-Project: osmo-remsim
Gerrit-Branch: master
Gerrit-Change-Id: I5ea32886dfaf624b4dc5ad7924941c7b904c1d36
Gerrit-Change-Number: 42300
Gerrit-PatchSet: 2
Gerrit-Owner: laforge <laforge(a)osmocom.org>
Gerrit-Reviewer: Jenkins Builder
Gerrit-Reviewer: fixeria <vyanitskiy(a)sysmocom.de>
Gerrit-Reviewer: laforge <laforge(a)osmocom.org>
Gerrit-Reviewer: lynxis lazus <lynxis(a)fe80.eu>
Gerrit-Reviewer: pespin <pespin(a)sysmocom.de>
Gerrit-Attention: fixeria <vyanitskiy(a)sysmocom.de>
Gerrit-Attention: pespin <pespin(a)sysmocom.de>
Gerrit-Attention: lynxis lazus <lynxis(a)fe80.eu>
Gerrit-Comment-Date: Tue, 10 Mar 2026 09:25:00 +0000
Gerrit-HasComments: Yes
Gerrit-Has-Labels: Yes
Comment-In-Reply-To: fixeria <vyanitskiy(a)sysmocom.de>
laforge has submitted this change. ( https://gerrit.osmocom.org/c/pysim/+/42278?usp=email )
Change subject: tests/pySim-smpp2sim_test/card_sanitizer: update card backup with new test keyset
......................................................................
tests/pySim-smpp2sim_test/card_sanitizer: update card backup with new test keyset
In our test setup we run the card_sanitizer.py script regualary to ensure that
we have consistent start conditions when running our tests. In case a testcase
crashes for some reason and leaves messed up files on a test card. The
card_sanitizer.py script will ensure that any problem like that is cleaned up
over night.
For the testcases we are about to add in the patch following this one, we need
to provision a new test keyset to one of our test cards. This has been already
done manually. However since the card_sanitizer still has the old keys in its
backup we will have to update that as well.
Change-Id: I5aa8a413b19b3e43a79d03e904daab50b4b1e767
Related: OS#6868
---
M tests/card_sanitizer/card_backup_3b9f96801f878031e073fe211b674a357530350265f8_8949440000001155314.script
1 file changed, 3 insertions(+), 3 deletions(-)
Approvals:
laforge: Looks good to me, approved
Jenkins Builder: Verified
diff --git a/tests/card_sanitizer/card_backup_3b9f96801f878031e073fe211b674a357530350265f8_8949440000001155314.script b/tests/card_sanitizer/card_backup_3b9f96801f878031e073fe211b674a357530350265f8_8949440000001155314.script
index 682d700..4285771 100644
--- a/tests/card_sanitizer/card_backup_3b9f96801f878031e073fe211b674a357530350265f8_8949440000001155314.script
+++ b/tests/card_sanitizer/card_backup_3b9f96801f878031e073fe211b674a357530350265f8_8949440000001155314.script
@@ -2200,9 +2200,9 @@
update_record 7 fe02101da012f436d06824ecdd15050419ff9affffffffffffffffffffffffffffffff
update_record 8 fe02116929a373388ac904aff57ff57f6b3431ffffffffffffffffffffffffffffffff
update_record 9 fe0212a99245a5dc814e2f4c1aa908e9946e03ffffffffffffffffffffffffffffffff
-update_record 10 fe0310521312c05a9aea93d70d44405172a580ffffffffffffffffffffffffffffffff
-update_record 11 fe0311a9e45c72d45abde7db74261ee0c11b1bffffffffffffffffffffffffffffffff
-update_record 12 fe0312867ba36b5873d60ea8b2cdcf3c0ddddaffffffffffffffffffffffffffffffff
+update_record 10 fe03601111111111111111111111111111111111111111111111111111111111111111
+update_record 11 fe03612222222222222222222222222222222222222222222222222222222222222222
+update_record 12 fe03623333333333333333333333333333333333333333333333333333333333333333
#
################################################################################
# MF/DF.SYSTEM/EF.SIM_AUTH_COUNTER #
--
To view, visit https://gerrit.osmocom.org/c/pysim/+/42278?usp=email
To unsubscribe, or for help writing mail filters, visit https://gerrit.osmocom.org/settings?usp=email
Gerrit-MessageType: merged
Gerrit-Project: pysim
Gerrit-Branch: master
Gerrit-Change-Id: I5aa8a413b19b3e43a79d03e904daab50b4b1e767
Gerrit-Change-Number: 42278
Gerrit-PatchSet: 2
Gerrit-Owner: dexter <pmaier(a)sysmocom.de>
Gerrit-Reviewer: Jenkins Builder
Gerrit-Reviewer: daniel <dwillmann(a)sysmocom.de>
Gerrit-Reviewer: laforge <laforge(a)osmocom.org>