pespin has uploaded this change for review. ( https://gerrit.osmocom.org/c/osmo-uecups/+/27739 )
Change subject: Fix use-after-free by tun thread after tun obj destroyed ......................................................................
Fix use-after-free by tun thread after tun obj destroyed
The main thread calls pthread_cancel before freeing the tun object. However, pthread_cancel doesn't kill the thread synchronously (man pthread_cancel). Hence, the tun thread may still be running for a while after the tun object is/has been(ing) freed. Let's avoid this by making sure the thread is stopped before freeing the object. To accomplish it, we must wait for the thread to be cancelled. A cleanup routie is added which will signal the "tun_released" message to the main thread through an osmo_itq, which will then free the object (since talloc context is managed by the main thread).
Related: SYS#5523 Change-Id: Idf005359afb41d3413b09281a9ff937d5eafcc7c --- M daemon/daemon_vty.c M daemon/internal.h M daemon/main.c M daemon/tun_device.c 4 files changed, 85 insertions(+), 15 deletions(-)
git pull ssh://gerrit.osmocom.org:29418/osmo-uecups refs/changes/39/27739/1
diff --git a/daemon/daemon_vty.c b/daemon/daemon_vty.c index 3c44ed1..cfbe421 100644 --- a/daemon/daemon_vty.c +++ b/daemon/daemon_vty.c @@ -99,7 +99,7 @@ vty_out(vty, "Cannot destrory non-existant TUN%s", VTY_NEWLINE); return CMD_WARNING; } - _tun_device_deref_destroy(tun); + _tun_device_deref_release(tun); pthread_rwlock_unlock(&g_daemon->rwlock);
return CMD_SUCCESS; diff --git a/daemon/internal.h b/daemon/internal.h index 09ba52e..fc5708d 100644 --- a/daemon/internal.h +++ b/daemon/internal.h @@ -7,6 +7,7 @@ #include <sys/socket.h> #include <osmocom/core/linuxlist.h> #include <osmocom/core/write_queue.h> +#include <osmocom/core/it_q.h> #include <osmocom/core/utils.h>
struct nl_sock; @@ -84,6 +85,13 @@ /*********************************************************************** * TUN Device ***********************************************************************/ +/* Message sent tun thread -> main thread through osmo_itq */ +struct gtp_daemon_itq_msg { + struct llist_head list; + struct { + struct tun_device *tun; + } tun_released; /* tun became stopped and can be freed */ +};
struct tun_device { /* entry in global list */ @@ -110,6 +118,9 @@
/* the thread handling Rx from the tun fd */ pthread_t thread; + + /* Used to store messages to be sent to main thread, since tun thread doesn't allocate through talloc */ + struct gtp_daemon_itq_msg itq_msg; };
struct tun_device * @@ -121,9 +132,10 @@ struct tun_device * _tun_device_find(struct gtp_daemon *d, const char *devname);
-void _tun_device_deref_destroy(struct tun_device *tun); +void _tun_device_destroy(struct tun_device *tun);
bool _tun_device_release(struct tun_device *tun); +void _tun_device_deref_release(struct tun_device *tun);
bool tun_device_release(struct tun_device *tun);
@@ -222,6 +234,12 @@ struct osmo_stream_srv_link *cups_link; struct osmo_signalfd *signalfd;
+ /* inter-thread queue between main thread and workers, pass struct gtp_daemon_itq_msg: */ + struct osmo_it_q *itq; + + /* Number of tunnels in progrress of being released: */ + unsigned int reset_all_state_tun_remaining; + struct { char *cups_local_ip; uint16_t cups_local_port; diff --git a/daemon/main.c b/daemon/main.c index 76aeab5..c8f63f6 100644 --- a/daemon/main.c +++ b/daemon/main.c @@ -58,6 +58,7 @@ /* client socket */ struct osmo_stream_srv *srv; char sockname[OSMO_SOCK_NAME_MAXLEN]; + bool reset_all_state_res_pending; };
struct subprocess { @@ -493,8 +494,12 @@ subprocess_destroy(p, SIGKILL); }
- jres = gen_uecups_result("reset_all_state_res", "OK"); - cups_client_tx_json(cc, jres); + if (d->reset_all_state_tun_remaining == 0) { + jres = gen_uecups_result("reset_all_state_res", "OK"); + cups_client_tx_json(cc, jres); + } else { + cc->reset_all_state_res_pending = true; + }
return 0; } @@ -669,6 +674,31 @@ } }
+static void gtp_daemon_itq_read_cb(struct osmo_it_q *q, struct llist_head *item) +{ + struct gtp_daemon *d = (struct gtp_daemon *)q->data; + struct gtp_daemon_itq_msg *itq_msg = container_of(item, struct gtp_daemon_itq_msg, list); + + LOGP(DTUN, LOGL_DEBUG, "Rx new itq message from %s\n", + itq_msg->tun_released.tun->devname); + + _tun_device_destroy(itq_msg->tun_released.tun); + if (d->reset_all_state_tun_remaining > 0) { + d->reset_all_state_tun_remaining--; + if (d->reset_all_state_tun_remaining == 0) { + struct cups_client *cc; + llist_for_each_entry(cc, &d->cups_clients, list) { + json_t * jres; + if (!cc->reset_all_state_res_pending) + continue; + cc->reset_all_state_res_pending = false; + jres = gen_uecups_result("reset_all_state_res", "OK"); + cups_client_tx_json(cc, jres); + } + } + } +} + static struct gtp_daemon *gtp_daemon_alloc(void *ctx) { struct gtp_daemon *d = talloc_zero(ctx, struct gtp_daemon); @@ -682,6 +712,9 @@ pthread_rwlock_init(&d->rwlock, NULL); d->main_thread = pthread_self();
+ d->itq = osmo_it_q_alloc(d, "itq", 4096, gtp_daemon_itq_read_cb, d); + osmo_fd_register(&d->itq->event_ofd); + INIT_LLIST_HEAD(&d->cups_clients);
d->cfg.cups_local_ip = talloc_strdup(d, "localhost"); diff --git a/daemon/tun_device.c b/daemon/tun_device.c index 7d1948f..86be67f 100644 --- a/daemon/tun_device.c +++ b/daemon/tun_device.c @@ -120,6 +120,14 @@ return 0; }
+static void tun_device_pthread_cleanup_routine(void *data) +{ + struct tun_device *tun = data; + LOGTUN(tun, LOGL_DEBUG, "pthread_cleanup\n"); + int rc = osmo_it_q_enqueue(tun->d->itq, &tun->itq_msg, list); + OSMO_ASSERT(rc == 0); +} + /* one thread for reading from each TUN device (TUN -> GTP encapsulation) */ static void *tun_device_thread(void *arg) { @@ -136,6 +144,8 @@ gtph->flags = 0x30; gtph->type = GTP_TPDU;
+ pthread_cleanup_push(tun_device_pthread_cleanup_routine, tun); + while (1) { struct gtp_tunnel *t; struct pkt_info pinfo; @@ -187,6 +197,8 @@ exit(1); } } + + pthread_cleanup_pop(tun_device_pthread_cleanup_routine); }
static int tun_open(int flags, const char *name) @@ -376,24 +388,24 @@ return tun; }
-/* UNLOCKED hard/forced destroy; caller must make sure references are cleaned up */ -static void _tun_device_destroy(struct tun_device *tun) +/* UNLOCKED hard/forced destroy; caller must make sure references are cleaned + * up, and tun thread is stopped beforehand by calling + * _tun_device_{deref_}release */ +void _tun_device_destroy(struct tun_device *tun) { /* talloc is not thread safe, all alloc/free must come from main thread */ ASSERT_MAIN_THREAD(tun->d); + LOGTUN(tun, LOGL_INFO, "Destroying\n");
- pthread_cancel(tun->thread); - llist_del(&tun->list); if (tun->netns_name) close(tun->netns_fd); close(tun->fd); nl_socket_free(tun->nl); - LOGTUN(tun, LOGL_INFO, "Destroying\n"); talloc_free(tun); }
-/* UNLOCKED remove all objects referencing this tun and then destroy */ -void _tun_device_deref_destroy(struct tun_device *tun) +/* UNLOCKED remove all objects referencing this tun and then start async tun release procedure */ +void _tun_device_deref_release(struct tun_device *tun) { struct gtp_daemon *d = tun->d; char *devname = talloc_strdup(d, tun->devname); @@ -412,12 +424,12 @@ * check if the tun can still be found in the list */ tun2 = _tun_device_find(d, devname); if (tun2 && tun2 == tun) - _tun_device_destroy(tun2); + _tun_device_release(tun2);
talloc_free(devname); }
-/* UNLOCKED release a reference; destroy if refcount drops to 0 */ +/* UNLOCKED release a reference; start async tun release procedure if refcount drops to 0 */ bool _tun_device_release(struct tun_device *tun) { bool released = false; @@ -427,10 +439,17 @@
tun->use_count--; if (tun->use_count == 0) { - _tun_device_destroy(tun); + LOGTUN(tun, LOGL_INFO, "Releasing\n"); + llist_del(&tun->list); + tun->itq_msg.tun_released.tun = tun; + tun->d->reset_all_state_tun_remaining++; + /* We cancel the thread: the pthread_cleanup routing will send a message + * back to us (main thread) when finally cancelled. */ + pthread_cancel(tun->thread); released = true; - } else + } else { LOGTUN(tun, LOGL_DEBUG, "Release; new use_count=%lu\n", tun->use_count); + }
return released; }