pespin has submitted this change. ( https://gerrit.osmocom.org/c/libosmo-pfcp/+/42120?usp=email )
Change subject: cp_peer: Implement local originated heartbeat procedure ......................................................................
cp_peer: Implement local originated heartbeat procedure
Submit PFCP Heartbeat Requests at the configured interval, and time out if no PFCP Heartbeat Response is received within the configured timeout.
Upon timeout, the cp_peer assoc_cb() is called to notify the user that the peer is no longer considered associated. The cp_peer then automatically keeps trying to re-associate.
Default value for OSMO_PFCP_TIMER_HEARTBEAT_RESP is increased to 35s to allow for a single Heartbeat Request packet loss.
Related: SYS#7294 Change-Id: I7efc0961e1ea39dd7f4cc6ba96be4cf5ce9a2d6c --- M include/osmocom/pfcp/pfcp_cp_peer_private.h M src/libosmo-pfcp/pfcp_cp_peer.c M src/libosmo-pfcp/pfcp_endpoint.c 3 files changed, 104 insertions(+), 9 deletions(-)
Approvals: Jenkins Builder: Verified laforge: Looks good to me, but someone else must approve pespin: Looks good to me, approved osmith: Looks good to me, but someone else must approve
diff --git a/include/osmocom/pfcp/pfcp_cp_peer_private.h b/include/osmocom/pfcp/pfcp_cp_peer_private.h index b8b0af7..503912d 100644 --- a/include/osmocom/pfcp/pfcp_cp_peer_private.h +++ b/include/osmocom/pfcp/pfcp_cp_peer_private.h @@ -46,6 +46,9 @@ /* Application private data for assoc_cb, in case ep->priv does not suffice. */ void *priv;
+ struct osmo_timer_list heartbeat_tx_timer; + struct osmo_timer_list heartbeat_rx_timer; + struct osmo_use_count use_count; struct osmo_use_count_entry use_count_buf[128]; }; diff --git a/src/libosmo-pfcp/pfcp_cp_peer.c b/src/libosmo-pfcp/pfcp_cp_peer.c index 0c29df7..2e4fd1b 100644 --- a/src/libosmo-pfcp/pfcp_cp_peer.c +++ b/src/libosmo-pfcp/pfcp_cp_peer.c @@ -45,12 +45,14 @@ enum pfcp_cp_peer_fsm_event { PFCP_CP_PEER_EV_RX_ASSOC_SETUP_RESP, PFCP_CP_PEER_EV_RX_ASSOC_UPDATE_REQ, + PFCP_CP_PEER_EV_HEARTBEAT_TIMEOUT, PFCP_CP_PEER_EV_USE_COUNT_ZERO, };
static const struct value_string pfcp_cp_peer_fsm_event_names[] = { OSMO_VALUE_STRING(PFCP_CP_PEER_EV_RX_ASSOC_SETUP_RESP), OSMO_VALUE_STRING(PFCP_CP_PEER_EV_RX_ASSOC_UPDATE_REQ), + OSMO_VALUE_STRING(PFCP_CP_PEER_EV_HEARTBEAT_TIMEOUT), OSMO_VALUE_STRING(PFCP_CP_PEER_EV_USE_COUNT_ZERO), {} }; @@ -91,6 +93,62 @@ return 0; }
+static int on_pfcp_heartbeat_resp(struct osmo_pfcp_msg *req, struct osmo_pfcp_msg *rx_resp, const char *errmsg) +{ + struct osmo_fsm_inst *fi = req->ctx.peer_fi; + struct osmo_pfcp_cp_peer *cp_peer = fi->priv; + + if (!rx_resp) { + OSMO_LOG_PFCP_MSG(req, LOGL_NOTICE, "Error: PFCP Heartbeat Response: %s\n", + errmsg ? : "no response received"); + return 0; + } + + OSMO_LOG_PFCP_MSG(rx_resp, LOGL_INFO, "Rx PFCP Heartbeat Response\n"); + + if (fi->state != PFCP_CP_PEER_ST_ASSOCIATED) + return 0; + + unsigned int tval_rx_heartbeat_s = + osmo_tdef_get(cp_peer->ep->cfg.tdefs, OSMO_PFCP_TIMER_HEARTBEAT_RESP, OSMO_TDEF_S, -1); + osmo_timer_schedule(&cp_peer->heartbeat_rx_timer, tval_rx_heartbeat_s, 0); + + return 0; +} + +static void pfcp_cp_peer_tx_heartbeat_req(struct osmo_pfcp_cp_peer *cp_peer) +{ + struct osmo_pfcp_msg *m; + + m = osmo_pfcp_cp_peer_new_req(cp_peer, OSMO_PFCP_MSGT_HEARTBEAT_REQ); + m->ies.heartbeat_req.recovery_time_stamp = osmo_pfcp_endpoint_get_recovery_timestamp(cp_peer->ep); + + m->ctx.resp_cb = on_pfcp_heartbeat_resp; + + LOG_CP_PEER(cp_peer, LOGL_INFO, "Tx PFCP Heartbeat Request\n"); + + if (osmo_pfcp_endpoint_tx(cp_peer->ep, m)) + LOG_CP_PEER(cp_peer, LOGL_ERROR, "Failed to transmit PFCP Heartbeat Request to peer\n"); +} + +static void pfcp_cp_peer_heartbeat_tx_timer_cb(void *data) +{ + struct osmo_pfcp_cp_peer *cp_peer = data; + unsigned int tval_tx_heartbeat_s = + osmo_tdef_get(cp_peer->ep->cfg.tdefs, OSMO_PFCP_TIMER_HEARTBEAT_REQ, OSMO_TDEF_S, -1); + + pfcp_cp_peer_tx_heartbeat_req(cp_peer); + + osmo_timer_schedule(&cp_peer->heartbeat_tx_timer, tval_tx_heartbeat_s, 0); +} + +static void pfcp_cp_peer_heartbeat_rx_timer_cb(void *data) +{ + struct osmo_pfcp_cp_peer *cp_peer = data; + + osmo_fsm_inst_dispatch(cp_peer->fi, PFCP_CP_PEER_EV_HEARTBEAT_TIMEOUT, NULL); +} + /* Allocate PFCP CP peer FSM and start sending PFCP Association Setup Request messages to remote_addr, using endpoint * ep. 
As soon as a successful response is received, change to state PFCP_CP_PEER_ST_ASSOCIATED. */ @@ -117,9 +175,20 @@ osmo_use_count_make_static_entries(&cp_peer->use_count, cp_peer->use_count_buf, ARRAY_SIZE(cp_peer->use_count_buf));
osmo_fsm_inst_update_id_f_sanitize(fi, '-', osmo_sockaddr_to_str_c(OTC_SELECT, &cp_peer->remote_addr)); + + osmo_timer_setup(&cp_peer->heartbeat_tx_timer, pfcp_cp_peer_heartbeat_tx_timer_cb, cp_peer); + osmo_timer_setup(&cp_peer->heartbeat_rx_timer, pfcp_cp_peer_heartbeat_rx_timer_cb, cp_peer); return cp_peer; }
+ +static void pfcp_cp_peer_fsm_cleanup(struct osmo_fsm_inst *fi, enum osmo_fsm_term_cause cause) +{ + struct osmo_pfcp_cp_peer *cp_peer = fi->priv; + osmo_timer_del(&cp_peer->heartbeat_tx_timer); + osmo_timer_del(&cp_peer->heartbeat_rx_timer); +} + void osmo_pfcp_cp_peer_free(struct osmo_pfcp_cp_peer *cp_peer) { if (!cp_peer) @@ -257,8 +326,18 @@ static void pfcp_cp_peer_associated_onenter(struct osmo_fsm_inst *fi, uint32_t prev_state) { struct osmo_pfcp_cp_peer *cp_peer = fi->priv; + unsigned int tval_rx_heartbeat_s = + osmo_tdef_get(cp_peer->ep->cfg.tdefs, OSMO_PFCP_TIMER_HEARTBEAT_RESP, OSMO_TDEF_S, -1); LOG_CP_PEER(cp_peer, LOGL_NOTICE, "Associated with UPF %s\n", osmo_sockaddr_to_str_c(OTC_SELECT, &cp_peer->remote_addr)); + + /* Send first Heartbeat Req immediately to fetch recovery timestamp info from peer. + * 3GPP TS 23.007 19A: "When peer PFCP entities information is available, i.e. when the PFCP Association + * is still alive, the restarted PFCP entity shall send its updated Recovery Time Stamps in a Heartbeat + * Request message to the peer PFCP entities before initiating any PFCP session signalling" + */ + osmo_timer_schedule(&cp_peer->heartbeat_tx_timer, 0, 0); + osmo_timer_schedule(&cp_peer->heartbeat_rx_timer, tval_rx_heartbeat_s, 0); if (cp_peer->assoc_cb) cp_peer->assoc_cb(cp_peer, true); } @@ -273,6 +352,11 @@ LOG_CP_PEER(cp_peer, LOGL_ERROR, "PFCP Association Update Request is not implemented\n"); break;
+ case PFCP_CP_PEER_EV_HEARTBEAT_TIMEOUT: + LOG_CP_PEER(cp_peer, LOGL_NOTICE, "Heartbeat timeout!\n"); + osmo_pfcp_cp_peer_fsm_state_chg(PFCP_CP_PEER_ST_WAIT_ASSOC_SETUP_RESP); + break; + default: OSMO_ASSERT(false); } @@ -283,6 +367,8 @@ struct osmo_pfcp_cp_peer *cp_peer = fi->priv; LOG_CP_PEER(cp_peer, LOGL_NOTICE, "Disassociating from UPF %s\n", osmo_sockaddr_to_str_c(OTC_SELECT, &cp_peer->remote_addr)); + osmo_timer_del(&cp_peer->heartbeat_tx_timer); + osmo_timer_del(&cp_peer->heartbeat_rx_timer); if (cp_peer->assoc_cb) cp_peer->assoc_cb(cp_peer, false); } @@ -358,6 +444,7 @@ .name = "associated", .in_event_mask = 0 | S(PFCP_CP_PEER_EV_RX_ASSOC_UPDATE_REQ) + | S(PFCP_CP_PEER_EV_HEARTBEAT_TIMEOUT) , .out_state_mask = 0 | S(PFCP_CP_PEER_ST_WAIT_ASSOC_SETUP_RESP) @@ -394,6 +481,7 @@ .num_states = ARRAY_SIZE(pfcp_cp_peer_fsm_states), .log_subsys = DLPFCP, .event_names = pfcp_cp_peer_fsm_event_names, + .cleanup = pfcp_cp_peer_fsm_cleanup, .timer_cb = pfcp_cp_peer_fsm_timer_cb, .allstate_action = pfcp_cp_peer_allstate_action, .allstate_event_mask = 0 diff --git a/src/libosmo-pfcp/pfcp_endpoint.c b/src/libosmo-pfcp/pfcp_endpoint.c index bb7fbf3..e93e272 100644 --- a/src/libosmo-pfcp/pfcp_endpoint.c +++ b/src/libosmo-pfcp/pfcp_endpoint.c @@ -76,7 +76,7 @@ { .T = OSMO_PFCP_TIMER_HEARTBEAT_REQ, .default_val = 15, .unit = OSMO_TDEF_S, .desc = "PFCP Heartbeat Request period, how long to wait between issuing requests" }, - { .T = OSMO_PFCP_TIMER_HEARTBEAT_RESP, .default_val = 15, .unit = OSMO_TDEF_S, + { .T = OSMO_PFCP_TIMER_HEARTBEAT_RESP, .default_val = 35, .unit = OSMO_TDEF_S, .desc = "PFCP Heartbeat Response timeout, the time after which to regard a non-responding peer as disconnected" }, { .T = OSMO_PFCP_TIMER_GRACEFUL_REL, .default_val = 15, .unit = OSMO_TDEF_S, @@ -128,14 +128,18 @@ return ep; }
-static unsigned int ep_n1(struct osmo_pfcp_endpoint *ep) +static unsigned int ep_n1(struct osmo_pfcp_endpoint *ep, const struct osmo_pfcp_msg *m) { + if (m->h.message_type == OSMO_PFCP_MSGT_HEARTBEAT_REQ) + return 0; return osmo_tdef_get(ep->cfg.tdefs, OSMO_PFCP_TIMER_N1, OSMO_TDEF_CUSTOM, -1); }
-static unsigned int ep_t1(struct osmo_pfcp_endpoint *ep) +static unsigned int ep_t1(struct osmo_pfcp_endpoint *ep, const struct osmo_pfcp_msg *m) { - return osmo_tdef_get(ep->cfg.tdefs, OSMO_PFCP_TIMER_T1, OSMO_TDEF_MS, -1); + int T = m->h.message_type == OSMO_PFCP_MSGT_HEARTBEAT_REQ ? OSMO_PFCP_TIMER_HEARTBEAT_RESP : + OSMO_PFCP_TIMER_T1; + return osmo_tdef_get(ep->cfg.tdefs, T, OSMO_TDEF_MS, -1); }
static unsigned int ep_keep_resp(const struct osmo_pfcp_endpoint *ep, const struct osmo_pfcp_msg *m) @@ -154,8 +158,8 @@ static bool pfcp_queue_retrans(struct osmo_pfcp_queue_entry *qe) { struct osmo_pfcp_endpoint *endpoint = qe->ep; - unsigned int t1_ms = ep_t1(endpoint); struct osmo_pfcp_msg *m = qe->m; + unsigned int t1_ms = ep_t1(endpoint, m); int rc;
/* if no more attempts remaining, drop from queue */ @@ -260,12 +264,12 @@ timeout_ms = ep_keep_resp(endpoint, m); OSMO_LOG_PFCP_MSG(m, LOGL_DEBUG, "keep sent Responses for %ums\n", timeout_ms); } else { - timeout_ms = ep_t1(endpoint); - n1 = ep_n1(endpoint); + timeout_ms = ep_t1(endpoint, m); + n1 = ep_n1(endpoint, m);
OSMO_LOG_PFCP_MSG(m, LOGL_DEBUG, "retransmit unanswered Requests %u x %ums\n", n1, timeout_ms); - /* If there are no retransmissions or no timeout, it makes no sense to add to the queue. */ - if (!n1 || !timeout_ms) { + /* If there are no retransmissions and no timeout, it makes no sense to add to the queue. */ + if (!n1 && !timeout_ms) { if (!m->is_response && m->ctx.resp_cb) m->ctx.resp_cb(m, NULL, "PFCP timeout is zero, cannot wait for a response"); return 0;