laforge has submitted this change. ( https://gerrit.osmocom.org/c/osmo-e1d/+/27839 )
Change subject: octoi: Disconnect the link when >= 7500 underruns/s ......................................................................
octoi: Disconnect the link when >= 7500 underruns/s
This situation arises when the peer's clock is continuously too slow compared to the local clock, leading to RIFO underruns on [virtually] all of the 8000 E1 frames per second. As the current code doesn't recover from this, we might as well disconnect and restart the connection to recover.
Change-Id: Ie3fffa1c1c20962b40320c8cc088c140b8d64e77 --- M src/octoi/e1oip.h M src/octoi/octoi_clnt_fsm.c M src/octoi/octoi_srv_fsm.c 3 files changed, 45 insertions(+), 5 deletions(-)
Approvals: Jenkins Builder: Verified laforge: Looks good to me, approved tnt: Looks good to me, but someone else must approve
diff --git a/src/octoi/e1oip.h b/src/octoi/e1oip.h index 1a184d2..99b88f0 100644 --- a/src/octoi/e1oip.h +++ b/src/octoi/e1oip.h @@ -59,6 +59,13 @@ /* TODO: statistics (RTT, frame loss, std deviation, alarms */ };
+/* get the rate of the given counter during the last second */ +static inline uint64_t iline_ctr_get_rate_1s(struct e1oip_line *iline, unsigned int idx) +{ + const struct rate_ctr *ctr = rate_ctr_group_get_ctr(iline->ctrs, idx); + return ctr->intv[RATE_CTR_INTV_SEC].rate; +} + struct e1oip_line *e1oip_line_alloc(struct octoi_peer *peer); void e1oip_line_set_name(struct e1oip_line *line, const char *name); void e1oip_line_reset(struct e1oip_line *iline); diff --git a/src/octoi/octoi_clnt_fsm.c b/src/octoi/octoi_clnt_fsm.c index f2fe989..a3d0518 100644 --- a/src/octoi/octoi_clnt_fsm.c +++ b/src/octoi/octoi_clnt_fsm.c @@ -34,6 +34,7 @@ CLNT_ST_ACCEPTED, CLNT_ST_REJECTED, CLNT_ST_REDIRECTED, + CLNT_ST_WAIT_RECONNECT, };
struct clnt_state { @@ -179,7 +180,8 @@ .name = "ACCEPTED", .in_event_mask = S(OCTOI_CLNT_EV_RX_AUTH_REQ) | S(OCTOI_EV_RX_TDM_DATA), - .out_state_mask = S(CLNT_ST_INIT), + .out_state_mask = S(CLNT_ST_INIT) | + S(CLNT_ST_WAIT_RECONNECT), .action = clnt_st_accepted, .onenter = clnt_st_accepted_onenter, .onleave = clnt_st_accepted_onleave, @@ -196,6 +198,11 @@ .out_state_mask = S(CLNT_ST_SVC_REQ_SENT), .action = clnt_st_redirected, }, + [CLNT_ST_WAIT_RECONNECT] = { + .name = "WAIT_RECONNECT", + .in_event_mask = 0, + .out_state_mask = S(CLNT_ST_INIT), + }, };
static void clnt_allstate_action(struct osmo_fsm_inst *fi, uint32_t event, void *data) @@ -235,6 +242,9 @@ PACKAGE_NAME, PACKAGE_VERSION, st->capability_flags); osmo_fsm_inst_state_chg(fi, CLNT_ST_SVC_REQ_SENT, 10, 0); break; + case CLNT_ST_WAIT_RECONNECT: + LOGPFSML(fi, LOGL_INFO, "Re-starting connection\n"); + osmo_fsm_inst_state_chg(fi, CLNT_ST_INIT, 0, 0); } return 0; } @@ -257,6 +267,7 @@ struct osmo_fsm_inst *fi = data; struct clnt_state *st = fi->priv; struct timespec ts; + uint64_t rate;
clock_gettime(CLOCK_MONOTONIC, &ts);
@@ -264,8 +275,19 @@ LOGPFSML(fi, LOGL_NOTICE, "No TDM data received for >= 3 seconds, declaring peer dead\n"); osmo_fsm_inst_state_chg(fi, CLNT_ST_INIT, 0, 0); osmo_fsm_inst_dispatch(fi, OCTOI_CLNT_EV_REQUEST_SERVICE, NULL); - } else - osmo_timer_schedule(&st->rx_alive_timer, 3, 0); + return; + } + + rate = iline_ctr_get_rate_1s(st->peer->iline, LINE_CTR_E1oIP_UNDERRUN); + if (rate > 7500) { + LOGPFSML(fi, LOGL_ERROR, "More than 7500 RIFO underruns per second: " + "Your clock appears to be too fast. Disconnecting.\n"); + osmo_fsm_inst_state_chg(fi, CLNT_ST_WAIT_RECONNECT, 10, 0); + osmo_fsm_inst_dispatch(fi, OCTOI_CLNT_EV_REQUEST_SERVICE, NULL); + return; + } + + osmo_timer_schedule(&st->rx_alive_timer, 3, 0); }
diff --git a/src/octoi/octoi_srv_fsm.c b/src/octoi/octoi_srv_fsm.c index 3990a16..5f51003 100644 --- a/src/octoi/octoi_srv_fsm.c +++ b/src/octoi/octoi_srv_fsm.c @@ -345,14 +345,25 @@ struct osmo_fsm_inst *fi = data; struct srv_state *st = fi->priv; struct timespec ts; + uint64_t rate;
clock_gettime(CLOCK_MONOTONIC, &ts);
if (ts.tv_sec - st->peer->last_rx_tdm > 3) { LOGPFSML(fi, LOGL_NOTICE, "No TDM data received for >= 3 seconds, declaring peer dead\n"); osmo_fsm_inst_term(fi, OSMO_FSM_TERM_TIMEOUT, NULL); - } else - osmo_timer_schedule(&st->rx_alive_timer, 3, 0); + return; + } + + rate = iline_ctr_get_rate_1s(st->peer->iline, LINE_CTR_E1oIP_UNDERRUN); + if (rate > 7500) { + LOGPFSML(fi, LOGL_ERROR, "More than 7500 RIFO underruns per second: " + "Peer clock is too slow. Disconnecting.\n"); + osmo_fsm_inst_term(fi, OSMO_FSM_TERM_ERROR, NULL); + return; + } + + osmo_timer_schedule(&st->rx_alive_timer, 3, 0); }
/* call-back function for every received OCTOI socket message for given peer */