From: Holger Hans Peter Freyther holger@moiji-mobile.com
Looking at the code it seemed possible that a channel would transition from BROKEN to NONE. Or worse from NONE to BROKEN. Start the timer _after_ the channel has been released. --- openbsc/src/libbsc/abis_rsl.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-)
diff --git a/openbsc/src/libbsc/abis_rsl.c b/openbsc/src/libbsc/abis_rsl.c index 5d40794..984fa7e 100644 --- a/openbsc/src/libbsc/abis_rsl.c +++ b/openbsc/src/libbsc/abis_rsl.c @@ -52,6 +52,7 @@ enum sacch_deact { };
static int rsl_send_imm_assignment(struct gsm_lchan *lchan); +static void error_timeout_cb(void *data);
static void send_lchan_signal(int sig_no, struct gsm_lchan *lchan, struct gsm_meas_rep *resp) @@ -64,9 +65,15 @@ static void send_lchan_signal(int sig_no, struct gsm_lchan *lchan,
static void do_lchan_free(struct gsm_lchan *lchan) { - /* we have an error timer pending to release that */ - if (lchan->state != LCHAN_S_REL_ERR) + /* We start the error timer to make the channel available again */ + if (lchan->state == LCHAN_S_REL_ERR) { + lchan->error_timer.data = lchan; + lchan->error_timer.cb = error_timeout_cb; + osmo_timer_schedule(&lchan->error_timer, + lchan->ts->trx->bts->network->T3111 + 2, 0); + } else { rsl_lchan_set_state(lchan, LCHAN_S_NONE); + } lchan_free(lchan); }
@@ -679,8 +686,6 @@ static int rsl_rf_chan_release(struct gsm_lchan *lchan, int error, DEBUGP(DRSL, "%s RF Channel Release CMD due error %d\n", gsm_lchan_name(lchan), error);
if (error) { - struct e1inp_sign_link *sign_link = msg->dst; - /* * FIXME: GSM 04.08 gives us two options for the abnormal * chanel release. This can be either like in the non-existent @@ -708,10 +713,6 @@ static int rsl_rf_chan_release(struct gsm_lchan *lchan, int error, * TODO: start T3109 now. */ rsl_lchan_set_state(lchan, LCHAN_S_REL_ERR); - lchan->error_timer.data = lchan; - lchan->error_timer.cb = error_timeout_cb; - osmo_timer_schedule(&lchan->error_timer, - sign_link->trx->bts->network->T3111 + 2, 0); }
/* Start another timer or assume the BTS sends a ACK/NACK? */
From: Holger Hans Peter Freyther holger@moiji-mobile.com
When we receive an ERROR INDICATION and CONNECTION FAILURE we might call rsl_rf_chan_release multiple times. The channel release handling is still a bit messy and there too many paths that lead to the call.
1.) In case we receive an ERROR INDICATION for SAPI=3. A RLL error signal will be emitted that leads to the release of the channel through the SMS code in case of the NITB. The call to rsl_rf_chan_release might be a double release.
2.) In case a CONNECTION FAILURE is received when the release process has already been started we would unconditionally call rsl_rf_chan_release as well.
Because the lchan state is changed by the callers of the rsl_rf_chan_release we can not move the state checking into this code but need to do it in the caller. The issue was seen in a trace from Rhizomatica and I created the DoubleRelease.st to re-produce the issue and verified that we have no duplicate RF Channel Releses.
The other option would be to introduce a new state to track the release process and see if we have already released SAPIs deactivated the SACCH or such. We can not simply look at these as for a channel that fails to activate they will be null already. --- openbsc/src/libbsc/abis_rsl.c | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-)
diff --git a/openbsc/src/libbsc/abis_rsl.c b/openbsc/src/libbsc/abis_rsl.c index 984fa7e..de76d0f 100644 --- a/openbsc/src/libbsc/abis_rsl.c +++ b/openbsc/src/libbsc/abis_rsl.c @@ -1013,9 +1013,15 @@ static int rsl_rx_conn_fail(struct msgb *msg) struct abis_rsl_dchan_hdr *dh = msgb_l2(msg); struct tlv_parsed tp;
- /* FIXME: print which channel */ - LOGP(DRSL, LOGL_NOTICE, "%s CONNECTION FAIL: RELEASING ", - gsm_lchan_name(msg->lchan)); + LOGP(DRSL, LOGL_NOTICE, "%s CONNECTION FAIL: RELEASING state %s ", + gsm_lchan_name(msg->lchan), + gsm_lchans_name(msg->lchan->state)); + + /* We might have already received an ERROR INDICATION */ + if (msg->lchan->state != LCHAN_S_ACTIVE) { + LOGPC(DRSL, LOGL_NOTICE, "\n"); + return 0; + }
rsl_tlv_parse(&tp, dh->data, msgb_l2len(msg)-sizeof(*dh));
@@ -1587,12 +1593,21 @@ static int rsl_rx_rll_err_ind(struct msgb *msg) }
rlm_cause = *TLVP_VAL(&tp, RSL_IE_RLM_CAUSE); - LOGP(DRLL, LOGL_ERROR, "%s ERROR INDICATION cause=%s\n", + LOGP(DRLL, LOGL_ERROR, "%s ERROR INDICATION cause=%s in state=%s\n", gsm_lchan_name(msg->lchan), - rsl_rlm_cause_name(rlm_cause)); + rsl_rlm_cause_name(rlm_cause), + gsm_lchans_name(msg->lchan->state)); + + /* If the channel is already failing no need to inform anyone. */ + if (msg->lchan->state != LCHAN_S_ACTIVE) + return 0;
rll_indication(msg->lchan, rllh->link_id, BSC_RLLR_IND_ERR_IND);
+ /* The channel might have been released already. */ + if (msg->lchan->state != LCHAN_S_ACTIVE) + return 0; + if (rlm_cause == RLL_CAUSE_T200_EXPIRED) { osmo_counter_inc(msg->lchan->ts->trx->bts->network->stats.chan.rll_err); return rsl_rf_chan_release(msg->lchan, 1, SACCH_DEACTIVATE);
From: Holger Hans Peter Freyther holger@moiji-mobile.com
In case we receive ERROR INDICATION and CONNECTION FAILURE we only want to RF Channel Release the lchan once. This code is more simple and should work as reliable as the previous commit. --- openbsc/src/libbsc/abis_rsl.c | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-)
diff --git a/openbsc/src/libbsc/abis_rsl.c b/openbsc/src/libbsc/abis_rsl.c index de76d0f..748ab7e 100644 --- a/openbsc/src/libbsc/abis_rsl.c +++ b/openbsc/src/libbsc/abis_rsl.c @@ -726,6 +726,16 @@ static int rsl_rf_chan_release(struct gsm_lchan *lchan, int error, return rc; }
+/* + * Special handling for channel releases in the error case. + */ +static int rsl_rf_chan_release_err(struct gsm_lchan *lchan) +{ + if (lchan->state != LCHAN_S_ACTIVE) + return 0; + return rsl_rf_chan_release(lchan, 1, SACCH_DEACTIVATE); +} + static int rsl_rx_rf_chan_rel_ack(struct gsm_lchan *lchan) {
@@ -1017,12 +1027,6 @@ static int rsl_rx_conn_fail(struct msgb *msg) gsm_lchan_name(msg->lchan), gsm_lchans_name(msg->lchan->state));
- /* We might have already received an ERROR INDICATION */ - if (msg->lchan->state != LCHAN_S_ACTIVE) { - LOGPC(DRSL, LOGL_NOTICE, "\n"); - return 0; - } - rsl_tlv_parse(&tp, dh->data, msgb_l2len(msg)-sizeof(*dh));
if (TLVP_PRESENT(&tp, RSL_IE_CAUSE)) @@ -1031,7 +1035,7 @@ static int rsl_rx_conn_fail(struct msgb *msg)
LOGPC(DRSL, LOGL_NOTICE, "\n"); osmo_counter_inc(msg->lchan->ts->trx->bts->network->stats.chan.rf_fail); - return rsl_rf_chan_release(msg->lchan, 1, SACCH_DEACTIVATE); + return rsl_rf_chan_release_err(msg->lchan); }
static void print_meas_rep_uni(struct gsm_meas_rep_unidir *mru, @@ -1598,19 +1602,11 @@ static int rsl_rx_rll_err_ind(struct msgb *msg) rsl_rlm_cause_name(rlm_cause), gsm_lchans_name(msg->lchan->state));
- /* If the channel is already failing no need to inform anyone. */ - if (msg->lchan->state != LCHAN_S_ACTIVE) - return 0; - rll_indication(msg->lchan, rllh->link_id, BSC_RLLR_IND_ERR_IND);
- /* The channel might have been released already. */ - if (msg->lchan->state != LCHAN_S_ACTIVE) - return 0; - if (rlm_cause == RLL_CAUSE_T200_EXPIRED) { osmo_counter_inc(msg->lchan->ts->trx->bts->network->stats.chan.rll_err); - return rsl_rf_chan_release(msg->lchan, 1, SACCH_DEACTIVATE); + return rsl_rf_chan_release_err(msg->lchan); }
return 0;