fixeria has submitted this change. ( https://gerrit.osmocom.org/c/erlang/osmo-s1gw/+/41477?usp=email )
Change subject: sctp_{server,proxy}: handle sctp_error messages from gen_sctp ......................................................................
sctp_{server,proxy}: handle sctp_error messages from gen_sctp
The `{sctp_error, ...}` message is undocumented, but it does exist, and we have encountered it in production. The logic that generates `{sctp, ...}` and `{sctp_error, ...}` messages resides in Erlang/OTP's inet_drv.c, specifically in packet_binary_message() and sctp_parse_async_event(). Within sctp_parse_async_event(), an error is indicated by replacing the initial `sctp` atom with `sctp_error`.
Currently the following SCTP events are treated as errors:
* SCTP_SEND_FAILED (becomes #sctp_send_failed{}), * SCTP_REMOTE_ERROR (becomes #sctp_remote_error{}), * SCTP_PARTIAL_DELIVERY_EVENT (becomes #sctp_pdapi_event{}).
Print more specific messages when the sctp_error is received. Add new metrics for the above-mentioned SCTP events.
Change-Id: I38203d915d54dacd4e9bbf158ab86f8936585a34 Related: SYS#7738 --- M include/s1gw_metrics.hrl M src/s1gw_metrics.erl M src/sctp_common.erl M src/sctp_proxy.erl M src/sctp_server.erl 5 files changed, 41 insertions(+), 1 deletion(-)
Approvals: laforge: Looks good to me, approved Jenkins Builder: Verified pespin: Looks good to me, but someone else must approve
diff --git a/include/s1gw_metrics.hrl b/include/s1gw_metrics.hrl index 5afbff0..385b5fa 100644 --- a/include/s1gw_metrics.hrl +++ b/include/s1gw_metrics.hrl @@ -40,6 +40,12 @@ -define(S1GW_CTR_S1AP_PROXY_OUT_PKT_REPLY_ALL, [ctr, s1ap, proxy, out_pkt, reply, all]). -define(S1GW_CTR_S1AP_PROXY_OUT_PKT_REPLY_ERAB_SETUP_RSP, [ctr, s1ap, proxy, out_pkt, reply, erab_setup_rsp]).
+%% SCTP related metrics +-define(S1GW_CTR_SCTP_ERROR_ALL, [ctr, sctp, error, all]). +-define(S1GW_CTR_SCTP_ERROR_SEND_FAILED, [ctr, sctp, error, send_failed]). +-define(S1GW_CTR_SCTP_ERROR_PDAPI_EVENT, [ctr, sctp, error, pdapi_event]). +-define(S1GW_CTR_SCTP_ERROR_REMOTE_ERROR, [ctr, sctp, error, remote_error]). + %% per-eNB counters %% NOTE: these counters shall not be listed in ?S1GW_COUNTERS, %% but created dynamically for each connecting eNB. diff --git a/src/s1gw_metrics.erl b/src/s1gw_metrics.erl index 7ca3352..bb4bd97 100644 --- a/src/s1gw_metrics.erl +++ b/src/s1gw_metrics.erl @@ -95,7 +95,13 @@ ?S1GW_CTR_S1AP_PROXY_OUT_PKT_FWD_PROC, %% forwarded: processed ?S1GW_CTR_S1AP_PROXY_OUT_PKT_FWD_UNMODIFIED, %% forwarded: unmodified ?S1GW_CTR_S1AP_PROXY_OUT_PKT_REPLY_ALL, %% replied: total - ?S1GW_CTR_S1AP_PROXY_OUT_PKT_REPLY_ERAB_SETUP_RSP %% replied: E-RAB SETUP.rsp + ?S1GW_CTR_S1AP_PROXY_OUT_PKT_REPLY_ERAB_SETUP_RSP, %% replied: E-RAB SETUP.rsp + + %% SCTP related counters + ?S1GW_CTR_SCTP_ERROR_ALL, %% total number of SCTP errors + ?S1GW_CTR_SCTP_ERROR_SEND_FAILED, %% send operation failed + ?S1GW_CTR_SCTP_ERROR_PDAPI_EVENT, %% partial delivery failure + ?S1GW_CTR_SCTP_ERROR_REMOTE_ERROR %% remote error ]).
-define(S1GW_GAUGES, [ diff --git a/src/sctp_common.erl b/src/sctp_common.erl index 296dbff..9b86b50 100644 --- a/src/sctp_common.erl +++ b/src/sctp_common.erl @@ -35,12 +35,14 @@ -module(sctp_common).
-export([parse_addr/1, + report_error/1, send_data/2, shutdown/1]).
-include_lib("kernel/include/logger.hrl"). -include_lib("kernel/include/inet_sctp.hrl").
+-include("s1gw_metrics.hrl"). -include("s1ap.hrl").
@@ -68,6 +70,20 @@ end.
+-spec report_error(term()) -> term(). +report_error(Error) -> + s1gw_metrics:ctr_inc(?S1GW_CTR_SCTP_ERROR_ALL), + case Error of + #sctp_send_failed{} -> + s1gw_metrics:ctr_inc(?S1GW_CTR_SCTP_ERROR_SEND_FAILED); + #sctp_pdapi_event{} -> + s1gw_metrics:ctr_inc(?S1GW_CTR_SCTP_ERROR_PDAPI_EVENT); + #sctp_remote_error{} -> + s1gw_metrics:ctr_inc(?S1GW_CTR_SCTP_ERROR_REMOTE_ERROR); + _ -> ok + end. + + -spec send_data(sock_aid(), binary()) -> ok | {error, term()}. send_data({Sock, Aid}, Data) -> gen_sctp:send(Sock, #sctp_sndrcvinfo{stream = ?S1AP_SCTP_STREAM, diff --git a/src/sctp_proxy.erl b/src/sctp_proxy.erl index 594b286..3aa67d8 100644 --- a/src/sctp_proxy.erl +++ b/src/sctp_proxy.erl @@ -225,6 +225,12 @@ [State, MmeAddr, MmePort, AncData, Data]), keep_state_and_data;
+handle_event(State, info, {sctp_error, _Socket, MmeAddr, MmePort, {_, Error}}, _S) -> + ?LOG_ERROR("Rx SCTP error in state ~p (~p:~p): ~p", + [State, MmeAddr, MmePort, Error]), + sctp_common:report_error(Error), + keep_state_and_data; + handle_event(State, Event, EventData, _S) -> ?LOG_ERROR("Unexpected event ~p in state ~p: ~p", [Event, State, EventData]), keep_state_and_data. diff --git a/src/sctp_server.erl b/src/sctp_server.erl index d194ce4..52d50e6 100644 --- a/src/sctp_server.erl +++ b/src/sctp_server.erl @@ -158,6 +158,12 @@ S1 = sctp_recv({FromAddr, FromPort, AncData, Data}, S0), {noreply, S1};
+handle_info({sctp_error, _Socket, FromAddr, FromPort, {_, Error}}, S0) -> + ?LOG_ERROR("Rx SCTP error (~p:~p): ~p", + [FromAddr, FromPort, Error]), + sctp_common:report_error(Error), + {noreply, S0}; + %% Handle termination events of the child processes handle_info({'EXIT', Pid, Reason}, #server_state{sock = Sock, clients = Clients} = S0) ->