fixeria has uploaded this change for review.

View Change

sctp_{server,proxy}: handle sctp_error messages from gen_sctp

The `{sctp_error, ...}` message is undocumented, but it does exist,
and we have encountered it in production. The logic that generates
`{sctp, ...}` and `{sctp_error, ...}` messages resides in Erlang/OTP's
inet_drv.c, specifically in packet_binary_message() and
sctp_parse_async_event(). Within sctp_parse_async_event(), an error
is indicated by replacing the initial `sctp` atom with `sctp_error`.

Currently the following SCTP events are treated as errors:

* SCTP_SEND_FAILED (becomes #sctp_send_failed{}),
* SCTP_REMOTE_ERROR (becomes #sctp_remote_error{}),
* SCTP_PARTIAL_DELIVERY_EVENT (becomes #sctp_pdapi_event{}).

Print more specific messages when the sctp_error is received.
Add new metrics for the above-mentioned SCTP events.

Change-Id: I38203d915d54dacd4e9bbf158ab86f8936585a34
Related: SYS#7738
---
M include/s1gw_metrics.hrl
M src/s1gw_metrics.erl
M src/sctp_common.erl
M src/sctp_proxy.erl
M src/sctp_server.erl
5 files changed, 48 insertions(+), 1 deletion(-)

git pull ssh://gerrit.osmocom.org:29418/erlang/osmo-s1gw refs/changes/77/41477/1
diff --git a/include/s1gw_metrics.hrl b/include/s1gw_metrics.hrl
index 5afbff0..385b5fa 100644
--- a/include/s1gw_metrics.hrl
+++ b/include/s1gw_metrics.hrl
@@ -40,6 +40,12 @@
-define(S1GW_CTR_S1AP_PROXY_OUT_PKT_REPLY_ALL, [ctr, s1ap, proxy, out_pkt, reply, all]).
-define(S1GW_CTR_S1AP_PROXY_OUT_PKT_REPLY_ERAB_SETUP_RSP, [ctr, s1ap, proxy, out_pkt, reply, erab_setup_rsp]).

+%% SCTP related metrics
+-define(S1GW_CTR_SCTP_ERROR_ALL, [ctr, sctp, error, all]).
+-define(S1GW_CTR_SCTP_ERROR_SEND_FAILED, [ctr, sctp, error, send_failed]).
+-define(S1GW_CTR_SCTP_ERROR_PDAPI_EVENT, [ctr, sctp, error, pdapi_event]).
+-define(S1GW_CTR_SCTP_ERROR_REMOTE_ERROR, [ctr, sctp, error, remote_error]).
+
%% per-eNB counters
%% NOTE: these counters shall not be listed in ?S1GW_COUNTERS,
%% but created dynamically for each connecting eNB.
diff --git a/src/s1gw_metrics.erl b/src/s1gw_metrics.erl
index 7ca3352..bb4bd97 100644
--- a/src/s1gw_metrics.erl
+++ b/src/s1gw_metrics.erl
@@ -95,7 +95,13 @@
?S1GW_CTR_S1AP_PROXY_OUT_PKT_FWD_PROC, %% forwarded: processed
?S1GW_CTR_S1AP_PROXY_OUT_PKT_FWD_UNMODIFIED, %% forwarded: unmodified
?S1GW_CTR_S1AP_PROXY_OUT_PKT_REPLY_ALL, %% replied: total
- ?S1GW_CTR_S1AP_PROXY_OUT_PKT_REPLY_ERAB_SETUP_RSP %% replied: E-RAB SETUP.rsp
+ ?S1GW_CTR_S1AP_PROXY_OUT_PKT_REPLY_ERAB_SETUP_RSP, %% replied: E-RAB SETUP.rsp
+
+ %% SCTP related counters
+ ?S1GW_CTR_SCTP_ERROR_ALL, %% total number of SCTP errors
+ ?S1GW_CTR_SCTP_ERROR_SEND_FAILED, %% send operation failed
+ ?S1GW_CTR_SCTP_ERROR_PDAPI_EVENT, %% partial delivery failure
+ ?S1GW_CTR_SCTP_ERROR_REMOTE_ERROR %% remote error
]).

-define(S1GW_GAUGES, [
diff --git a/src/sctp_common.erl b/src/sctp_common.erl
index 296dbff..9b86b50 100644
--- a/src/sctp_common.erl
+++ b/src/sctp_common.erl
@@ -35,12 +35,14 @@
-module(sctp_common).

-export([parse_addr/1,
+ report_error/1,
send_data/2,
shutdown/1]).

-include_lib("kernel/include/logger.hrl").
-include_lib("kernel/include/inet_sctp.hrl").

+-include("s1gw_metrics.hrl").
-include("s1ap.hrl").


@@ -68,6 +70,20 @@
end.


+-spec report_error(term()) -> term().
+report_error(Error) ->
+ s1gw_metrics:ctr_inc(?S1GW_CTR_SCTP_ERROR_ALL),
+ case Error of
+ #sctp_send_failed{} ->
+ s1gw_metrics:ctr_inc(?S1GW_CTR_SCTP_ERROR_SEND_FAILED);
+ #sctp_pdapi_event{} ->
+ s1gw_metrics:ctr_inc(?S1GW_CTR_SCTP_ERROR_PDAPI_EVENT);
+ #sctp_remote_error{} ->
+ s1gw_metrics:ctr_inc(?S1GW_CTR_SCTP_ERROR_REMOTE_ERROR);
+ _ -> ok
+ end.
+
+
-spec send_data(sock_aid(), binary()) -> ok | {error, term()}.
send_data({Sock, Aid}, Data) ->
gen_sctp:send(Sock, #sctp_sndrcvinfo{stream = ?S1AP_SCTP_STREAM,
diff --git a/src/sctp_proxy.erl b/src/sctp_proxy.erl
index 594b286..3aa67d8 100644
--- a/src/sctp_proxy.erl
+++ b/src/sctp_proxy.erl
@@ -225,6 +225,12 @@
[State, MmeAddr, MmePort, AncData, Data]),
keep_state_and_data;

+handle_event(State, info, {sctp_error, _Socket, MmeAddr, MmePort, {_, Error}}, _S) ->
+ ?LOG_ERROR("Rx SCTP error in state ~p (~p:~p): ~p",
+ [State, MmeAddr, MmePort, Error]),
+ sctp_common:report_error(Error),
+ keep_state_and_data;
+
handle_event(State, Event, EventData, _S) ->
?LOG_ERROR("Unexpected event ~p in state ~p: ~p", [Event, State, EventData]),
keep_state_and_data.
diff --git a/src/sctp_server.erl b/src/sctp_server.erl
index d194ce4..46c352b 100644
--- a/src/sctp_server.erl
+++ b/src/sctp_server.erl
@@ -158,6 +158,10 @@
S1 = sctp_recv({FromAddr, FromPort, AncData, Data}, S0),
{noreply, S1};

+handle_info({sctp_error, _Socket, FromAddr, FromPort, {_, Error}}, S0) ->
+ S1 = sctp_error({FromAddr, FromPort, Error}, S0),
+ {noreply, S1};
+
%% Handle termination events of the child processes
handle_info({'EXIT', Pid, Reason},
#server_state{sock = Sock, clients = Clients} = S0) ->
@@ -245,6 +249,15 @@
S.


+%% SCTP error handler
+sctp_error({FromAddr, FromPort, Error},
+ #server_state{} = S) ->
+ ?LOG_ERROR("Rx SCTP error (~p:~p): ~p",
+ [FromAddr, FromPort, Error]),
+ sctp_common:report_error(Error),
+ S.
+
+
%% Add a new client to the list, spawning a handler process
client_add(Aid, FromAddr, FromPort,
#server_state{clients = Clients,

To view, visit change 41477. To unsubscribe, or for help writing mail filters, visit settings.

Gerrit-MessageType: newchange
Gerrit-Project: erlang/osmo-s1gw
Gerrit-Branch: master
Gerrit-Change-Id: I38203d915d54dacd4e9bbf158ab86f8936585a34
Gerrit-Change-Number: 41477
Gerrit-PatchSet: 1
Gerrit-Owner: fixeria <vyanitskiy@sysmocom.de>