fixeria submitted this change.

View Change

Approvals: pespin: Looks good to me, but someone else must approve; Jenkins Builder: Verified; osmith: Looks good to me, approved
enb_proxy/mme_registry: add per-MME counters for connection events

Register a set of per-MME counters when an MME is added to the pool,
and increment them from enb_proxy at the relevant state transitions:

- selected (MME chosen for a connection attempt)
- conn_est_timeout (SCTP connection establishment timed out)
- conn_est_failure (SCTP connection establishment failed)
- s1setup_rsp (S1 SETUP RESPONSE received successfully)
- s1setup_failure (S1 SETUP FAILURE received from MME)
- s1setup_rsp_timeout (timed out waiting for S1 SETUP RESPONSE)

A new global aggregate counter ?S1GW_CTR_ENB_PROXY_MME_SELECTED is
also added alongside the existing ?S1GW_CTR_ENB_PROXY_MME_SELECT_ERROR.

Change-Id: Ie0149c1ad0754af6d8f5f95d4e8919993eac3760
Related: SYS#7052
---
M include/s1gw_metrics.hrl
M src/enb_proxy.erl
M src/mme_registry.erl
M src/s1gw_metrics.erl
4 files changed, 33 insertions(+), 0 deletions(-)

diff --git a/include/s1gw_metrics.hrl b/include/s1gw_metrics.hrl
index 249bc6c..06a9719 100644
--- a/include/s1gw_metrics.hrl
+++ b/include/s1gw_metrics.hrl
@@ -50,6 +50,17 @@
-define(S1GW_CTR_ENB_PROXY_UNEXPECTED_PDU, [ctr, enb_proxy, unexpected_pdu]).
-define(S1GW_CTR_ENB_PROXY_MALFORMED_PDU, [ctr, enb_proxy, malformed_pdu]).
-define(S1GW_CTR_ENB_PROXY_MME_SELECT_ERROR, [ctr, enb_proxy, mme_select, error]).
+-define(S1GW_CTR_ENB_PROXY_MME_SELECTED, [ctr, enb_proxy, mme_select, ok]).
+
+%% per-MME counters
+%% NOTE: these counters shall not be listed in ?S1GW_COUNTERS,
+%% but created dynamically for each registered MME.
+-define(S1GW_CTR_MME_SELECTED(N), [ctr, mme, N, selected]).
+-define(S1GW_CTR_MME_CONN_EST_TIMEOUT(N), [ctr, mme, N, conn_est, timeout]).
+-define(S1GW_CTR_MME_CONN_EST_FAILURE(N), [ctr, mme, N, conn_est, failure]).
+-define(S1GW_CTR_MME_S1_SETUP_RSP(N), [ctr, mme, N, s1setup, rsp]).
+-define(S1GW_CTR_MME_S1_SETUP_FAILURE(N), [ctr, mme, N, s1setup, failure]).
+-define(S1GW_CTR_MME_S1_SETUP_RSP_TIMEOUT(N), [ctr, mme, N, s1setup, rsp, timeout]).

%% SCTP related metrics
-define(S1GW_CTR_SCTP_ERROR_ALL, [ctr, sctp, error, all]).
diff --git a/src/enb_proxy.erl b/src/enb_proxy.erl
index 3ce205f..1d479d6 100644
--- a/src/enb_proxy.erl
+++ b/src/enb_proxy.erl
@@ -195,6 +195,8 @@
{ok, MmeInfo} ->
MmeName = maps:get(name, MmeInfo),
?LOG_INFO("MME selection: trying ~p", [MmeName]),
+ ctr_inc(?S1GW_CTR_ENB_PROXY_MME_SELECTED, S),
+ s1gw_metrics:ctr_inc(?S1GW_CTR_MME_SELECTED(MmeName)),
%% Close the old connection, if any
close_sock(S),
%% Initiate connection establishment with the MME
@@ -225,6 +227,7 @@
?LOG_ERROR("MME ~p: timeout establishing connection",
[hd(S#state.tried_mmes)]),
ctr_inc(?S1GW_CTR_ENB_PROXY_CONN_EST_TIMEOUT, S),
+ s1gw_metrics:ctr_inc(?S1GW_CTR_MME_CONN_EST_TIMEOUT(hd(S#state.tried_mmes))),
%% re-enter the state to try again (or another MME)
repeat_state_and_data;

@@ -253,6 +256,7 @@
?LOG_NOTICE("MME ~p: connection establishment failed: ~p",
[MmeName, ConnState]),
ctr_inc(?S1GW_CTR_ENB_PROXY_CONN_EST_FAILURE, S),
+ s1gw_metrics:ctr_inc(?S1GW_CTR_MME_CONN_EST_FAILURE(MmeName)),
%% re-enter the state to try again (or another MME)
repeat_state_and_data
end;
@@ -272,6 +276,7 @@
wait_s1setup_rsp(state_timeout, s1setup_rsp_timeout, S) ->
?LOG_ERROR("Timeout waiting for S1 SETUP RESPONSE from MME"),
ctr_inc(?S1GW_CTR_ENB_PROXY_S1_SETUP_RSP_TIMEOUT, S),
+ s1gw_metrics:ctr_inc(?S1GW_CTR_MME_S1_SETUP_RSP_TIMEOUT(hd(S#state.tried_mmes))),
%% re-enter state 'connecting' to try again (or another MME)
{next_state, connecting, S};

@@ -296,11 +301,13 @@
{{?'id-S1Setup', successfulOutcome}, _IEs} ->
?LOG_INFO("Rx S1 SETUP RESPONSE from MME"),
ctr_inc(?S1GW_CTR_ENB_PROXY_S1_SETUP_RSP, S),
+ s1gw_metrics:ctr_inc(?S1GW_CTR_MME_S1_SETUP_RSP(hd(S#state.tried_mmes))),
sctp_send_from_mme(Data, S),
{next_state, connected, S};
{{?'id-S1Setup', unsuccessfulOutcome}, _IEs} ->
?LOG_NOTICE("Rx S1 SETUP FAILURE from MME"),
ctr_inc(?S1GW_CTR_ENB_PROXY_S1_SETUP_FAILURE, S),
+ s1gw_metrics:ctr_inc(?S1GW_CTR_MME_S1_SETUP_FAILURE(hd(S#state.tried_mmes))),
%% do *not* forward the FAILURE to the eNB
%% re-enter state 'connecting' to try again (or another MME)
{next_state, connecting, S};
diff --git a/src/mme_registry.erl b/src/mme_registry.erl
index d9ca43a..d052f0c 100644
--- a/src/mme_registry.erl
+++ b/src/mme_registry.erl
@@ -199,6 +199,19 @@
lists:subtract(EnbTACs, MmeTACs) == [].


+%% Register per-MME metrics.
+%% Creates one exometer counter per connection event for the given MME name.
+%% Registration is best-effort: this function never raises, so a metrics
+%% problem cannot prevent the MME from being added to the pool.
+-spec mme_ctr_reg(mme_name()) -> ok.
+mme_ctr_reg(MmeName) ->
+    Ctrs = [?S1GW_CTR_MME_SELECTED(MmeName),
+            ?S1GW_CTR_MME_CONN_EST_TIMEOUT(MmeName),
+            ?S1GW_CTR_MME_CONN_EST_FAILURE(MmeName),
+            ?S1GW_CTR_MME_S1_SETUP_RSP(MmeName),
+            ?S1GW_CTR_MME_S1_SETUP_FAILURE(MmeName),
+            ?S1GW_CTR_MME_S1_SETUP_RSP_TIMEOUT(MmeName)],
+    lists:foreach(fun(C) -> mme_ctr_new(MmeName, C) end, Ctrs).
+
+%% Create a single counter, tolerating an already existing one.
+-spec mme_ctr_new(mme_name(), list()) -> ok.
+mme_ctr_new(MmeName, Ctr) ->
+    try exometer:new(Ctr, counter) of
+        _ -> ok
+    catch
+        %% counter already exists (e.g. MME re-registered): keep it as is.
+        %% NOTE(review): exometer is expected to raise error:exists here;
+        %% if the reason differs, the clause below merely logs it.
+        error:exists ->
+            ok;
+        %% anything else is unexpected: report it instead of hiding it
+        Class:Reason ->
+            ?LOG_WARNING("MME ~p: failed to create counter ~p: ~p:~p",
+                         [MmeName, Ctr, Class, Reason])
+    end.
+
+
%% Add a new MME if it does not already exist
-spec mme_add(mme_info(), mme_list()) -> {ok, mme_list()} | {error, term()}.
mme_add(MmeInfo0, MMEs) ->
@@ -221,6 +234,7 @@
false ->
?LOG_INFO("MME (name=~p, ~p:~p) registered",
[MmeName, RAddr, RPort]),
+ mme_ctr_reg(MmeName),
{ok, MMEs ++ [MmeInfo1]}
end.

diff --git a/src/s1gw_metrics.erl b/src/s1gw_metrics.erl
index c505a8b..551b821 100644
--- a/src/s1gw_metrics.erl
+++ b/src/s1gw_metrics.erl
@@ -107,6 +107,7 @@
?S1GW_CTR_ENB_PROXY_UNEXPECTED_PDU, %% unexpected PDUs received from eNB/MME
?S1GW_CTR_ENB_PROXY_MALFORMED_PDU, %% malformed PDUs received from eNB/MME
?S1GW_CTR_ENB_PROXY_MME_SELECT_ERROR, %% failed to select an MME (pool exhaustion)
+ ?S1GW_CTR_ENB_PROXY_MME_SELECTED, %% successfully selected an MME

%% SCTP related counters
?S1GW_CTR_SCTP_ERROR_ALL, %% total number of SCTP errors

To view, visit change 42363. To unsubscribe, or for help writing mail filters, visit settings.

Gerrit-MessageType: merged
Gerrit-Project: erlang/osmo-s1gw
Gerrit-Branch: master
Gerrit-Change-Id: Ie0149c1ad0754af6d8f5f95d4e8919993eac3760
Gerrit-Change-Number: 42363
Gerrit-PatchSet: 1
Gerrit-Owner: fixeria <vyanitskiy@sysmocom.de>
Gerrit-Reviewer: Jenkins Builder
Gerrit-Reviewer: fixeria <vyanitskiy@sysmocom.de>
Gerrit-Reviewer: osmith <osmith@sysmocom.de>
Gerrit-Reviewer: pespin <pespin@sysmocom.de>