[PATCH] libosmocore[master]: core/conv: implement optimized Viterbi decoder

This is merely a historical archive of years 2008-2021, before the migration to mailman3.

A maintained and still updated list archive can be found at https://lists.osmocom.org/hyperkitty/list/gerrit-log@lists.osmocom.org/.

Vadim Yanitskiy gerrit-no-reply at lists.osmocom.org
Thu Jan 12 12:55:27 UTC 2017


Hello tnt, Alexander Chemeris, Jenkins Builder,

I'd like you to reexamine a change.  Please visit

    https://gerrit.osmocom.org/1337

to look at the new patch set (#5).

core/conv: implement optimized Viterbi decoder

Add a separate, faster convolution decoding implementation for rates
up to N=4 and constraint lengths of K=5 and K=7, which covers the
most GSM code uses. The decoding algorithm exploits the symmetric
structure of the Viterbi add-compare-select (ACS) operation - commonly
known as the ACS butterfly. This shift-register optimization can be
found in the well-known text by Dave Forney.

Forney, G.D., "The Viterbi Algorithm," Proc. of the IEEE, March 1973.

Implementation is non-architecture specific and improves performance on
x86 as well as ARM processors. Existing API is unchanged with optimized
code being called internally for supported codes.

The original code was relicensed under GPLv2-or-later with permission
of copyright holder - Tom Tsou.

Change-Id: I74d355274b4176a7d924f91ef3c96912ce338fb2
---
M src/Makefile.am
M src/conv.c
A src/viterbi.c
A src/viterbi_gen.c
4 files changed, 807 insertions(+), 1 deletion(-)


  git pull ssh://gerrit.osmocom.org:29418/libosmocore refs/changes/37/1337/5

diff --git a/src/Makefile.am b/src/Makefile.am
index 0cf2665..6948e1a 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -16,7 +16,8 @@
 			 gsmtap_util.c crc16.c panic.c backtrace.c \
 			 conv.c application.c rbtree.c strrb.c \
 			 loggingrb.c crc8gen.c crc16gen.c crc32gen.c crc64gen.c \
-			 macaddr.c stat_item.c stats.c stats_statsd.c prim.c
+			 macaddr.c stat_item.c stats.c stats_statsd.c prim.c \
+			 viterbi.c viterbi_gen.c
 
 BUILT_SOURCES = crc8gen.c crc16gen.c crc32gen.c crc64gen.c
 
diff --git a/src/conv.c b/src/conv.c
index f13deef..79b3a7c 100644
--- a/src/conv.c
+++ b/src/conv.c
@@ -238,6 +238,11 @@
 
 #define MAX_AE 0x00ffffff
 
+/* Forward declaration for accerlated decoding with certain codes */
+int
+osmo_conv_decode_acc(const struct osmo_conv_code *code,
+                     const sbit_t *input, ubit_t *output);
+
 void
 osmo_conv_decode_init(struct osmo_conv_decoder *decoder,
                       const struct osmo_conv_code *code, int len, int start_state)
@@ -606,6 +611,10 @@
 	struct osmo_conv_decoder decoder;
 	int rv, l;
 
+	/* Use accelerated implementation for supported codes */
+	if ((code->N <= 4) && ((code->K == 5) || (code->K == 7)))
+		return osmo_conv_decode_acc(code, input, output);
+
 	osmo_conv_decode_init(&decoder, code, 0, 0);
 
 	if (code->term == CONV_TERM_TAIL_BITING) {
diff --git a/src/viterbi.c b/src/viterbi.c
new file mode 100644
index 0000000..9422575
--- /dev/null
+++ b/src/viterbi.c
@@ -0,0 +1,603 @@
+/*
+ * Viterbi decoder
+ *
+ * Copyright (C) 2013, 2014 Thomas Tsou <tom at tsou.cc>
+ *
+ * All Rights Reserved
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include <osmocom/core/conv.h>
+#include "config.h"
+
+#define BIT2NRZ(REG,N)	(((REG >> N) & 0x01) * 2 - 1) * -1
+#define NUM_STATES(K)	(K == 7 ? 64 : 16)
+#define SSE_ALIGN	16
+
+/* Forward Metric Units */
+void osmo_conv_gen_metrics_k5_n2(const int8_t *seq, const int16_t *out,
+	int16_t *sums, int16_t *paths, int norm);
+void osmo_conv_gen_metrics_k5_n3(const int8_t *seq, const int16_t *out,
+	int16_t *sums, int16_t *paths, int norm);
+void osmo_conv_gen_metrics_k5_n4(const int8_t *seq, const int16_t *out,
+	int16_t *sums, int16_t *paths, int norm);
+void osmo_conv_gen_metrics_k7_n2(const int8_t *seq, const int16_t *out,
+	int16_t *sums, int16_t *paths, int norm);
+void osmo_conv_gen_metrics_k7_n3(const int8_t *seq, const int16_t *out,
+	int16_t *sums, int16_t *paths, int norm);
+void osmo_conv_gen_metrics_k7_n4(const int8_t *seq, const int16_t *out,
+	int16_t *sums, int16_t *paths, int norm);
+
+/* Trellis State
+ * state - Internal lshift register value
+ * prev  - Register values of previous 0 and 1 states
+ */
+struct vstate {
+	unsigned state;
+	unsigned prev[2];
+};
+
+/* Trellis Object
+ * num_states - Number of states in the trellis
+ * sums       - Accumulated path metrics
+ * outputs    - Trellis output values
+ * vals       - Input value that led to each state
+ */
+struct vtrellis {
+	int num_states;
+	int16_t *sums;
+	int16_t *outputs;
+	uint8_t *vals;
+};
+
+/* Viterbi Decoder
+ * n         - Code order
+ * k         - Constraint length
+ * len       - Horizontal length of trellis
+ * recursive - Set to '1' if the code is recursive
+ * intrvl    - Normalization interval
+ * trellis   - Trellis object
+ * punc      - Puncturing sequence
+ * paths     - Trellis paths
+ */
+struct vdecoder {
+	int n;
+	int k;
+	int len;
+	int recursive;
+	int intrvl;
+	struct vtrellis *trellis;
+	int *punc;
+	int16_t **paths;
+
+	void (*metric_func)(const int8_t *, const int16_t *,
+		int16_t *, int16_t *, int);
+};
+
+/* Aligned Memory Allocator
+ * SSE requires 16-byte memory alignment. We store relevant trellis values
+ * (accumulated sums, outputs, and path decisions) as 16 bit signed integers
+ * so the allocated memory is casted as such.
+ */
+static int16_t *vdec_malloc(size_t n)
+{
+#ifdef HAVE_SSE3
+	return (int16_t *) memalign(SSE_ALIGN, sizeof(int16_t) * n);
+#else
+	return (int16_t *) malloc(sizeof(int16_t) * n);
+#endif
+}
+
+/* Accessor calls */
+static inline int conv_code_recursive(const struct osmo_conv_code *code)
+{
+	return code->next_term_output ? 1 : 0;
+}
+
+/* Left shift and mask for finding the previous state */
+static unsigned vstate_lshift(unsigned reg, int k, int val)
+{
+	unsigned mask;
+
+	if (k == 5)
+		mask = 0x0e;
+	else if (k == 7)
+		mask = 0x3e;
+	else
+		mask = 0;
+
+	return ((reg << 1) & mask) | val;
+}
+
+/* Bit endian manipulators */
+static inline unsigned bitswap2(unsigned v)
+{
+	return ((v & 0x02) >> 1) | ((v & 0x01) << 1);
+}
+
+static inline unsigned bitswap3(unsigned v)
+{
+	return ((v & 0x04) >> 2) | ((v & 0x02) >> 0) |
+		((v & 0x01) << 2);
+}
+
+static inline unsigned bitswap4(unsigned v)
+{
+	return ((v & 0x08) >> 3) | ((v & 0x04) >> 1) |
+		((v & 0x02) << 1) | ((v & 0x01) << 3);
+}
+
+static inline unsigned bitswap5(unsigned v)
+{
+	return ((v & 0x10) >> 4) | ((v & 0x08) >> 2) | ((v & 0x04) >> 0) |
+		((v & 0x02) << 2) | ((v & 0x01) << 4);
+}
+
+static inline unsigned bitswap6(unsigned v)
+{
+	return ((v & 0x20) >> 5) | ((v & 0x10) >> 3) | ((v & 0x08) >> 1) |
+		((v & 0x04) << 1) | ((v & 0x02) << 3) | ((v & 0x01) << 5);
+}
+
+static unsigned bitswap(unsigned v, unsigned n)
+{
+	switch (n) {
+	case 1:
+		return v;
+	case 2:
+		return bitswap2(v);
+	case 3:
+		return bitswap3(v);
+	case 4:
+		return bitswap4(v);
+	case 5:
+		return bitswap5(v);
+	case 6:
+		return bitswap6(v);
+	default:
+		return 0;
+	}
+}
+
+/* Generate non-recursive state output from generator state table
+ * Note that the shift register moves right (i.e. the most recent bit is
+ * shifted into the register at k-1 bit of the register), which is typical
+ * textbook representation. The API transition table expects the most recent
+ * bit in the low order bit, or left shift. A bitswap operation is required
+ * to accommodate the difference.
+ */
+static unsigned gen_output(struct vstate *state, int val,
+	const struct osmo_conv_code *code)
+{
+	unsigned out, prev;
+
+	prev = bitswap(state->prev[0], code->K - 1);
+	out = code->next_output[prev][val];
+	out = bitswap(out, code->N);
+
+	return out;
+}
+
+/* Populate non-recursive trellis state
+ * For a given state defined by the k-1 length shift register, find the
+ * value of the input bit that drove the trellis to that state. Also
+ * generate the N outputs of the generator polynomial at that state.
+ */
+static int gen_state_info(uint8_t *val, unsigned reg,
+	int16_t *output, const struct osmo_conv_code *code)
+{
+	int i;
+	unsigned out;
+	struct vstate state;
+
+	/* Previous '0' state */
+	state.state = reg;
+	state.prev[0] = vstate_lshift(reg, code->K, 0);
+	state.prev[1] = vstate_lshift(reg, code->K, 1);
+
+	*val = (reg >> (code->K - 2)) & 0x01;
+
+	/* Transition output */
+	out = gen_output(&state, *val, code);
+
+	/* Unpack to NRZ */
+	for (i = 0; i < code->N; i++)
+		output[i] = BIT2NRZ(out, i);
+
+	return 0;
+}
+
+/* Generate recursive state output from generator state table */
+static unsigned gen_recursive_output(struct vstate *state,
+	uint8_t *val, unsigned reg,
+	const struct osmo_conv_code *code, int pos)
+{
+	int val0, val1;
+	unsigned out, prev;
+
+	/* Previous '0' state */
+	prev = vstate_lshift(reg, code->K, 0);
+	prev = bitswap(prev, code->K - 1);
+
+	/* Input value */
+	val0 = (reg >> (code->K - 2)) & 0x01;
+	val1 = (code->next_term_output[prev] >> pos) & 0x01;
+	*val = val0 == val1 ? 0 : 1;
+
+	/* Wrapper for osmocom state access */
+	prev = bitswap(state->prev[0], code->K - 1);
+
+	/* Compute the transition output */
+	out = code->next_output[prev][*val];
+	out = bitswap(out, code->N);
+
+	return out;
+}
+
+/* Populate recursive trellis state
+ * The bit position of the systematic bit is not explicitly marked by the
+ * API, so it must be extracted from the generator table. Otherwise,
+ * populate the trellis similar to the non-recursive version.
+ * Non-systematic recursive codes are not supported.
+ */
+static int gen_recursive_state_info(uint8_t *val,
+	unsigned reg, int16_t *output, const struct osmo_conv_code *code)
+{
+	int i, j, pos = -1;
+	int ns = NUM_STATES(code->K);
+	unsigned out;
+	struct vstate state;
+
+	/* Previous '0' and '1' states */
+	state.state = reg;
+	state.prev[0] = vstate_lshift(reg, code->K, 0);
+	state.prev[1] = vstate_lshift(reg, code->K, 1);
+
+	/* Find recursive bit location */
+	for (i = 0; i < code->N; i++) {
+		for (j = 0; j < ns; j++) {
+			if ((code->next_output[j][0] >> i) & 0x01)
+				break;
+		}
+
+		if (j == ns) {
+			pos = i;
+			break;
+		}
+	}
+
+	/* Non-systematic recursive code not supported */
+	if (pos < 0)
+		return -EPROTO;
+
+	/* Transition output */
+	out = gen_recursive_output(&state, val, reg, code, pos);
+
+	/* Unpack to NRZ */
+	for (i = 0; i < code->N; i++)
+		output[i] = BIT2NRZ(out, i);
+
+	return 0;
+}
+
+/* Release the trellis */
+static void free_trellis(struct vtrellis *trellis)
+{
+	if (!trellis)
+		return;
+
+	free(trellis->vals);
+	free(trellis->outputs);
+	free(trellis->sums);
+	free(trellis);
+}
+
+/* Allocate and initialize the trellis object
+ * Initialization consists of generating the outputs and output value of a
+ * given state. Due to trellis symmetry and anti-symmetry, only one of the
+ * transition paths is utilized by the butterfly operation in the forward
+ * recursion, so only one set of N outputs is required per state variable.
+ */
+static struct vtrellis *generate_trellis(const struct osmo_conv_code *code)
+{
+	int i, rc = -1;
+	struct vtrellis *trellis;
+	int16_t *outputs;
+
+	int ns = NUM_STATES(code->K);
+	int recursive = conv_code_recursive(code);
+	int olen = (code->N == 2) ? 2 : 4;
+
+	trellis = (struct vtrellis *) calloc(1, sizeof(struct vtrellis));
+	trellis->num_states = ns;
+	trellis->sums =	vdec_malloc(ns);
+	trellis->outputs = vdec_malloc(ns * olen);
+	trellis->vals = (uint8_t *) malloc(ns * sizeof(uint8_t));
+
+	if (!trellis->sums || !trellis->outputs)
+		goto fail;
+
+	/* Populate the trellis state objects */
+	for (i = 0; i < ns; i++) {
+		outputs = &trellis->outputs[olen * i];
+		if (recursive) {
+			rc = gen_recursive_state_info(&trellis->vals[i],
+				i, outputs, code);
+		} else {
+			rc = gen_state_info(&trellis->vals[i],
+				i, outputs, code);
+		}
+	}
+
+	if (rc < 0)
+		goto fail;
+
+	return trellis;
+
+fail:
+	free_trellis(trellis);
+	return NULL;
+}
+
+/* Reset decoder
+ * Set accumulated path metrics to zero. For termination other than
+ * tail-biting, initialize the zero state as the encoder starting state.
+ * Initialize with the maximum accumulated sum at length equal to the
+ * constraint length.
+ */
+static void reset_decoder(struct vdecoder *dec, int term)
+{
+	int ns = dec->trellis->num_states;
+
+	memset(dec->trellis->sums, 0, sizeof(int16_t) * ns);
+
+	if (term != CONV_TERM_TAIL_BITING)
+		dec->trellis->sums[0] = INT8_MAX * dec->n * dec->k;
+}
+
+static void _traceback(struct vdecoder *dec,
+	unsigned state, uint8_t *out, int len)
+{
+	int i;
+	unsigned path;
+
+	for (i = len - 1; i >= 0; i--) {
+		path = dec->paths[i][state] + 1;
+		out[i] = dec->trellis->vals[state];
+		state = vstate_lshift(state, dec->k, path);
+	}
+}
+
+static void _traceback_rec(struct vdecoder *dec,
+	unsigned state, uint8_t *out, int len)
+{
+	int i;
+	unsigned path;
+
+	for (i = len - 1; i >= 0; i--) {
+		path = dec->paths[i][state] + 1;
+		out[i] = path ^ dec->trellis->vals[state];
+		state = vstate_lshift(state, dec->k, path);
+	}
+}
+
+/* Traceback and generate decoded output
+ * Find the largest accumulated path metric at the final state except for
+ * the zero terminated case, where we assume the final state is always zero.
+ */
+static int traceback(struct vdecoder *dec, uint8_t *out, int term, int len)
+{
+	int i, sum, max = -1;
+	unsigned path, state = 0;
+
+	if (term != CONV_TERM_FLUSH) {
+		for (i = 0; i < dec->trellis->num_states; i++) {
+			sum = dec->trellis->sums[i];
+			if (sum > max) {
+				max = sum;
+				state = i;
+			}
+		}
+
+		if (max < 0)
+			return -EPROTO;
+	}
+
+	for (i = dec->len - 1; i >= len; i--) {
+		path = dec->paths[i][state] + 1;
+		state = vstate_lshift(state, dec->k, path);
+	}
+
+	if (dec->recursive)
+		_traceback_rec(dec, state, out, len);
+	else
+		_traceback(dec, state, out, len);
+
+	return 0;
+}
+
+/* Release decoder object */
+static void free_vdec(struct vdecoder *dec)
+{
+	if (!dec)
+		return;
+
+	free(dec->paths[0]);
+	free(dec->paths);
+	free_trellis(dec->trellis);
+	free(dec);
+}
+
+/* Allocate decoder object
+ * Subtract the constraint length K on the normalization interval to
+ * accommodate the initialization path metric at state zero.
+ */
+static struct vdecoder *alloc_vdec(const struct osmo_conv_code *code)
+{
+	int i, ns;
+	struct vdecoder *dec;
+
+	ns = NUM_STATES(code->K);
+
+	dec = (struct vdecoder *) calloc(1, sizeof(struct vdecoder));
+	dec->n = code->N;
+	dec->k = code->K;
+	dec->recursive = conv_code_recursive(code);
+	dec->intrvl = INT16_MAX / (dec->n * INT8_MAX) - dec->k;
+
+	if (dec->k == 5) {
+		switch (dec->n) {
+		case 2:
+			dec->metric_func = osmo_conv_gen_metrics_k5_n2;
+			break;
+		case 3:
+			dec->metric_func = osmo_conv_gen_metrics_k5_n3;
+			break;
+		case 4:
+			dec->metric_func = osmo_conv_gen_metrics_k5_n4;
+			break;
+		default:
+			goto fail;
+		}
+	} else if (dec->k == 7) {
+		switch (dec->n) {
+		case 2:
+			dec->metric_func = osmo_conv_gen_metrics_k7_n2;
+			break;
+		case 3:
+			dec->metric_func = osmo_conv_gen_metrics_k7_n3;
+			break;
+		case 4:
+			dec->metric_func = osmo_conv_gen_metrics_k7_n4;
+			break;
+		default:
+			goto fail;
+		}
+	} else {
+		goto fail;
+	}
+
+	if (code->term == CONV_TERM_FLUSH)
+		dec->len = code->len + code->K - 1;
+	else
+		dec->len = code->len;
+
+	dec->trellis = generate_trellis(code);
+	if (!dec->trellis)
+		goto fail;
+
+	dec->paths = (int16_t **) malloc(sizeof(int16_t *) * dec->len);
+	dec->paths[0] = vdec_malloc(ns * dec->len);
+	for (i = 1; i < dec->len; i++)
+		dec->paths[i] = &dec->paths[0][i * ns];
+
+	return dec;
+
+fail:
+	free_vdec(dec);
+	return NULL;
+}
+
+/* Depuncture sequence with nagative value terminated puncturing matrix */
+static int depuncture(const int8_t *in, const int *punc, int8_t *out, int len)
+{
+	int i, n = 0, m = 0;
+
+	for (i = 0; i < len; i++) {
+		if (i == punc[n]) {
+			out[i] = 0;
+			n++;
+			continue;
+		}
+
+		out[i] = in[m++];
+	}
+
+	return 0;
+}
+
+/* Forward trellis recursion
+ * Generate branch metrics and path metrics with a combined function. Only
+ * accumulated path metric sums and path selections are stored. Normalize on
+ * the interval specified by the decoder.
+ */
+static void _conv_decode(struct vdecoder *dec, const int8_t *seq, int _cnt)
+{
+	int i, len = dec->len;
+	struct vtrellis *trellis = dec->trellis;
+
+	for (i = 0; i < len; i++) {
+		dec->metric_func(&seq[dec->n * i],
+			trellis->outputs,
+			trellis->sums,
+			dec->paths[i],
+			!(i % dec->intrvl));
+	}
+}
+
+/* Convolutional decode with a decoder object
+ * Initial puncturing run if necessary followed by the forward recursion.
+ * For tail-biting perform a second pass before running the backward
+ * traceback operation.
+ */
+static int conv_decode(struct vdecoder *dec, const int8_t *seq,
+	const int *punc, uint8_t *out, int len, int term)
+{
+	int cnt = 0;
+	int8_t depunc[dec->len * dec->n];
+
+	reset_decoder(dec, term);
+
+	if (punc) {
+		depuncture(seq, punc, depunc, dec->len * dec->n);
+		seq = depunc;
+	}
+
+	/* Propagate through the trellis with interval normalization */
+	_conv_decode(dec, seq, cnt);
+
+	if (term == CONV_TERM_TAIL_BITING)
+		_conv_decode(dec, seq, cnt);
+
+	return traceback(dec, out, term, len);
+}
+
+/* All-in-one Viterbi decoding  */
+int osmo_conv_decode_acc(const struct osmo_conv_code *code,
+	const sbit_t *input, ubit_t *output)
+{
+	int rc;
+	struct vdecoder *vdec;
+
+	if ((code->N < 2) || (code->N > 4) || (code->len < 1) ||
+		((code->K != 5) && (code->K != 7)))
+		return -EINVAL;
+
+	vdec = alloc_vdec(code);
+	if (!vdec)
+		return -EFAULT;
+
+	rc = conv_decode(vdec, input, code->puncture,
+		output, code->len, code->term);
+
+	free_vdec(vdec);
+
+	return rc;
+}
diff --git a/src/viterbi_gen.c b/src/viterbi_gen.c
new file mode 100644
index 0000000..219b25b
--- /dev/null
+++ b/src/viterbi_gen.c
@@ -0,0 +1,193 @@
+/*
+ * Viterbi decoder
+ *
+ * Copyright (C) 2013, 2014 Thomas Tsou <tom at tsou.cc>
+ *
+ * All Rights Reserved
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <stdint.h>
+#include <string.h>
+
+/* Add-Compare-Select (ACS-Butterfly)
+ * Compute 4 accumulated path metrics and 4 path selections. Note that path
+ * selections are store as -1 and 0 rather than 0 and 1. This is to match
+ * the output format of the SSE packed compare instruction 'pmaxuw'.
+ */
+
+static void acs_butterfly(int state, int num_states,
+	int16_t metric, int16_t *sum,
+	int16_t *new_sum, int16_t *path)
+{
+	int state0, state1;
+	int sum0, sum1, sum2, sum3;
+
+	state0 = *(sum + (2 * state + 0));
+	state1 = *(sum + (2 * state + 1));
+
+	sum0 = state0 + metric;
+	sum1 = state1 - metric;
+	sum2 = state0 - metric;
+	sum3 = state1 + metric;
+
+	if (sum0 > sum1) {
+		*new_sum = sum0;
+		*path = -1;
+	} else {
+		*new_sum = sum1;
+		*path = 0;
+	}
+
+	if (sum2 > sum3) {
+		*(new_sum + num_states / 2) = sum2;
+		*(path + num_states / 2) = -1;
+	} else {
+		*(new_sum + num_states / 2) = sum3;
+		*(path + num_states / 2) = 0;
+	}
+}
+
+/* Branch metrics unit N=2 */
+static void gen_branch_metrics_n2(int num_states, const int8_t *seq,
+	const int16_t *out, int16_t *metrics)
+{
+	int i;
+
+	for (i = 0; i < num_states / 2; i++) {
+		metrics[i] = seq[0] * out[2 * i + 0] +
+			seq[1] * out[2 * i + 1];
+	}
+}
+
+/* Branch metrics unit N=3 */
+static void gen_branch_metrics_n3(int num_states, const int8_t *seq,
+	const int16_t *out, int16_t *metrics)
+{
+	int i;
+
+	for (i = 0; i < num_states / 2; i++) {
+		metrics[i] = seq[0] * out[4 * i + 0] +
+			seq[1] * out[4 * i + 1] +
+			seq[2] * out[4 * i + 2];
+	}
+}
+
+/* Branch metrics unit N=4 */
+static void gen_branch_metrics_n4(int num_states, const int8_t *seq,
+	const int16_t *out, int16_t *metrics)
+{
+	int i;
+
+	for (i = 0; i < num_states / 2; i++) {
+		metrics[i] = seq[0] * out[4 * i + 0] +
+			seq[1] * out[4 * i + 1] +
+			seq[2] * out[4 * i + 2] +
+			seq[3] * out[4 * i + 3];
+	}
+}
+
+/* Path metric unit */
+static void gen_path_metrics(int num_states, int16_t *sums,
+	int16_t *metrics, int16_t *paths, int norm)
+{
+	int i;
+	int16_t min;
+	int16_t new_sums[num_states];
+
+	for (i = 0; i < num_states / 2; i++)
+		acs_butterfly(i, num_states, metrics[i],
+			sums, &new_sums[i], &paths[i]);
+
+	if (norm) {
+		min = new_sums[0];
+
+		for (i = 1; i < num_states; i++)
+			if (new_sums[i] < min)
+				min = new_sums[i];
+
+		for (i = 0; i < num_states; i++)
+			new_sums[i] -= min;
+	}
+
+	memcpy(sums, new_sums, num_states * sizeof(int16_t));
+}
+
+/* 16-state branch-path metrics units (K=5) */
+__attribute__ ((visibility("hidden")))
+void osmo_conv_gen_metrics_k5_n2(const int8_t *seq, const int16_t *out,
+	int16_t *sums, int16_t *paths, int norm)
+{
+	int16_t metrics[8];
+
+	gen_branch_metrics_n2(16, seq, out, metrics);
+	gen_path_metrics(16, sums, metrics, paths, norm);
+}
+
+__attribute__ ((visibility("hidden")))
+void osmo_conv_gen_metrics_k5_n3(const int8_t *seq, const int16_t *out,
+	int16_t *sums, int16_t *paths, int norm)
+{
+	int16_t metrics[8];
+
+	gen_branch_metrics_n3(16, seq, out, metrics);
+	gen_path_metrics(16, sums, metrics, paths, norm);
+
+}
+
+__attribute__ ((visibility("hidden")))
+void osmo_conv_gen_metrics_k5_n4(const int8_t *seq, const int16_t *out,
+	int16_t *sums, int16_t *paths, int norm)
+{
+	int16_t metrics[8];
+
+	gen_branch_metrics_n4(16, seq, out, metrics);
+	gen_path_metrics(16, sums, metrics, paths, norm);
+
+}
+
+/* 64-state branch-path metrics units (K=7) */
+__attribute__ ((visibility("hidden")))
+void osmo_conv_gen_metrics_k7_n2(const int8_t *seq, const int16_t *out,
+	int16_t *sums, int16_t *paths, int norm)
+{
+	int16_t metrics[32];
+
+	gen_branch_metrics_n2(64, seq, out, metrics);
+	gen_path_metrics(64, sums, metrics, paths, norm);
+
+}
+
+__attribute__ ((visibility("hidden")))
+void osmo_conv_gen_metrics_k7_n3(const int8_t *seq, const int16_t *out,
+	int16_t *sums, int16_t *paths, int norm)
+{
+	int16_t metrics[32];
+
+	gen_branch_metrics_n3(64, seq, out, metrics);
+	gen_path_metrics(64, sums, metrics, paths, norm);
+
+}
+
+__attribute__ ((visibility("hidden")))
+void osmo_conv_gen_metrics_k7_n4(const int8_t *seq, const int16_t *out,
+	int16_t *sums, int16_t *paths, int norm)
+{
+	int16_t metrics[32];
+
+	gen_branch_metrics_n4(64, seq, out, metrics);
+	gen_path_metrics(64, sums, metrics, paths, norm);
+}

-- 
To view, visit https://gerrit.osmocom.org/1337
To unsubscribe, visit https://gerrit.osmocom.org/settings

Gerrit-MessageType: newpatchset
Gerrit-Change-Id: I74d355274b4176a7d924f91ef3c96912ce338fb2
Gerrit-PatchSet: 5
Gerrit-Project: libosmocore
Gerrit-Branch: master
Gerrit-Owner: Vadim Yanitskiy <axilirator at gmail.com>
Gerrit-Reviewer: Alexander Chemeris <Alexander.Chemeris at gmail.com>
Gerrit-Reviewer: Harald Welte <laforge at gnumonks.org>
Gerrit-Reviewer: Jenkins Builder
Gerrit-Reviewer: Tom Tsou <tom at tsou.cc>
Gerrit-Reviewer: Vadim Yanitskiy <axilirator at gmail.com>
Gerrit-Reviewer: tnt <tnt at 246tNt.com>



More information about the gerrit-log mailing list