[PATCH] libosmocore[master]: conv_acc: Our code requires SSSE3, not just SSE3

This is merely a historical archive of the years 2008-2021, before the migration to mailman3.

A maintained and still updated list archive can be found at https://lists.osmocom.org/hyperkitty/list/gerrit-log@lists.osmocom.org/.

Harald Welte gerrit-no-reply at lists.osmocom.org
Fri Nov 17 10:45:39 UTC 2017


Review at  https://gerrit.osmocom.org/4892

conv_acc: Our code requires SSSE3, not just SSE3

The accelerated convolutional decoder uses SSSE3 instructions such
as PSIGNW (via _mm_sign_epi16) which go beyond what SSE3 offers.  So
let's make sure we use the right compiler flag (-mssse3) and also the
right runtime check.

Without this patch, we would execute illegal instructions on CPUs such
as Opteron Gen3 (e.g. the Opteron 2427).  Those CPUs are also used as
build.opensuse.org build hosts (build31 through build36), where we
wouldn't pass "make check" as a result.

Change-Id: I2754164384109f2821fd98ffb48f625893f2923d
Fixes: OS#2386
---
M configure.ac
M m4/ax_check_simd.m4
M src/Makefile.am
M src/conv_acc.c
M src/conv_acc_sse.c
M src/conv_acc_sse_avx.c
6 files changed, 29 insertions(+), 29 deletions(-)


  git pull ssh://gerrit.osmocom.org:29418/libosmocore refs/changes/92/4892/1

diff --git a/configure.ac b/configure.ac
index a8c1d2e..f7acf05 100644
--- a/configure.ac
+++ b/configure.ac
@@ -281,7 +281,7 @@
 	AX_CHECK_SIMD
 else
 	AM_CONDITIONAL(HAVE_AVX2, false)
-	AM_CONDITIONAL(HAVE_SSE3, false)
+	AM_CONDITIONAL(HAVE_SSSE3, false)
 	AM_CONDITIONAL(HAVE_SSE4_1, false)
 fi
 
diff --git a/m4/ax_check_simd.m4 b/m4/ax_check_simd.m4
index 8a0ceb7..daca2be 100644
--- a/m4/ax_check_simd.m4
+++ b/m4/ax_check_simd.m4
@@ -19,7 +19,7 @@
 #
 #   And defines:
 #
-#      HAVE_AVX3 / HAVE_SSE3 / HAVE_SSE4.1
+#      HAVE_AVX3 / HAVE_SSSE3 / HAVE_SSE4.1
 #
 # LICENSE
 #
@@ -42,7 +42,7 @@
   AC_REQUIRE([AC_CANONICAL_HOST])
 
   AM_CONDITIONAL(HAVE_AVX2, false)
-  AM_CONDITIONAL(HAVE_SSE3, false)
+  AM_CONDITIONAL(HAVE_SSSE3, false)
   AM_CONDITIONAL(HAVE_SSE4_1, false)
 
   case $host_cpu in
@@ -57,14 +57,14 @@
         AC_MSG_WARN([Your compiler does not support AVX2 instructions])
       fi
 
-      AX_CHECK_COMPILE_FLAG(-msse3, ax_cv_support_sse3_ext=yes, [])
-      if test x"$ax_cv_support_sse3_ext" = x"yes"; then
-        SIMD_FLAGS="$SIMD_FLAGS -msse3"
-        AC_DEFINE(HAVE_SSE3,,
-          [Support SSE3 (Streaming SIMD Extensions 3) instructions])
-        AM_CONDITIONAL(HAVE_SSE3, true)
+      AX_CHECK_COMPILE_FLAG(-mssse3, ax_cv_support_ssse3_ext=yes, [])
+      if test x"$ax_cv_support_ssse3_ext" = x"yes"; then
+        SIMD_FLAGS="$SIMD_FLAGS -mssse3"
+        AC_DEFINE(HAVE_SSSE3,,
+          [Support SSSE3 (Supplemental Streaming SIMD Extensions 3) instructions])
+        AM_CONDITIONAL(HAVE_SSSE3, true)
       else
-        AC_MSG_WARN([Your compiler does not support SSE3 instructions])
+        AC_MSG_WARN([Your compiler does not support SSSE3 instructions])
       fi
 
       AX_CHECK_COMPILE_FLAG(-msse4.1, ax_cv_support_sse41_ext=yes, [])
diff --git a/src/Makefile.am b/src/Makefile.am
index e7f94ce..3d6e6f7 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -23,20 +23,20 @@
 			 macaddr.c stat_item.c stats.c stats_statsd.c prim.c \
 			 conv_acc.c conv_acc_generic.c sercomm.c prbs.c
 
-if HAVE_SSE3
+if HAVE_SSSE3
 libosmocore_la_SOURCES += conv_acc_sse.c
 if HAVE_SSE4_1
-conv_acc_sse.lo : AM_CFLAGS += -msse3 -msse4.1
+conv_acc_sse.lo : AM_CFLAGS += -mssse3 -msse4.1
 else
-conv_acc_sse.lo : AM_CFLAGS += -msse3
+conv_acc_sse.lo : AM_CFLAGS += -mssse3
 endif
 
 if HAVE_AVX2
 libosmocore_la_SOURCES += conv_acc_sse_avx.c
 if HAVE_SSE4_1
-conv_acc_sse_avx.lo : AM_CFLAGS += -msse3 -mavx2 -msse4.1
+conv_acc_sse_avx.lo : AM_CFLAGS += -mssse3 -mavx2 -msse4.1
 else
-conv_acc_sse_avx.lo : AM_CFLAGS += -msse3 -mavx2
+conv_acc_sse_avx.lo : AM_CFLAGS += -mssse3 -mavx2
 endif
 endif
 endif
diff --git a/src/conv_acc.c b/src/conv_acc.c
index 33fe264..c16e436 100644
--- a/src/conv_acc.c
+++ b/src/conv_acc.c
@@ -48,7 +48,7 @@
 static int init_complete = 0;
 
 __attribute__ ((visibility("hidden"))) int avx2_supported = 0;
-__attribute__ ((visibility("hidden"))) int sse3_supported = 0;
+__attribute__ ((visibility("hidden"))) int ssse3_supported = 0;
 __attribute__ ((visibility("hidden"))) int sse41_supported = 0;
 
 /**
@@ -75,12 +75,12 @@
 int16_t *osmo_conv_gen_vdec_malloc(size_t n);
 void osmo_conv_gen_vdec_free(int16_t *ptr);
 
-#if defined(HAVE_SSE3)
+#if defined(HAVE_SSSE3)
 int16_t *osmo_conv_sse_vdec_malloc(size_t n);
 void osmo_conv_sse_vdec_free(int16_t *ptr);
 #endif
 
-#if defined(HAVE_SSE3) && defined(HAVE_AVX2)
+#if defined(HAVE_SSSE3) && defined(HAVE_AVX2)
 int16_t *osmo_conv_sse_avx_vdec_malloc(size_t n);
 void osmo_conv_sse_avx_vdec_free(int16_t *ptr);
 #endif
@@ -99,7 +99,7 @@
 void osmo_conv_gen_metrics_k7_n4(const int8_t *seq, const int16_t *out,
 	int16_t *sums, int16_t *paths, int norm);
 
-#if defined(HAVE_SSE3)
+#if defined(HAVE_SSSE3)
 void osmo_conv_sse_metrics_k5_n2(const int8_t *seq, const int16_t *out,
 	int16_t *sums, int16_t *paths, int norm);
 void osmo_conv_sse_metrics_k5_n3(const int8_t *seq, const int16_t *out,
@@ -114,7 +114,7 @@
 	int16_t *sums, int16_t *paths, int norm);
 #endif
 
-#if defined(HAVE_SSE3) && defined(HAVE_AVX2)
+#if defined(HAVE_SSSE3) && defined(HAVE_AVX2)
 void osmo_conv_sse_avx_metrics_k5_n2(const int8_t *seq, const int16_t *out,
 	int16_t *sums, int16_t *paths, int norm);
 void osmo_conv_sse_avx_metrics_k5_n3(const int8_t *seq, const int16_t *out,
@@ -654,8 +654,8 @@
 		avx2_supported = __builtin_cpu_supports("avx2");
 	#endif
 
-	#ifdef HAVE_SSE3
-		sse3_supported = __builtin_cpu_supports("sse3");
+	#ifdef HAVE_SSSE3
+		ssse3_supported = __builtin_cpu_supports("ssse3");
 	#endif
 
 	#ifdef HAVE_SSE4_1
@@ -667,16 +667,16 @@
  * Usage of curly braces is mandatory,
  * because we use multi-line define.
  */
-#if defined(HAVE_SSE3) && defined(HAVE_AVX2)
-	if (sse3_supported && avx2_supported) {
+#if defined(HAVE_SSSE3) && defined(HAVE_AVX2)
+	if (ssse3_supported && avx2_supported) {
 		INIT_POINTERS(sse_avx);
-	} else if (sse3_supported) {
+	} else if (ssse3_supported) {
 		INIT_POINTERS(sse);
 	} else {
 		INIT_POINTERS(gen);
 	}
-#elif defined(HAVE_SSE3)
-	if (sse3_supported) {
+#elif defined(HAVE_SSSE3)
+	if (ssse3_supported) {
 		INIT_POINTERS(sse);
 	} else {
 		INIT_POINTERS(gen);
diff --git a/src/conv_acc_sse.c b/src/conv_acc_sse.c
index a9679ef..63d8722 100644
--- a/src/conv_acc_sse.c
+++ b/src/conv_acc_sse.c
@@ -1,6 +1,6 @@
 /*! \file conv_acc_sse.c
  * Accelerated Viterbi decoder implementation
- * for architectures with only SSE3 available. */
+ * for architectures with only SSSE3 available. */
 /*
  * Copyright (C) 2013, 2014 Thomas Tsou <tom at tsou.cc>
  *
diff --git a/src/conv_acc_sse_avx.c b/src/conv_acc_sse_avx.c
index 5b6e704..5ac3c16 100644
--- a/src/conv_acc_sse_avx.c
+++ b/src/conv_acc_sse_avx.c
@@ -1,6 +1,6 @@
 /*! \file conv_acc_sse_avx.c
  * Accelerated Viterbi decoder implementation
- * for architectures with both SSE3 and AVX2 support. */
+ * for architectures with both SSSE3 and AVX2 support. */
 /*
  * Copyright (C) 2013, 2014 Thomas Tsou <tom at tsou.cc>
  *

-- 
To view, visit https://gerrit.osmocom.org/4892
To unsubscribe, visit https://gerrit.osmocom.org/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I2754164384109f2821fd98ffb48f625893f2923d
Gerrit-PatchSet: 1
Gerrit-Project: libosmocore
Gerrit-Branch: master
Gerrit-Owner: Harald Welte <laforge at gnumonks.org>



More information about the gerrit-log mailing list