Hoernchen has uploaded this change for review. ( https://gerrit.osmocom.org/c/osmo-trx/+/30966 )
Change subject: ms: adjust float<->integral type conversion ......................................................................
ms: adjust float<->integral type conversion
Given an integral type A, a non-integral type B, a value x and a scale s of type B, and depending on rounding mode, optimization, compiler, and phase of the moon, A(x)*s != A(x*s) (cast-then-scale vs. scale-then-cast), so split the two cases.
While at it, also make the template automagically work for complex types instead of requiring manual casts. The general idea here is to allow inlining and vectorization by treating all args as plain arrays, which is fine.
This works as expected with -tune=native; x64 implies SSE2, and we do not target any NEON-less ARM versions either.
Clang-only array length hints can improve this even more.
Change-Id: I93f077f967daf2ed382d12cc20a54846b3688634 --- M Transceiver52M/Complex.h M Transceiver52M/ms/ms.cpp M Transceiver52M/ms/ms.h M Transceiver52M/ms/ms_rx_burst_test.cpp M Transceiver52M/ms/ms_rx_lower.cpp M Transceiver52M/ms/ms_upper.cpp 6 files changed, 46 insertions(+), 20 deletions(-)
git pull ssh://gerrit.osmocom.org:29418/osmo-trx refs/changes/66/30966/1
diff --git a/Transceiver52M/Complex.h b/Transceiver52M/Complex.h index 6e72346..597a26f 100644 --- a/Transceiver52M/Complex.h +++ b/Transceiver52M/Complex.h @@ -29,7 +29,7 @@ template<class Real> class Complex {
public: - + typedef Real value_type; Real r, i;
/**@name constructors */ diff --git a/Transceiver52M/ms/ms.cpp b/Transceiver52M/ms/ms.cpp index ddcfc3e..6f63a73 100644 --- a/Transceiver52M/ms/ms.cpp +++ b/Transceiver52M/ms/ms.cpp @@ -62,7 +62,7 @@
// float -> int16 blade_sample_type burst_buf[burst->size()]; - convert_and_scale<int16_t, float>(burst_buf, burst->begin(), burst->size() * 2, 1); + convert_and_scale(burst_buf, burst->begin(), burst->size() * 2, 1);
while (1) { GSM::Time target; @@ -126,7 +126,7 @@ t->submit_burst_ts(buf2, burst->size() + pad, send_ts - pad); #ifdef DBGXX signalVector test(burst->size() + pad); - convert_and_scale<float, int16_t>(test.begin(), buf2, burst->size() * 2 + pad, 1.f / float(scale)); + convert_and_scale(test.begin(), buf2, burst->size() * 2 + pad, 1.f / float(scale)); estim_burst_params ebp; auto det = detectAnyBurst(test, 0, 4, 4, CorrType::RACH, 40, &ebp); if (det > 0) diff --git a/Transceiver52M/ms/ms.h b/Transceiver52M/ms/ms.h index 5425283..f79a560 100644 --- a/Transceiver52M/ms/ms.h +++ b/Transceiver52M/ms/ms.h @@ -38,6 +38,7 @@ #error wat? no device.. #endif
+#include "Complex.h" #include "GSMCommon.h" #include "itrq.h"
@@ -53,15 +54,44 @@ start2[i] = t2; } } -template <typename DST_T, typename SRC_T, typename ST> -void convert_and_scale(void *dst, void *src, unsigned int src_len, ST scale) + +namespace cvt_internal +{ +template <typename SRC_T, typename ST> +void convert_and_scale_i(float *dst, const SRC_T *src, unsigned int src_len, ST scale) { for (unsigned int i = 0; i < src_len; i++) - reinterpret_cast<DST_T *>(dst)[i] = static_cast<DST_T>((reinterpret_cast<SRC_T *>(src)[i])) * scale; + dst[i] = static_cast<float>(src[i]) * scale; } -template <typename DST_T, typename SRC_T> void convert_and_scale_default(void *dst, void *src, unsigned int src_len) +template <typename DST_T, typename ST> +void convert_and_scale_i(DST_T *dst, const float *src, unsigned int src_len, ST scale) { - return convert_and_scale<DST_T, SRC_T>(dst, src, src_len, SAMPLE_SCALE_FACTOR); + for (unsigned int i = 0; i < src_len; i++) + dst[i] = static_cast<DST_T>(src[i] * scale); +} +template <typename ST> void convert_and_scale_i(float *dst, const float *src, unsigned int src_len, ST scale) +{ + for (unsigned int i = 0; i < src_len; i++) + dst[i] = src[i] * scale; +} + +template <typename T> struct is_complex : std::false_type { + using baset = T; +}; +template <typename T> struct is_complex<std::complex<T>> : std::true_type { + using baset = typename std::complex<T>::value_type; +}; +template <typename T> struct is_complex<Complex<T>> : std::true_type { + using baset = typename Complex<T>::value_type; +}; +} // namespace cvt_internal + +template <typename DST_T, typename SRC_T, typename ST> +void convert_and_scale(DST_T *dst, const SRC_T *src, unsigned int src_len, ST scale) +{ + using vd = typename cvt_internal::is_complex<DST_T>::baset; + using vs = typename cvt_internal::is_complex<SRC_T>::baset; + return cvt_internal::convert_and_scale_i((vd *)dst, (vs *)src, src_len, scale); }
struct one_burst { diff --git a/Transceiver52M/ms/ms_rx_burst_test.cpp b/Transceiver52M/ms/ms_rx_burst_test.cpp index 7018301..c3ba4ee 100644 --- a/Transceiver52M/ms/ms_rx_burst_test.cpp +++ b/Transceiver52M/ms/ms_rx_burst_test.cpp @@ -90,7 +90,7 @@
if (is_sch) { char outbin[148]; - convert_and_scale_default<float, int16_t>(burst.begin(), e.burst, ONE_TS_BURST_LEN * 2); + convert_and_scale(burst.begin(), e.burst, ONE_TS_BURST_LEN * 2, SAMPLE_SCALE_FACTOR); std::stringstream dbgout; #if 0 { @@ -109,8 +109,7 @@ } #endif { - convert_and_scale<float, float>(burst.begin(), burst.begin(), ONE_TS_BURST_LEN * 2, - 1.f / float(scale)); + convert_and_scale(burst.begin(), burst.begin(), ONE_TS_BURST_LEN * 2, 1.f / float(scale));
std::complex<float> channel_imp_resp[CHAN_IMP_RESP_LENGTH * d_OSR]; auto ss = reinterpret_cast<std::complex<float> *>(burst.begin()); @@ -133,7 +132,7 @@ return; } #if 1 - convert_and_scale<float, int16_t>(burst.begin(), e.burst, ONE_TS_BURST_LEN * 2, 1.f / float(scale)); + convert_and_scale(burst.begin(), e.burst, ONE_TS_BURST_LEN * 2, 1.f / float(scale)); // std::cerr << "@" << tsc << " " << e.gsmts.FN() << ":" << e.gsmts.TN() << " " << ebp.toa << " " // << std::endl;
diff --git a/Transceiver52M/ms/ms_rx_lower.cpp b/Transceiver52M/ms/ms_rx_lower.cpp index bec1691..e39d72d 100644 --- a/Transceiver52M/ms/ms_rx_lower.cpp +++ b/Transceiver52M/ms/ms_rx_lower.cpp @@ -180,13 +180,11 @@ memset((void *)&sch_acq_buffer[0], 0, sizeof(sch_acq_buffer)); if (is_first_sch_acq) { float max_corr = 0; - convert_and_scale<float, int16_t>(which_out_buffer, which_in_buffer, buf_len * 2, - 1.f / float(rxFullScale)); + convert_and_scale(which_out_buffer, which_in_buffer, buf_len * 2, 1.f / float(rxFullScale)); start = get_sch_buffer_chan_imp_resp(ss, &channel_imp_resp[0], buf_len, &max_corr); detect_burst(&ss[start], &channel_imp_resp[0], 0, sch_demod_bits); } else { - convert_and_scale<float, int16_t>(which_out_buffer, which_in_buffer, buf_len * 2, - 1.f / float(rxFullScale)); + convert_and_scale(which_out_buffer, which_in_buffer, buf_len * 2, 1.f / float(rxFullScale)); start = get_sch_chan_imp_resp(ss, &channel_imp_resp[0]); start = start < 39 ? start : 39; start = start > -39 ? start : -39; diff --git a/Transceiver52M/ms/ms_upper.cpp b/Transceiver52M/ms/ms_upper.cpp index 2f3bdc6..63f5926 100644 --- a/Transceiver52M/ms/ms_upper.cpp +++ b/Transceiver52M/ms/ms_upper.cpp @@ -191,7 +191,7 @@ return true; }
- convert_and_scale<float, int16_t>(ss, e.burst, ONE_TS_BURST_LEN * 2, 1.f / float(rxFullScale)); + convert_and_scale(ss, e.burst, ONE_TS_BURST_LEN * 2, 1.f / float(rxFullScale));
pow = energyDetect(sv, 20 * 4 /*sps*/); if (pow < -1) { @@ -292,10 +292,10 @@
// float -> int16 blade_sample_type burst_buf[txburst->size()]; - convert_and_scale<int16_t, float>(burst_buf, txburst->begin(), txburst->size() * 2, 1); + convert_and_scale(burst_buf, txburst->begin(), txburst->size() * 2, 1); #ifdef TXDEBUG auto check = signalVector(txburst->size(), 40); - convert_and_scale<float, int16_t, 1>(check.begin(), burst_buf, txburst->size() * 2); + convert_and_scale(check.begin(), burst_buf, txburst->size() * 2, 1); estim_burst_params ebp; auto d = detectAnyBurst(check, 2, 4, 4, CorrType::RACH, 40, &ebp); if (d) @@ -462,7 +462,6 @@ { auto tall_trxcon_ctx = talloc_init("trxcon context"); signal(SIGPIPE, sighandler); - fesetround(FE_TOWARDZERO);
trxcon::msgb_talloc_ctx_init(tall_trxcon_ctx, 0); trxc_log_init(tall_trxcon_ctx);