Timur Davydov has uploaded this change for review. ( https://gerrit.osmocom.org/c/osmo-trx/+/42248?usp=email )
Change subject: transceiver: add optional Laurent burst LUT modulation (int16)
......................................................................
transceiver: add optional Laurent burst LUT modulation (int16)
Introduce an optional Burst LUT optimization for Laurent-based burst modulation, enabled via --with-burst-lut.
- Add precomputed Laurent LUT tables (float and int16 I/Q variants)
- Generate LUT at initialization (prepareBurstLUT)
- Use LUT-based modulation for sps=4 when enabled
- Keep original modulateBurstLaurent() as fallback
- Add configure.ac option and USE_BURST_LUT define
This reduces runtime computation by replacing per-burst Laurent processing with memcpy-based LUT lookups.
Change-Id: I1715c2d33dc55fe1c7be5c6e7259d378ea5e80b2
---
M Transceiver52M/sigProcLib.cpp
M configure.ac
2 files changed, 180 insertions(+), 0 deletions(-)
git pull ssh://gerrit.osmocom.org:29418/osmo-trx refs/changes/48/42248/1
diff --git a/Transceiver52M/sigProcLib.cpp b/Transceiver52M/sigProcLib.cpp index 5fac365..bcea848 100644 --- a/Transceiver52M/sigProcLib.cpp +++ b/Transceiver52M/sigProcLib.cpp @@ -51,6 +51,29 @@ /** Lookup tables for trigonometric approximation */ static float sincTable[TABLESIZE+1]; // add 1 element for wrap around
+/** Lookup tables for burst processing */
+#ifdef USE_BURST_LUT
+/* Factor applied to the float I/Q components when prepareBurstLUT() fills
+ * the int16 tables. */
+#define BURST_SCALING 32700 /* Scaling factor */
+
+/* Set to true at the end of prepareBurstLUT(); modulateBurstLaurentLut()
+ * checks it to choose between the LUT path and modulateBurstLaurent(). */
+static bool burst_lut_prepared = false;
+
+/** Complex Laurent LUT table (float complex samples).
+ * Layout: prologue, per-4bit-combination filters and epilogue. Each
+ * entry contains complex<float> samples used for fast burst modulation.
+ *
+ * lut_laurent4 is indexed as (q * 16 + idx) * 4 + k, where q (0..3) is
+ * selected by the modulators as 3 - (bit position % 4), idx is the 4-bit
+ * history of the most recent input bits and k (0..3) is the sample within
+ * the 4-sample symbol period.
+ *
+ * The prologue and epilogue tables are filled from burst samples 0..15 and
+ * 600..615 respectively; the modulators copy only the first 12 samples of
+ * each.
+ */
+static complex lut_laurent4_prologue[4 * 4]; /* 4 prologue filters, each with 4 taps */
+static complex lut_laurent4[16 * 4 * 4]; /* 16 possible 4-bit combinations, each with 4 filters of 4 taps */
+static complex lut_laurent4_epilogue[4 * 4]; /* 4 epilogue filters, each with 4 taps */
+
+/** Integer LUT table (interleaved I/Q int16 samples).
+ * These tables contain int16_t I/Q pairs (interleaved) generated from
+ * the complex LUT and scaled by `BURST_SCALING` for integer output.
+ * Element 2 * n holds I and element 2 * n + 1 holds Q of complex sample n,
+ * using the same sample ordering as the complex tables.
+ */
+static int16_t i16_lut_laurent4_prologue[4 * 4 * 2]; /* 4 prologue filters, each with 4 taps */
+static int16_t i16_lut_laurent4[16 * 4 * 4 * 2]; /* 16 possible 4-bit combinations */
+static int16_t i16_lut_laurent4_epilogue[4 * 4 * 2]; /* 4 epilogue filters, each with 4 taps */
+#endif /* USE_BURST_LUT */
+
 /** Constants */
 static const float M_PI_F = (float)M_PI;
@@ -669,6 +692,146 @@ return c0_shaped; }
+#ifdef USE_BURST_LUT
+/**
+ * @brief Modulate a bit sequence into interleaved int16 I/Q samples using
+ *        the precomputed int16 Laurent LUT tables.
+ *
+ * The output is the first 12 prologue samples, then 4 samples per input bit
+ * from bit 2 onwards, then 4 samples for one extra step that shifts a 0 bit
+ * into the 4-bit history, then the first 12 epilogue samples.
+ *
+ * @param bits   The input bit vector to be modulated (bit values 0/1).
+ * @param bitlen The length of the input bit vector. At least 2 bits are
+ *               required because the history is seeded from bits[0] and
+ *               bits[1].
+ * @param outbuf Output buffer to store interleaved int16 I/Q samples.
+ *               The buffer is filled with int16 pairs (I0,Q0,I1,Q1,...)
+ *               and must have room for 2 * (4 * bitlen + 20) int16 values.
+ * @return The number of complex samples (I/Q pairs) written to `outbuf`,
+ *         i.e. 4 * bitlen + 20, or 0 (nothing written) if bitlen < 2.
+ * @note This function does not check `burst_lut_prepared`; the tables are
+ *       only meaningful after prepareBurstLUT() has filled them.
+ */
+unsigned modulateBits(uint8_t *bits, unsigned bitlen, int16_t *outbuf)
+{
+  /* Copy sizes counted in int16 values: 2 values (I, Q) per complex sample. */
+  const unsigned edge_len = 12 * 2;
+  const unsigned sym_len = 4 * 2;
+  unsigned off = 0;
+  unsigned lut_idx;
+
+  /* Reading bits[1] below would run past the input for shorter sequences. */
+  if (bitlen < 2)
+    return 0;
+
+  lut_idx = (bits[0] << 1) | (bits[1]);
+
+  memcpy(outbuf + off, i16_lut_laurent4_prologue, sizeof(int16_t) * edge_len);
+  off += edge_len;
+
+  /* i == bitlen is the extra step that shifts in a 0 after the last bit. */
+  for (unsigned i = 2; i <= bitlen; ++i) {
+    const unsigned bit = (i < bitlen) ? bits[i] : 0;
+
+    lut_idx = ((lut_idx << 1) | bit) & 0xf;
+    memcpy(outbuf + off, &i16_lut_laurent4[2 * (64 * (3 - (i & 0x3)) + 4 * lut_idx)], sizeof(int16_t) * sym_len);
+    off += sym_len;
+  }
+
+  memcpy(outbuf + off, i16_lut_laurent4_epilogue, sizeof(int16_t) * edge_len);
+  off += edge_len;
+
+  /* off counts int16 values; two of them make one complex sample. */
+  return off >> 1;
+}
+
+/**
+ * @brief Modulate a burst using the precomputed Laurent approximation LUT.
+ * @param bits The input bit vector to be modulated.
+ * @return A newly allocated `signalVector` containing complex<float>
+ *         samples (I/Q) ready for transmission. The vector length is 625
+ *         samples. The caller is responsible for deleting the returned
+ *         `signalVector`.
+ */
+static signalVector *modulateBurstLaurentLutInt(const BitVector& bits)
+{
+  /* Writes 4 * bits.size() + 20 samples in total: 12 prologue samples,
+   * 4 per bit from bit 2 onwards, 4 for the trailing zero step and 12
+   * epilogue samples. The caller must ensure bits.size() >= 2 and that this
+   * total fits the 625-sample vector.
+   * NOTE(review): assumes begin() exposes 625 writable samples and that the
+   * unwritten tail is acceptable as constructed -- confirm against
+   * signalVector. */
+  const unsigned nbits = bits.size();
+  signalVector *burst = new signalVector(625, 16);
+  signalVector::iterator it = burst->begin();
+
+  memcpy((void *)it, lut_laurent4_prologue, sizeof(complex) * 12);
+  it += 12;
+
+  /* Seed the 4-bit history with the first two bits. */
+  unsigned lut_idx = (bits[0] << 1) | (bits[1]);
+  for (unsigned i = 2; i < nbits; i++) {
+    lut_idx = ((lut_idx << 1) | bits[i]) & 0xf;
+    memcpy((void *)it, &lut_laurent4[64 * (3 - (i % 4)) + 4 * lut_idx], sizeof(complex) * 4);
+    it += 4;
+  }
+
+  /* One extra step at position nbits shifts a 0 bit into the history. */
+  lut_idx = (lut_idx << 1) & 0xf;
+  memcpy((void *)it, &lut_laurent4[64 * (3 - (nbits % 4)) + 4 * lut_idx], sizeof(complex) * 4);
+  it += 4;
+
+  memcpy((void *)it, lut_laurent4_epilogue, sizeof(complex) * 12);
+  it += 12;
+
+  return burst;
+}
+
+/**
+ * @brief Modulate a burst using the precomputed Laurent LUT if prepared,
+ *        otherwise fall back to the original Laurent modulation routine.
+ * @param bits The input bit vector to be modulated.
+ * @note `modulateBurstLaurentLutInt` is used only when `burst_lut_prepared`
+ *       is true and the burst has between 2 and 151 bits: the LUT routine
+ *       reads bits[0] and bits[1] unconditionally and writes
+ *       4 * bits.size() + 20 samples into a 625-sample vector. In every
+ *       other case `modulateBurstLaurent` is called instead.
+ * @return A newly allocated `signalVector` containing complex<float>
+ *         samples (I/Q) ready for transmission. The caller owns the
+ *         returned vector.
+ */
+static signalVector *modulateBurstLaurentLut(const BitVector &bits)
+{
+  const unsigned nbits = bits.size();
+  /* Guards the out-of-bounds read (nbits < 2) and write (nbits > 151). */
+  const bool fits_lut = nbits >= 2 && 4 * nbits + 20 <= 625;
+
+  if (burst_lut_prepared && fits_lut)
+    return modulateBurstLaurentLutInt(bits);
+
+  return modulateBurstLaurent(bits);
+}
+
+/**
+ * @brief Prepare the burst modulation LUT by generating the Laurent approximation for all possible
+ *        bit combinations and storing them in the LUT tables.
+ * @return 0 on success, -1 on failure.
+ */
+static int prepareBurstLUT()
+{
+  /* NOTE(review): this routine calls modulateBurstLaurent(), so whatever
+   * that routine depends on must already be initialized when this runs.
+   * The setup hunk of this change inserts the prepareBurstLUT() call ahead
+   * of the generateGSMPulse() calls -- confirm that ordering is safe. */
+
+  /* For each 8-bit pattern b: 16 consecutive burst samples. */
+  complex modtbl[256 * 4 * 4];
+  const unsigned B = 8;
+  /* First burst sample captured per pattern. */
+  const int j_off = (8 + B - 3) * 4;
+
+  for (int b = 0; b < (1 << B); ++b) {
+    char v[NORMAL_BURST_NBITS + 1]; /* extra byte for null terminator */
+    memset(v, '0', NORMAL_BURST_NBITS);
+    v[NORMAL_BURST_NBITS] = 0;
+
+    /* Place pattern b, MSB first, at bit positions 8..15 of an otherwise
+     * all-zero burst. */
+    for (unsigned c = 0; c < B; ++c) {
+      if ((b >> (B - c - 1)) & 1) {
+        v[8 + c] = '1';
+      }
+    }
+
+    BitVector bv(v);
+    signalVector *sv = modulateBurstLaurent(bv);
+    if (!sv) {
+      /* burst_lut_prepared stays false, so modulateBurstLaurentLut() keeps
+       * using the non-LUT path. */
+      return -1;
+    }
+
+    /* Prologue and epilogue are rewritten on every iteration; the values
+     * left in the tables are those of the last pattern (b == 255). */
+    for (unsigned t = 0; t < 16; ++t) {
+      lut_laurent4_prologue[t] = (*sv)[t];
+      i16_lut_laurent4_prologue[2 * t + 0] = lut_laurent4_prologue[t].r * BURST_SCALING;
+      i16_lut_laurent4_prologue[2 * t + 1] = lut_laurent4_prologue[t].i * BURST_SCALING;
+    }
+
+    for (unsigned t = 600; t < 616; ++t) {
+      lut_laurent4_epilogue[t - 600] = (*sv)[t];
+      i16_lut_laurent4_epilogue[2 * (t - 600) + 0] = lut_laurent4_epilogue[t - 600].r * BURST_SCALING;
+      i16_lut_laurent4_epilogue[2 * (t - 600) + 1] = lut_laurent4_epilogue[t - 600].i * BURST_SCALING;
+    }
+
+    for (int k = 0; k < 16; ++k) {
+      modtbl[b * 16 + k] = (*sv)[j_off + k];
+    }
+
+    /* The modulated burst is heap-allocated and owned by the caller; it was
+     * previously leaked once per pattern (256 times). */
+    delete sv;
+  }
+
+  // Compose modulation LUT table
+  for (int q = 0; q < 4; ++q) { // quadrant
+    for (int i = 0; i < 16; ++i) {
+      for (int k = 0; k < 4; ++k) {
+        const int dst = (q * 16 + i) * 4 + k;
+
+        /* 4-bit pattern i shifted left by q within the 8-bit pattern; the
+         * 4-sample window moves back by 4 samples for each step of q. */
+        lut_laurent4[dst] = modtbl[(i << q) * 16 + (12 - q * 4) + k];
+
+        i16_lut_laurent4[2 * dst + 0] = lut_laurent4[dst].r * BURST_SCALING;
+        i16_lut_laurent4[2 * dst + 1] = lut_laurent4[dst].i * BURST_SCALING;
+      }
+    }
+  }
+
+  burst_lut_prepared = true;
+  return 0;
+}
+#endif /* USE_BURST_LUT */
+
 static signalVector *rotateEdgeBurst(const signalVector &symbols, int sps)
 {
   signalVector *burst;
@@ -973,7 +1136,11 @@
   if (emptyPulse)
     return rotateBurst(wBurst, guardPeriodLength, sps);
   else if (sps == 4)
+#ifdef USE_BURST_LUT
+    return modulateBurstLaurentLut(wBurst);
+#else
     return modulateBurstLaurent(wBurst);
+#endif
   else
     return
modulateBurstBasic(wBurst, guardPeriodLength, sps); } @@ -2141,6 +2308,10 @@ generateSincTable(); initGMSKRotationTables();
+#ifdef USE_BURST_LUT + prepareBurstLUT(); +#endif + GSMPulse1 = generateGSMPulse(1); GSMPulse4 = generateGSMPulse(4);
diff --git a/configure.ac b/configure.ac index c81b089..910f18b 100644 --- a/configure.ac +++ b/configure.ac @@ -185,6 +185,15 @@ [enable x86 SSE support (default)]) ])
+dnl Optional Laurent burst LUT modulation; defines USE_BURST_LUT when
+dnl configure is run with --with-burst-lut.
+AC_ARG_WITH(burst-lut, [
+    AS_HELP_STRING([--with-burst-lut],
+        [enable Burst LUT optimization])
+])
+
+AS_IF([test "x$with_burst_lut" = "xyes"], [
+    AC_DEFINE(USE_BURST_LUT, 1, Define to 1 for using Burst LUT)
+])
+
 AS_IF([test "x$with_neon" = "xyes"], [
     AC_DEFINE(HAVE_NEON, 1, Support ARM NEON)
 ])