This is merely a historical archive of years 2008-2021, before the migration to mailman3.
A maintained and still updated list archive can be found at https://lists.osmocom.org/hyperkitty/list/osmocom-sdr@lists.osmocom.org/.
Sylvain Munaut tnt at 246tnt.comHi, > I just compiled gr-fosphor on a PC equipped with a HD 5450 card, and when I try to run osmocom_fft -F I gut the following error: > [+] Selected device: Cedar > Build log for 'display.cl': > "/tmp/OCL2287T8.cl", line 67: error: work group size exceeds the maximum > default value for the selected device > __attribute__((reqd_work_group_size(16, 16, 1))) > ^ > > 1 error detected in the compilation of "/tmp/OCL2287T8.cl". > Frontend phase failed compilation. Yes, that card only suppot 128 as a maximum work group size and fosphor is written to use 256. I think this particular generation of card is the only one that's new enough to support OpenCL but old enough to not support WG of 256. I did however write a patch to support it a while back and just never bothered to clean it up (to select size dynamically depending on your card). Try the attached patch and see if it works for you. Cheers, Sylvain -------------- next part -------------- commit 5b2d6c6abef2680e367fc315fb32f9b51fe5de8f Author: Sylvain Munaut <tnt at 246tNt.com> Date: Sat Oct 19 23:23:57 2013 +0200 [hack] Make display kernel 8x8 diff --git a/fosphor/cl.c b/fosphor/cl.c index 3160dc4..530d3a2 100644 --- a/fosphor/cl.c +++ b/fosphor/cl.c @@ -679,9 +679,9 @@ fosphor_cl_process(struct fosphor *self, /* Execute display kernel */ global[0] = FOSPHOR_FFT_LEN; - global[1] = 16; - local[0] = 16; - local[1] = 16; + global[1] = 8; + local[0] = 8; + local[1] = 8; err = clEnqueueNDRangeKernel(cl->cq, cl->kern_display, 2, NULL, global, local, 0, NULL, NULL); CL_ERR_CHECK(err, "Unable to queue display kernel execution"); diff --git a/fosphor/display.cl b/fosphor/display.cl index dd19bba..82d210f 100644 --- a/fosphor/display.cl +++ b/fosphor/display.cl @@ -42,6 +42,9 @@ //#define MAX_HOLD_NORMAL #define MAX_HOLD_DECAY +#define QS_LOG 3 +#define QS (1<<QS_LOG) + #ifdef USE_NV_SM11_ATOMICS @@ -64,7 +67,7 @@ inline void nv_sm11_atomic_inc(volatile __local uint *p, uint tag) #endif /* USE_NV_SM11_ATOMICS */ -__attribute__((reqd_work_group_size(16, 16, 1))) +__attribute__((reqd_work_group_size(QS, QS, 1))) __kernel void display( /* FFT Input */ __global const float2 *fft, /* [ 0] Input FFT (complex) */ @@ -91,20 +94,20 @@ __kernel void display( float max_pwr = - 1000.0f; /* Local memory */ - __local float live_buf[16 * 16]; /* get_local_size(0) * get_local_size(1) */ - __local float max_buf[16 * 16]; /* get_local_size(0) * get_local_size(1) */ - __local uint histo_buf[16 * 128]; + __local float live_buf[QS * QS]; /* get_local_size(0) * get_local_size(1) */ + __local float max_buf[QS * QS]; /* get_local_size(0) * get_local_size(1) */ + __local uint histo_buf[QS * 128]; /* Local shortcuts */ const float live_one_minus_alpha = 1.0f - live_alpha; /* Transposition & Atomic emulation */ #ifdef USE_NV_SM11_ATOMICS - __local float pwr_buf[16 * 16]; /* pwr transpose buffer */ + __local float pwr_buf[QS * QS]; /* pwr transpose buffer */ - uint tib = (get_local_id(0) + get_local_id(1)) & 15; - uint ti0 = tib | (get_local_id(0) << 4); - uint ti1 = tib | (get_local_id(1) << 4); + uint tib = (get_local_id(0) + get_local_id(1)) & (QS-1); + uint ti0 = tib | (get_local_id(0) << QS_LOG); + uint ti1 = tib | (get_local_id(1) << QS_LOG); const uint tag = get_local_id(0) << (UINT_BITS - LOG2_WARP_SIZE); #endif @@ -114,6 +117,24 @@ __kernel void display( __local uint *h = &histo_buf[get_local_id(1) * get_local_size(0) + get_local_id(0)]; +#if QS == 8 + h[ 0] = 0; + h[ 64] = 0; + h[ 128] = 0; + h[ 192] = 0; + h[ 256] = 0; + h[ 320] = 0; + h[ 384] = 0; + h[ 448] = 0; + h[ 512] = 0; + h[ 576] = 0; + h[ 640] = 0; + h[ 704] = 0; + h[ 768] = 0; + h[ 832] = 0; + h[ 896] = 0; + h[ 960] = 0; +#elif QS == 16 h[ 0] = 0; h[ 256] = 0; h[ 512] = 0; @@ -122,6 +143,7 @@ __kernel void display( h[1280] = 0; h[1536] = 0; h[1792] = 0; +#endif /* Wait for all clears to be done by everyone */ barrier(CLK_LOCAL_MEM_FENCE); @@ -169,11 +191,11 @@ __kernel void display( /* Atomic Bin increment */ #if defined(USE_NV_SM11_ATOMICS) - nv_sm11_atomic_inc(&histo_buf[(bin << 4) + get_local_id(1)], tag); + nv_sm11_atomic_inc(&histo_buf[(bin << QS_LOG) + get_local_id(1)], tag); #elif defined(USE_EXT_ATOMICS) - atom_inc(&histo_buf[(bin << 4) + get_local_id(0)]); + atom_inc(&histo_buf[(bin << QS_LOG) + get_local_id(0)]); #else - atomic_inc(&histo_buf[(bin << 4) + get_local_id(0)]); + atomic_inc(&histo_buf[(bin << QS_LOG) + get_local_id(0)]); #endif }