My CUDA kernel code is not working
I am trying to write a small piece of code that generates numbers and returns the result in an array, but after I run the code it does not work. I tried to use the Nsight debugger to understand the problem, but it freezes and closes immediately. I don't know what the problem is.
Could someone please help me understand the problem in my code?
thanks
// Code as posted in the question, reformatted one statement per line with the
// CUDA identifier casing restored. The host-side allocation bug that the
// answer below explains is intentionally left in place and only flagged.

// Kernel: each thread derives four values from its indices (w and x from
// blockIdx.x, y from blockIdx.y, z from threadIdx.x), splits each into a
// tens digit and a ones digit, and stores the digits in p and q around the
// two entries of m. Threads whose global id is below 10 then copy the first
// five entries of p into out[id].
//
// Parameters:
//   pf_tmp, qf_tmp - copied into the first i*2 entries of p / q when i > 0
//   pl_tmp, ql_tmp - copied into p / q starting at index 5 when i > 0
//   m              - m[0] is stored in p, m[1] in q
//   p, q           - work arrays written by every thread
//   i              - offset selector; the digits start at index i*2
//   N, n           - not used in the body
//   out            - receives p[0..4] for the threads with id < 10
//
// NOTE(review): the pointer parameter appears as `int* n` in the post, which
// collides with the trailing `int n`; it is written as `N` here on the
// assumption that the original casing was lost - confirm.
// NOTE(review): p and q are declared with 5 elements but are indexed up to
// i*2 + 4 (and from 5 upward in the last loop), so only i == 0 stays in range.
// NOTE(review): all threads write the same p and q, so what ends up in out
// depends on which thread wrote last.
__global__ void mykernel(int* pf_tmp, int* pl_tmp, int* qf_tmp, int* ql_tmp,
                         int m[2], int p[5], int q[5], int i, int* N,
                         int out[10][5], int n)
{
    int id = blockDim.x * blockIdx.x + threadIdx.x;
    int idx = blockIdx.x;
    int idy = blockIdx.y;

    int w = idx / 100;
    int x = idx % 100;
    int y = idy;
    int z = threadIdx.x;

    int len = ((i * 2) + 5);

    // fill pf_tmp & qf_tmp
    // (the condition reads `if( > 0)` in the post; `i` is the only operand
    // consistent with the i*2 loop bound)
    if (i > 0) {
        for (int k = 0; k < (i * 2); k++) {
            p[k] = pf_tmp[k];
            q[k] = qf_tmp[k];
        }
    }

    // fill x
    // NOTE(review): with `> 10`, the value 10 takes the else branch and is
    // stored as (0, 10); `>= 10` may have been intended. Same for y, w, z.
    if (x > 10) {
        p[(i * 2)] = (x - (x % 10)) / 10;
        p[(i * 2) + 1] = x % 10;
    } else {
        p[(i * 2)] = 0;
        p[(i * 2) + 1] = x;
    }

    // fill y
    if (y > 10) {
        q[(i * 2)] = (y - (y % 10)) / 10;
        q[(i * 2) + 1] = y % 10;
    } else {
        q[(i * 2)] = 0;
        q[(i * 2) + 1] = y;
    }

    // fill m
    p[(i * 2) + 2] = m[0];
    q[(i * 2) + 2] = m[1];

    // fill w
    if (w > 10) {
        p[(i * 2) + 3] = (w - (w % 10)) / 10;
        p[(i * 2) + 4] = w % 10;
    } else {
        p[(i * 2) + 3] = 0;
        p[(i * 2) + 4] = w;
    }

    // fill z
    if (z > 10) {
        q[(i * 2) + 3] = (z - (z % 10)) / 10;
        q[(i * 2) + 4] = z % 10;
    } else {
        q[(i * 2) + 3] = 0;
        q[(i * 2) + 4] = z;
    }

    // fill pl_tmp & ql_tmp
    if (i > 0) {
        for (int k = 0; k < (i * 2); k++) {
            p[(len - (i * 2)) + k] = pl_tmp[k];
            q[(len - (i * 2)) + k] = ql_tmp[k];
        }
    }

    if (id < 10) {
        for (int k = 0; k < 5; k++)
            out[id][k] = p[k];
    }
}

// Host code from the question: declares the buffers, calls cudaMalloc on
// them, and launches mykernel on a 10000 x 100 grid of 100-thread blocks.
int main()
{
    cudaError_t err;
    dim3 blocks(10000, 100);
    dim3 threads(100);

    int m[2] = {4, 5};
    int hst_out[10][5];
    int p[5];
    int q[5];

    // BUG (the subject of the answer below): p, q and hst_out are host
    // arrays, not pointers, yet their addresses are handed to cudaMalloc,
    // which expects the address of a pointer variable to fill in. The sizes
    // are also given as element counts (5, 5, 50) although cudaMalloc takes
    // a size in bytes.
    err = cudaMalloc((void **)&p, 5);
    err = cudaMalloc((void **)&q, 5);
    err = cudaMalloc((void **)&hst_out, 50);

    // NOTE(review): m is a host array passed straight to the kernel, err is
    // never inspected, and nothing is copied back to the host afterwards.
    mykernel<<<blocks, threads>>>(NULL, NULL, NULL, NULL, m, p, q, 0, NULL, hst_out, 100000000);

    return 0;
}
The error is an obvious one, and it is a plain C programming mistake.
When you declare
int m[2] = {4,5}; int hst_out[10][5]; int p[5]; int q[5]; then hst_out, p and q are arrays, not pointers, but later you use them as pointers:
err = cudaMalloc((void **)&p, 5); err = cudaMalloc((void **)&q, 5); err = cudaMalloc((void **)&hst_out, 50);
So you should have declared them as pointers instead, e.g.,
int *p;
and used it this way:
err = cudaMalloc((void **)&p, 5*sizeof(int));
Also notice the size: you requested only 5 bytes, whereas I requested 5*sizeof(int) bytes, because cudaMalloc takes its size in bytes, not in elements.
For more examples, see:
http://cuda-programming.blogspot.sg/2013/03/how-to-avoid-uses-of-cudamalloc-in.html
Comments
Post a Comment