2011-10-02 6 views
0

OpenCL에 문제가 있습니다. 전체 명령 대기열은 실행되지만 입력의 1/4만 읽고, 결과도 1/4만 씁니다. 반복 횟수가 얼마이든 항상 1/4입니다. (OpenCL이 버퍼 메모리의 1/4만 읽고 쓰며 때때로 충돌합니다.)

또한 때때로 무작위로 충돌합니다. 충돌하는 위치(예: ????의 0x4c4783f6)에 디버그 기호가 없기 때문에 디버깅을 해도 아무런 정보를 얻을 수 없습니다.

소스 코드:

#include <cassert>
#include <cstring>
#include <iostream>
#include <vector>

#include <cl/cl.h>

// OpenCL C source of the `add` kernel, handed to clCreateProgramWithSource()
// in main() as a single-element string array.
//
// The kernel enables the cl_intel_printf extension, then for each work-item:
//   - takes its global id in dimension 0 as the element index,
//   - stores that index into out[tid] (the a[tid]+b[tid] sum is currently
//     commented out inside the kernel text),
//   - prints out[tid], a[tid] and b[tid] via printf.
// The kernel performs no bounds check, so all three buffers must hold at
// least as many ints as the global work size.
const char *progsrc[] = { 
"#pragma OPENCL EXTENSION cl_intel_printf : enable\n\ 
__kernel void add(__global const int *a, __global const int *b, __global int *out) \ 
{ \ 
    int tid = get_global_id(0);\ 
    out[tid] = tid/*a[tid]+b[tid]*/;\ 
    printf(\"krnl: %d = %d + %d \\n\", out[tid], a[tid], b[tid]);\ 
}"}; 

// Number of elements in each host/device array and the global work size
// (one work-item per element).
const int iterations(20);

// CLCheck(expr): evaluates `expr` exactly once as an OpenCL status code.
// If the result differs from CL_SUCCESS, the code and the current source
// line are written to std::cerr and the *enclosing function* returns -1,
// so the macro may only be used inside functions returning an int-compatible
// type.
//
// The argument is parenthesised and captured in a local so that compound
// expressions (e.g. `x ^ y`) and direct calls (e.g. `CLCheck(clFinish(q))`)
// behave correctly and are not evaluated twice. The status is stored as
// `int`, which matches the 32-bit signed cl_int.
#define CLCheck(a) \
do\
{\
    const int clcheck_status_ = (a);\
    if(clcheck_status_ != CL_SUCCESS)\
    {\
        std::cerr << "OpenCL Error(" << clcheck_status_ << ") at " << __LINE__ << std::endl;\
        return -1;\
    }\
} while(0)

int main() 
{ 
    cl_int err = CL_SUCCESS; 

    int *aH = NULL; 
    int *bH = NULL; 
    int *outH = NULL; 

    cl_uint platnum, devnum; 
    cl_device_id dev; 
    cl_platform_id plat; 
    err = clGetPlatformIDs(0, 0, &platnum); 
    CLCheck(err); 
    cl_platform_id pfids[platnum]; 
    err = clGetPlatformIDs(platnum, pfids, &platnum); 
    CLCheck(err); 

    if(!platnum) 
    { 
     std::cerr << "No platform found." << std::endl; 
     return -1; 
    } 
    else 
     std::cout << platnum << " OpenCL platform(s) found.\n" << std::endl; 

    for(unsigned int i = 0; i != platnum; i++) 
    { 
     char buf[4096]; 

     err = clGetDeviceIDs(pfids[i], CL_DEVICE_TYPE_ALL, 0, 0, &devnum); 
     CLCheck(err); 
     cl_device_id devids[devnum]; 
     err = clGetDeviceIDs(pfids[i], CL_DEVICE_TYPE_ALL, devnum, devids, &devnum); 
     CLCheck(err); 
     if(!devnum) 
     { 
      std::cerr << "No device found." << std::endl; 
      return -1; 
     } 
     else 
      std::cout << " " << devnum << " OpenCL device(s) found.\n" << std::endl; 

     for(unsigned int i2 = 0; i2 != devnum; i2++) 
     { 
      char buf[1024]; 
      std::cout << ": \n\tName: " << buf; 
      err = clGetDeviceInfo(devids[i2], CL_DEVICE_VENDOR, 1024, buf, NULL); 
      CLCheck(err); 
      if(!strncmp(buf, "Intel", 5)) 
      { 
       dev = devids[0]; 
       plat = pfids[i]; 
       std::cout << "\n\tFound Intel(R) OpenCL device."; 
      } 
     } 
    } 
    cl_context_properties ctxprop[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)plat, 0}; 
    cl_context ctx = clCreateContext(ctxprop, 1, &dev, NULL, NULL, &err); 
    CLCheck(err); 

    cl_program program = clCreateProgramWithSource(ctx, 1, progsrc, NULL, &err); 
    CLCheck(err); 
    err = clBuildProgram(program, 1, &dev, "", NULL, NULL); 
    if(err != CL_SUCCESS) 
    { 
     size_t bufsz; 
     err = clGetProgramBuildInfo(program, dev, CL_PROGRAM_BUILD_LOG, 0, 0, &bufsz); 
     char buf[bufsz]; 
     err = clGetProgramBuildInfo(program, dev, CL_PROGRAM_BUILD_LOG, bufsz, buf, &bufsz); 
     std::cerr << "OpenCL program building failed: " << buf << std::endl; 
     return -1; 
    } 
    err = clUnloadCompiler(); 
    CLCheck(err); 

    aH = new int[iterations]; 
    bH = new int[iterations]; 
    outH = new int[iterations]; 
    memset(outH, 0, iterations*sizeof(int)); 
    for(int i = 0; i != iterations; i++) 
    { 
     aH[i] = i; 
     bH[i] = i*2; 
    } 

    cl_mem aCL = clCreateBuffer(ctx, CL_MEM_READ_ONLY, iterations, NULL, &err); 
    cl_mem bCL = clCreateBuffer(ctx, CL_MEM_READ_ONLY, iterations, NULL, &err); 
    CLCheck(err); 
    cl_mem outCL = clCreateBuffer(ctx, CL_MEM_WRITE_ONLY, iterations, NULL, &err); 
    CLCheck(err); 

    cl_kernel krnl = clCreateKernel(program, "add", &err); 
    CLCheck(err); 

    err = clSetKernelArg(krnl, 0, sizeof(aCL), &aCL); 
    CLCheck(err); 
    err = clSetKernelArg(krnl, 1, sizeof(bCL), &bCL); 
    CLCheck(err); 
    err = clSetKernelArg(krnl, 2, sizeof(outCL), &outCL); 
    CLCheck(err); 

    cl_command_queue cmdqueue = clCreateCommandQueue(ctx, dev, 0, &err); 
    cl_event evt; 
    size_t global_work_size[1] = { iterations }; 
    err = clEnqueueWriteBuffer(cmdqueue, aCL, CL_TRUE, 0, iterations, aH, 0, NULL, NULL); 
    err = clEnqueueWriteBuffer(cmdqueue, bCL, CL_TRUE, 0, iterations, bH, 0, NULL, NULL); 
    err = clEnqueueNDRangeKernel(cmdqueue, krnl, 1, NULL, global_work_size, NULL, 0, NULL, &evt); 
    err = clWaitForEvents(1, &evt); 
    err = clEnqueueReadBuffer(cmdqueue, outCL, CL_TRUE, 0, iterations, outH, 0, NULL, &evt); 

    for(int i = 0; i != iterations; i++) 
    { 
     std::cout << outH[i] << std::endl; 
    } 

    err = clReleaseEvent(evt); 
    err = clReleaseCommandQueue(cmdqueue); 
    err = clReleaseKernel(krnl); 
    err = clReleaseMemObject(outCL); 
    err = clReleaseMemObject(bCL); 
    err = clReleaseMemObject(aCL); 
    err = clReleaseProgram(program); 
    err = clReleaseContext(ctx); 

    if(aH) 
     delete aH; 
    if(bH) 
     delete bH; 
    if(outH) 
     delete outH; 
    return 0; 
} 

출력 :

2 OpenCL platform(s) found. 

Platform 0 : 
     Name: NVIDIA CUDA 
     Vendor: NVIDIA Corporation 
     Profile: FULL_PROFILE 
     Version: OpenCL 1.1 CUDA 4.0.1 
     Extensions: cl_khr_byte_addressable_store cl_khr_icd cl_khr_gl_sharing c 
l_nv_d3d9_sharing cl_nv_d3d10_sharing cl_khr_d3d10_sharing cl_nv_d3d11_sharing c 
l_nv_compiler_options cl_nv_device_attribute_query cl_nv_pragma_unroll 

1 OpenCL device(s) found. 

    Device 0: 
     Name: GeForce GT 425M 
     Vendor: NVIDIA Corporation 
     Profile: FULL_PROFILE 
     Driver version: 280.26 
     OpenCL version: OpenCL C 1.1 
     Version: OpenCL 1.1 CUDA 
     Extensions: cl_khr_byte_addressable_store cl_khr_icd cl_khr_gl_sharing c 
l_nv_d3d9_sharing cl_nv_d3d10_sharing cl_khr_d3d10_sharing cl_nv_d3d11_sharing c 
l_nv_compiler_options cl_nv_device_attribute_query cl_nv_pragma_unroll cl_khr_g 
lobal_int32_base_atomics cl_khr_global_int32_extended_atomics cl_khr_local_int32 
_base_atomics cl_khr_local_int32_extended_atomics cl_khr_fp64 

Platform 1 : 
     Name: Intel(R) OpenCL 
     Vendor: Intel(R) Corporation 
     Profile: FULL_PROFILE 
     Version: OpenCL 1.1 
     Extensions: cl_khr_fp64 cl_khr_global_int32_base_atomics cl_khr_global_i 
nt32_extended_atomics cl_khr_local_int32_base_atomics cl_khr_local_int32_extende 
d_atomics cl_khr_byte_addressable_store cl_intel_printf cl_ext_device_fission cl 
_intel_immediate_execution cl_khr_gl_sharing cl_khr_icd 

1 OpenCL device(s) found. 

    Device 0: 
     Name: Intel(R) Core(TM) i3 CPU  M 370 @ 2.40GHz 
     Found Intel(R) OpenCL device. 
     Vendor: Intel(R) Corporation 
     Profile: FULL_PROFILE 
     Driver version: 1.1 
     OpenCL version: OpenCL C 1.1 
     Version: OpenCL 1.1 (Build 15293.6650) 
     Extensions: cl_khr_fp64 cl_khr_global_int32_base_atomics cl_khr_global_i 
nt32_extended_atomics cl_khr_local_int32_base_atomics cl_khr_local_int32_extende 
d_atomics cl_khr_byte_addressable_store cl_intel_printf cl_ext_device_fission cl 
_intel_immediate_execution cl_khr_gl_sharing 

krnl: 0 = 0 + 0 
krnl: 1 = 1 + 2 
krnl: 2 = 2 + 4 
krnl: 3 = 3 + 6 
krnl: 4 = 4 + 8 
krnl: 5 = 0 + 0 
krnl: 6 = 0 + 0 
krnl: 7 = 0 + 0 
krnl: 16 = 0 + 492859489 
krnl: 17 = 0 + -1042621749 
krnl: 18 = 0 + 1310105771 
krnl: 19 = 0 + 134230852 
krnl: 8 = 0 + 0 
krnl: 9 = 0 + 0 
krnl: 10 = 0 + -1094462526 
krnl: 11 = 0 + -1094462526 
krnl: 12 = 0 + -1230120245 
krnl: 13 = 0 + 500723958 
krnl: 14 = 0 + 530164160 
krnl: 15 = 0 + 492859489 
0 
1 
2 
3 
4 
0 
0 
0 
0 
0 
0 
0 
0 
0 
0 
0 
0 
0 
0 
0 

감사합니다.

답변

2

저는 OpenCL에 익숙하지 않지만, 여기에 sizeof가 몇 개 빠진 것 같습니다 :)

err = clEnqueueWriteBuffer(cmdqueue, aCL, CL_TRUE, 0, iterations, aH, 0, NULL, NULL); 

아마 다음과 같아야 할 것입니다:

err = clEnqueueWriteBuffer(cmdqueue, aCL, CL_TRUE, 0, iterations * sizeof(int), aH, 0, NULL, NULL); 

그리고 뒤따르는 비슷한 코드에도 같은 수정이 적용됩니다.

편집:

그리고 sizeof()를 빠뜨렸을 수 있는 다른 곳이 여기 있습니다: ... 대단히 감사합니다,

cl_mem aCL = clCreateBuffer(ctx, CL_MEM_READ_ONLY, iterations, NULL, &err); 
cl_mem bCL = clCreateBuffer(ctx, CL_MEM_READ_ONLY, iterations, NULL, &err); 
CLCheck(err); 
cl_mem outCL = clCreateBuffer(ctx, CL_MEM_WRITE_ONLY, iterations, NULL, &err); 
CLCheck(err); 
+0

아, (몇 년 경력의) 숙련된 C++ 프로그래머인 제가 어떻게 이걸 못 봤을까요 ._. :) – Pillum

+0

음, C++에서는 보통 void * 포인터를 이리저리 다루지 않으니까요, 그래서 그렇습니다 ;) –