1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45
#include <stdio.h>
#define COUNT 2
/**
 * Element-wise integer addition kernel: c[i] = a[i] + b[i].
 *
 * Each thread processes exactly one element, selected by its global 1D index
 * (blockIdx.x * blockDim.x + threadIdx.x). For a single-block launch this is
 * simply threadIdx.x.
 *
 * Launch layout: 1D grid of 1D blocks. The kernel has no bounds guard, so the
 * total number of launched threads must not exceed the number of elements in
 * the arrays. No shared memory is used.
 *
 * @param a first input array (device-accessible, read only)
 * @param b second input array (device-accessible, read only)
 * @param c output array (device-accessible), receives the sums
 */
__global__ void add(const int *a, const int *b, int *c)
{
    // Global index so that launches with more than one block address
    // distinct elements instead of every block rewriting the same ones.
    const int idx = blockIdx.x * blockDim.x + threadIdx.x;
    c[idx] = a[idx] + b[idx];
}
/**
 * Reports a failed CUDA runtime call on stderr.
 *
 * @param status result code returned by the CUDA runtime
 * @param what   short description of the operation, used in the message
 * @return true when status is cudaSuccess, false otherwise
 */
static bool cudaCallSucceeded(cudaError_t status, const char *what)
{
    if (status == cudaSuccess) {
        return true;
    }
    fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
    return false;
}

/**
 * Adds two COUNT-element integer arrays on the GPU and prints the sums.
 *
 * Steps: allocate device buffers, copy the inputs over, launch `add` with one
 * block of COUNT threads, copy the result back, release the device buffers
 * and print the values separated by spaces.
 *
 * @return 0 on success, 1 if any CUDA call or the kernel launch failed
 */
int main()
{
    const size_t bytes = COUNT * sizeof(int);

    int a[COUNT] = {1, 2};
    int b[COUNT] = {3, 4};
    int c[COUNT] = {0};

    int *d_a = NULL;
    int *d_b = NULL;
    int *d_c = NULL;

    // && short-circuits, so the first failure stops the chain and is the
    // only one reported.
    bool ok = cudaCallSucceeded(cudaMalloc(&d_a, bytes), "cudaMalloc(d_a)")
           && cudaCallSucceeded(cudaMalloc(&d_b, bytes), "cudaMalloc(d_b)")
           && cudaCallSucceeded(cudaMalloc(&d_c, bytes), "cudaMalloc(d_c)")
           && cudaCallSucceeded(cudaMemcpy(d_a, a, bytes, cudaMemcpyHostToDevice),
                                "cudaMemcpy(a -> d_a)")
           && cudaCallSucceeded(cudaMemcpy(d_b, b, bytes, cudaMemcpyHostToDevice),
                                "cudaMemcpy(b -> d_b)")
           && cudaCallSucceeded(cudaMemset(d_c, 0, bytes), "cudaMemset(d_c)");

    if (ok) {
        add<<<1, COUNT>>>(d_a, d_b, d_c);

        // A kernel launch returns nothing: configuration errors show up via
        // cudaGetLastError(), execution errors at the next synchronization.
        ok = cudaCallSucceeded(cudaGetLastError(), "add kernel launch")
          && cudaCallSucceeded(cudaDeviceSynchronize(), "add kernel execution")
          && cudaCallSucceeded(cudaMemcpy(c, d_c, bytes, cudaMemcpyDeviceToHost),
                               "cudaMemcpy(d_c -> c)");
    }

    // Always release the buffers; pointers that were never allocated are
    // still NULL here.
    cudaFree(d_a);
    cudaFree(d_b);
    cudaFree(d_c);

    if (!ok) {
        return 1;
    }

    for (int i = 0; i < COUNT; i++) {
        printf("%d ", c[i]);
    }
    printf("\n");
    return 0;
}
|