CUDA从入门到放弃.md

Alisa的鸽笼

2020-02-14

Install

Environment

system 105.4.15-2-MANJARO
kernel 5.4.0
cuda 10.2
bumblebee 3.2.1-22
GPU: NVIDIA GP107M [GeForce GTX 1050 Ti Mobile]
Driver: nvidia 440.44

install

推荐直接从pacman中安装，

sudo pacman -S nvidia bumblebee cuda
reboot
nvidia-smi  # test

不推荐从官网下载 .run 文件安装（因为我装炸了）。

Basic

Step

malloc memory
calculate
free memory

Structure

Grid
Block
Thread

example

#include <stdio.h>

// Number of elements in each vector of the example.
#define COUNT 2

/*
 * Element-wise vector addition: c[i] = a[i] + b[i] for i in [0, COUNT).
 *
 * a, b : input arrays, each holding at least COUNT ints
 * c    : output array holding at least COUNT ints
 *
 * All three pointers are dereferenced by the kernel, so they must be
 * addressable from device code.
 *
 * Launch layout: 1D grid of 1D blocks. Each thread handles at most one
 * element; threads whose global index is >= COUNT do nothing, so any launch
 * that provides at least COUNT threads in total (e.g. <<<1, COUNT>>>) is
 * valid.
 */
__global__ void add(int *a, int *b, int *c){
	// Global index, so launches with more than one block cover distinct elements.
	int idx = blockIdx.x * blockDim.x + threadIdx.x;
	// Guard the tail: surplus threads must not touch memory past the arrays.
	if (idx < COUNT) {
		c[idx] = a[idx] + b[idx];
	}
}

// Report a failed CUDA runtime call on stderr.
// Returns true when `err` is cudaSuccess, false otherwise.
static bool cudaOk(cudaError_t err, const char *what){
    if (err == cudaSuccess) {
        return true;
    }
    fprintf(stderr, "CUDA error in %s: %s\n", what, cudaGetErrorString(err));
    return false;
}

/*
 * Adds two small host vectors on the GPU and prints the result.
 *
 * Follows the three steps of the tutorial: allocate device memory,
 * copy in / launch / copy out, free device memory. Every runtime call is
 * checked; on the first failure the remaining steps are skipped, the device
 * buffers are released and the program exits with status 1 instead of
 * printing stale values.
 *
 * Returns 0 on success, 1 if any CUDA call failed.
 */
int main(){
    const size_t bytes = COUNT * sizeof(int);

    int a[COUNT]={1,2};
    int b[COUNT]={3,4};
    int c[COUNT]={0};

    int* d_a = NULL;
    int* d_b = NULL;
    int* d_c = NULL;

    // Step 1 malloc (&& short-circuits, so nothing runs after the first failure)
    bool ok = cudaOk(cudaMalloc(&d_a, bytes), "cudaMalloc(d_a)")
           && cudaOk(cudaMalloc(&d_b, bytes), "cudaMalloc(d_b)")
           && cudaOk(cudaMalloc(&d_c, bytes), "cudaMalloc(d_c)");

    // Step 2 calculate
    ok = ok
      && cudaOk(cudaMemcpy(d_a, a, bytes, cudaMemcpyHostToDevice), "cudaMemcpy(d_a)")
      && cudaOk(cudaMemcpy(d_b, b, bytes, cudaMemcpyHostToDevice), "cudaMemcpy(d_b)")
      && cudaOk(cudaMemset(d_c, 0, bytes), "cudaMemset(d_c)");

    if (ok) {
        add<<<1,COUNT>>>(d_a, d_b, d_c);

        // A launch returns no status itself: cudaGetLastError() reports a bad
        // launch configuration, the synchronize reports faults raised while
        // the kernel was running.
        ok = cudaOk(cudaGetLastError(), "add launch")
          && cudaOk(cudaDeviceSynchronize(), "cudaDeviceSynchronize")
          && cudaOk(cudaMemcpy(c, d_c, bytes, cudaMemcpyDeviceToHost), "cudaMemcpy(c)");
    }

    // Step 3 free (runs on the error path too; cudaFree(NULL) is a no-op)
    cudaFree(d_a);
    cudaFree(d_b);
    cudaFree(d_c);

    if (!ok) {
        return 1;
    }

    for(int i = 0; i < COUNT;i ++){
        printf("%d ",c[i]);
    }
    printf("\n");

    return 0;
}

Execute

nvcc example.cu -o a.out
./a.out

CUDA从入门到放弃.md

Install

Environment

install

Tag

Basic

Step

Structure

example

Execute

Tag