0
저는 GPU를 처음 접했고 GPU를 사용하여 큰 행렬 벡터 곱셈을 풀고 싶습니다. 행렬이 줄무늬 행렬이므로 "cublasDgbmv"를 사용하여 문제를 해결하려고합니다. 간단한 예제로이 명령을 구현하려고했습니다. 내가 가진gpu를 사용하는 cublasDgbmv의 계수 행렬
/* system of equations sol=A*b:
A=[1 2 3 0 0 0
2 -1 4 1 0 0
3 4 5 -1 7 0
0 1 -1 3 8 9
0 0 7 8 2 6
0 0 0 9 6 10]
b=[0 1 2 3 4 5]
solution supposed to be (using matlab) [8 10 39 85 76 101]*/
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <cublas_v2.h>
#include <stdlib.h>
#include <stdio.h>
#define MAX(x, y) (((x) > (y)) ? (x) : (y))
#define MIN(x, y) (((x) < (y)) ? (x) : (y))
int main()
{
cublasHandle_t handle;
int i, j, k=2, m, n=6; // i,j are used as counters, k is the bandwidth, n is the size of the matrix and m is a constant that will be used in the code
double A[36] = {1,2,3,0,0,0, 2,-1,4,1,0,0, 3,4,5,-1,7,0, 0,1,-1,3,8,9, 0,0,7,8,2,6, 0,0,0,9,6,10};
double* Ab;
Ab = (double*)malloc(n*n*sizeof(double));
double* b;
b = (double*)malloc(n*sizeof(double));
double* sol;
sol = (double*)malloc(n*sizeof(double));
const double alph1 = 1;
const double *alpha_1 = &alph1;
const double alph0 = 0;
const double *alpha_0 = &alph0;
double* d_Ab;
cudaMalloc(&d_Ab, n*n*sizeof(double));
double* d_b;
cudaMalloc(&d_b, n*sizeof(double));
double* d_sol;
cudaMalloc(&d_sol, n*sizeof(double));
for(i=0;i<6;i++)
b[i] = i;
for(j=1;j<=n;j++)
{
m=k+1-j;
for(i=MAX(1,j-k);i<=MIN(n,j+k);i++)
Ab[(m+i-1)*n+(j-1)] = A[(i-1)*n + (j-1)];
}
cudaMemcpy(d_Ab, Ab, n*n*sizeof(double), cudaMemcpyHostToDevice);
cudaMemcpy(d_b , b, n*sizeof(double), cudaMemcpyHostToDevice);
cublasCreate(&handle);
cublasDgbmv(handle, CUBLAS_OP_N , n, n, k, k, alpha_1, d_Ab, n, d_b, 1, alpha_0, d_sol, 1);
cublasDestroy(handle);
cudaMemcpy(sol, d_sol, n*sizeof(double), cudaMemcpyDeviceToHost);
for(i=0;i<n;i++)
printf("%f ", sol[i]);
free(Ab);
free(b);
free(sol);
cudaFree(d_Ab);
cudaFree(d_b);
cudaFree(d_sol);
}
의 해결책이 : 여기에 내가 쓴 코드는
4.000000 8.000000 33.000000 -31387192811020962000000000000000000000000000000000000000000000000000.000000 -31387192811020962000000000000000000000000000000000000000000000000000.000000 -31387192811020962000000000000000000000000000000000000000000000000000.000000
내가 매트릭스 대역폭 양식 및 열 주요 형태로되어 있음을 알고있다. 나는 이걸로 보여 줬어. link