나는이 코드를 실행하여 추력 stable_sort 및 사용자 정의 연산자를 사용하여 IP의 큰 배열을 정렬하여 IP를 비교합니다. 이 코드는 50000 개 미만의 IP 배열에서 작동하지만 큰 배열에 대해서는 메모리 오류가 발생합니다. 메모리 위치 오류 : 큰 배열 및 사용자 정의 비교 연산자를 사용할 때 thrust :: stable_sort
이 얼마나 큰 배열이 문제를 해결하기 위해 메모리 위치에서 추력 :: 시스템 :: SYSTEM_ERROR : 내가 가진#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <thrust/host_vector.h>
#include <thrust/device_vector.h>
#include <thrust/sort.h>
#include <stdio.h>
#include <time.h>
#include <device_functions.h>
template<typename T>
struct vector_less
{
typedef T first_argument_type;
typedef T second_argument_type;
typedef bool result_type;
__host__ __device__ bool operator()(const T &lhs, const T &rhs) const {
if (lhs[0] == rhs[0])
if (lhs[1] == rhs[1])
if (lhs[2] == rhs[2])
return lhs[3] < rhs[3];
else
return lhs[2] < rhs[2];
else
return lhs[1] < rhs[1];
else
return lhs[0] < rhs[0];
}
};
__global__ void prepare_ips_list(unsigned char ** dev_sorted_Ips, unsigned char * ip_b1, unsigned char * ip_b2, unsigned char * ip_b3, unsigned char * ip_b4, unsigned int searchedIpsSize)
{
int thread = threadIdx.x + blockIdx.x * blockDim.x;
if (thread < searchedIpsSize)
{
dev_sorted_Ips[thread] = new unsigned char[4];
dev_sorted_Ips[thread][0] = ip_b1[thread];
dev_sorted_Ips[thread][1] = ip_b2[thread];
dev_sorted_Ips[thread][2] = ip_b3[thread];
dev_sorted_Ips[thread][3] = ip_b4[thread];
}
}
int main()
{
const int size = 1000000;
unsigned char * ip_b1 = new unsigned char[size];
unsigned char * ip_b2 = new unsigned char[size];;
unsigned char * ip_b3 = new unsigned char[size];;
unsigned char * ip_b4 = new unsigned char[size];;
unsigned char * dev_ip_b1;
unsigned char * dev_ip_b2;
unsigned char * dev_ip_b3;
unsigned char * dev_ip_b4;
unsigned char ** dev_sortedIps;
for (int i = 0; i < size; i++)
{
ip_b1[i] = rand() % 240;
ip_b2[i] = rand() % 240;
ip_b3[i] = rand() % 240;
ip_b4[i] = rand() % 240;
}
cudaError_t cudaStatus;
cudaStatus = cudaSetDevice(0);
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "cudaSetDevice failed! Do you have a CUDA-capable GPU installed?");
goto Error;
}
cudaStatus = cudaMalloc((void**)&dev_ip_b1, size * sizeof(unsigned char));
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "cudaMalloc failed!");
goto Error;
}
cudaStatus = cudaMemcpy(dev_ip_b1, ip_b1, size * sizeof(unsigned char), cudaMemcpyHostToDevice);
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "cudaMemcpy failed!");
goto Error;
}
cudaStatus = cudaMalloc((void**)&dev_ip_b2, size * sizeof(unsigned char));
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "cudaMalloc failed!");
goto Error;
}
cudaStatus = cudaMemcpy(dev_ip_b2, ip_b2, size * sizeof(unsigned char), cudaMemcpyHostToDevice);
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "cudaMemcpy failed!");
goto Error;
}
cudaStatus = cudaMalloc((void**)&dev_ip_b3, size * sizeof(unsigned char));
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "cudaMalloc failed!");
goto Error;
}
cudaStatus = cudaMemcpy(dev_ip_b3, ip_b3, size * sizeof(unsigned char), cudaMemcpyHostToDevice);
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "cudaMemcpy failed!");
goto Error;
}
cudaStatus = cudaMalloc((void**)&dev_ip_b4, size * sizeof(unsigned char));
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "cudaMalloc failed!");
goto Error;
}
cudaStatus = cudaMemcpy(dev_ip_b4, ip_b4, size * sizeof(unsigned char), cudaMemcpyHostToDevice);
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "cudaMemcpy failed!");
goto Error;
}
cudaStatus = cudaMalloc((void**)&dev_sortedIps, size * sizeof(unsigned char *));
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "cudaMalloc failed!");
goto Error;
}
int resetThreads = size;
int resetBlocks = 1;
if (size > 1024)
{
resetThreads = 1024;
resetBlocks = size/1024;
if (size % 1024 > 0)
resetBlocks++;
}
prepare_ips_list << <resetBlocks, resetThreads >> >(dev_sortedIps, dev_ip_b1, dev_ip_b2, dev_ip_b3, dev_ip_b4, size);
thrust::device_ptr<unsigned char *> sorted_list_ptr1(dev_sortedIps);
thrust::stable_sort(sorted_list_ptr1, sorted_list_ptr1 + size, vector_less<unsigned char *>());
cudaStatus = cudaGetLastError();
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "launch failed: %s\n", cudaGetErrorString(cudaStatus));
goto Error;
}
// cudaDeviceSynchronize waits for the kernel to finish, and returns
// any errors encountered during the launch.
cudaStatus = cudaDeviceSynchronize();
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching !\n", cudaStatus);
goto Error;
}
return 0;
Error:
cudaFree(dev_ip_b1);
cudaFree(dev_ip_b2);
cudaFree(dev_ip_b3);
cudaFree(dev_ip_b4);
cudaFree(dev_sortedIps);
}
오류는 다음과 같습니다 : 마이크로 소프트 C++ 예외 여기 내가 사용하는 코드는? 병합 후 부품을 나누고 정렬하는 것과 같은 정렬을 위해 다른 기술을 사용해야합니까?
[tag : c]가 아닙니다.여기 –