我是一名CUDA初学者,尝试从GPU写入设备内存,以便我可以将其复制回主机并查看结果。为什么在写入设备内存时此CUDA程序崩溃?
这是我的代码,我剥了下来小我同时还获得了错误:
#include <iostream>
#include <vector>
bool verbose = true;
int SIZE = 10;
__global__
void assign_state(uint8_t * states)
{
states[threadIdx.x] = 42;
}
int main (int argc, char *argv[])
{
// Create host states vector
if (verbose) std::cout << "Creating host memory... ";
std::vector<uint8_t> * states = new std::vector<uint8_t>(SIZE);
int statesSize = states->size() * sizeof(uint8_t);
if (verbose) std::cout << "Done\n";
// Send data to device
if (verbose) std::cout << "Sending graph to device... ";
uint8_t * deviceStatesPointer;
cudaMalloc((void**)&deviceStatesPointer, statesSize);
cudaMemcpy(deviceStatesPointer, states, statesSize, cudaMemcpyHostToDevice);
if (verbose) std::cout << "Done\n";
// Assign states
if (verbose) std::cout << "Assign states... ";
dim3 dimBlock(SIZE, 1);
dim3 dimGrid(1, 1);
assign_state<<<dimGrid, dimBlock>>>(deviceStatesPointer);
if (verbose) std::cout << "Done\n";
// Get data back from device
if (verbose) std::cout << "Getting data back from device...\n";
cudaMemcpy(states, deviceStatesPointer, statesSize, cudaMemcpyDeviceToHost);
if (verbose)
{
for (int i = 0; i < states->size(); i++)
{
std::cout << "\t" << i << ": " << states->at(i) << std::endl;
}
}
if (verbose) std::cout << "Done\n";
return 0;
}
它的工作原理,只要我注释掉states[threadIdx.x] = 42'
线,但如果我不这样做,该程序在打印出for循环的第一个迭代后立即崩溃。
从我在网上看到的,我猜我可能会试图写入内存我不应该写信给?
我一直在根据我的代码this Hello World example。
这个:'new std :: vector'是无稽之谈 – Drop