| version 1.3 | version 1.4 |
|---|---|
| | |
| } | } |
| // ------------------------------------------------------------------------------------------------- | // ------------------------------------------------------------------------------------------------- |
| | |
| | // Node-wide list of device IDs for every rank |
| | #define MAX_NUM_RANKS 2048 |
| | int deviceIDList[MAX_NUM_RANKS]; |
| | // Node-wide list of master PEs for every device ID |
| | #define MAX_NUM_DEVICES 256 |
| | int masterPeList[MAX_NUM_DEVICES]; |
| | |
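The two tables above are node-wide: `deviceIDList` maps each rank on the node to the CUDA device it was assigned, and `masterPeList` maps each device ID back to the PE that owns it. Below is a minimal standalone sketch of that relationship (plain C++, no Charm++; the simulated rank and device counts are illustrative, not NAMD's):

```cpp
// Standalone sketch of the two node-wide tables: each rank records its
// device ID, and each device ID records the "master" PE that owns it.
// Names mirror the diff; the round-robin assignment is illustrative only.
#include <cstdio>

#define MAX_NUM_RANKS 2048
#define MAX_NUM_DEVICES 256

static int deviceIDList[MAX_NUM_RANKS];   // rank -> device ID
static int masterPeList[MAX_NUM_DEVICES]; // device ID -> master PE (-1 if unused)

int main() {
  const int numRanks = 4, numDevices = 2;
  for (int i = 0; i < MAX_NUM_DEVICES; i++) masterPeList[i] = -1;
  // Round-robin ranks onto devices; the first rank on a device becomes master.
  for (int rank = 0; rank < numRanks; rank++) {
    int dev = rank % numDevices;
    deviceIDList[rank] = dev;
    if (masterPeList[dev] == -1) masterPeList[dev] = rank;
  }
  for (int rank = 0; rank < numRanks; rank++)
    printf("rank %d -> device %d (master PE %d)\n",
           rank, deviceIDList[rank], masterPeList[deviceIDList[rank]]);
  return 0;
}
```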
| // ------------------------------------------------------------------------------------------------- | // ------------------------------------------------------------------------------------------------- |
| // ------------------------------------------------------------------------------------------------- | // ------------------------------------------------------------------------------------------------- |
| // ------------------------------------------------------------------------------------------------- | // ------------------------------------------------------------------------------------------------- |
| | |
| // | // |
| // Class creator | // Class creator |
| // | // |
| DeviceCUDA::DeviceCUDA() {} | DeviceCUDA::DeviceCUDA() : deviceProps(NULL), devices(NULL) {} |
| | |
| // | // |
| // Initialize device | // Initialize device |
| ... | ... |
| } | } |
| // CkPrintf("Pe %d ranks %d in physical node\n",CkMyPe(),myRankInPhysicalNode); | // CkPrintf("Pe %d ranks %d in physical node\n",CkMyPe(),myRankInPhysicalNode); |
| | |
| int deviceCount = 0; | deviceCount = 0; |
| cudaCheck(cudaGetDeviceCount(&deviceCount)); | cudaCheck(cudaGetDeviceCount(&deviceCount)); |
| if ( deviceCount <= 0 ) { | if ( deviceCount <= 0 ) { |
| cudaDie("No CUDA devices found."); | cudaDie("No CUDA devices found."); |
| } | } |
| | |
| int *devices; | // Store all device props |
| int ndevices = 0; | deviceProps = new cudaDeviceProp[deviceCount]; |
| | for ( int i=0; i<deviceCount; ++i ) { |
| | cudaCheck(cudaGetDeviceProperties(&deviceProps[i], i)); |
| | } |
| | |
| | ndevices = 0; |
| int nexclusive = 0; | int nexclusive = 0; |
| if ( usedevicelist ) { | if ( usedevicelist ) { |
| devices = new int[strlen(devicelist)]; | devices = new int[strlen(devicelist)]; |
| ... | ... |
| numPesSharingDevice = 1; | numPesSharingDevice = 1; |
| } | } |
| | |
| | deviceID = dev; |
| | |
| | // Store device IDs to node-wide list |
| | if (CkMyRank() >= MAX_NUM_RANKS) |
| | NAMD_die("Maximum number of ranks (2048) per node exceeded"); |
| | deviceIDList[CkMyRank()] = deviceID; |
| | |
| if ( masterPe != CkMyPe() ) { | if ( masterPe != CkMyPe() ) { |
| if ( CmiPhysicalNodeID(masterPe) < 2 ) | if ( CmiPhysicalNodeID(masterPe) < 2 ) |
| CkPrintf("Pe %d physical rank %d will use CUDA device of pe %d\n", | CkPrintf("Pe %d physical rank %d will use CUDA device of pe %d\n", |
| ... | ... |
| return; | return; |
| } | } |
| | |
| | // Store master PEs for every device ID to node-wide list |
| | if (deviceID >= MAX_NUM_DEVICES) |
| | NAMD_die("Maximum number of CUDA devices (256) per node exceeded"); |
| | masterPeList[deviceID] = masterPe; |
| | // Set masterPe values to -1 for devices that do not exist. |
| | // Only master Pe with deviceID == devices[0] does the writing |
| | if (deviceID == devices[0]) { |
| | // For device IDs 0...deviceCount-1, check whether each appears in devices[0...ndevices-1] |
| | for (int i=0;i < deviceCount;i++) { |
| | bool deviceOK = false; |
| | for (int j=0;j < ndevices;j++) { |
| | if (devices[j] == i) deviceOK = true; |
| | } |
| | if (!deviceOK) masterPeList[i] = -1; |
| | } |
| | // Device IDs deviceCount ... MAX_NUM_DEVICES-1 are not possible, just set them to -1 |
| | for (int i=deviceCount;i < MAX_NUM_DEVICES;i++) { |
| | masterPeList[i] = -1; |
| | } |
| | } |
| | |
| // disable token-passing but don't submit local until remote finished | // disable token-passing but don't submit local until remote finished |
| // if shared_gpu is true, otherwise submit all work immediately | // if shared_gpu is true, otherwise submit all work immediately |
| firstPeSharingGpu = CkMyPe(); | firstPeSharingGpu = CkMyPe(); |
| ... | ... |
| | |
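The comment above describes the submission policy for a shared device: with `shared_gpu` set, local work is held back until remote work completes; otherwise everything is submitted immediately. A hedged sketch of that gating logic follows (names such as `trySubmitLocal` and the flags are hypothetical, not NAMD's API):

```cpp
// Sketch of the work-submission policy described in the diff's comment,
// assuming a boolean shared_gpu and the usual remote-before-local ordering.
#include <cstdio>

bool shared_gpu = true;   // set when several PEs share one device
bool remoteDone = false;  // set when remote work has completed

void submitLocal() { printf("local work submitted\n"); }

void trySubmitLocal() {
  // With a shared GPU, hold local work until remote work finishes so the
  // other PEs' remote results are not delayed; otherwise submit at once.
  if (!shared_gpu || remoteDone) submitLocal();
}

int main() {
  trySubmitLocal();  // shared GPU, remote pending: nothing happens
  remoteDone = true;
  trySubmitLocal();  // remote finished: local work goes in
  return 0;
}
```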
| } // just let CUDA pick a device for us | } // just let CUDA pick a device for us |
| | |
| | { |
| cudaCheck(cudaSetDeviceFlags(cudaDeviceMapHost)); | cudaCheck(cudaSetDeviceFlags(cudaDeviceMapHost)); |
| | |
| int dev; | int dev; |
| cudaCheck(cudaGetDevice(&dev)); | cudaCheck(cudaGetDevice(&dev)); |
| | deviceID = dev; |
| cudaDeviceProp deviceProp; | cudaDeviceProp deviceProp; |
| cudaCheck(cudaGetDeviceProperties(&deviceProp, dev)); | cudaCheck(cudaGetDeviceProperties(&deviceProp, dev)); |
| if ( deviceProp.computeMode == cudaComputeModeProhibited ) | if ( deviceProp.computeMode == cudaComputeModeProhibited ) |
| ... | ... |
| cudaDie("device not of compute capability 1.1 or higher"); | cudaDie("device not of compute capability 1.1 or higher"); |
| if ( ! deviceProp.canMapHostMemory ) | if ( ! deviceProp.canMapHostMemory ) |
| cudaDie("device cannot map host memory"); | cudaDie("device cannot map host memory"); |
| | #ifndef DISABLE_CUDA_TEXTURE_OBJECTS |
| | if (deviceProp.major < 3) |
| | cudaDie("CUDA texture objects require compute capability 3.0 or higher.\nUse DISABLE_CUDA_TEXTURE_OBJECTS to disable texture objects."); |
| | #endif |
| | extern int read_CUDA_ARCH(); |
| | cuda_arch = read_CUDA_ARCH(); |
| | } |
| } | } |
| | |
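Version 1.4's initializer also gates on compute capability when texture objects are enabled. A standalone sketch of the same checks against the CUDA runtime (compile with nvcc; the simplified error handling stands in for NAMD's `cudaCheck`/`cudaDie` wrappers):

```cpp
#include <cstdio>
#include <cstdlib>
#include <cuda_runtime.h>

static void check(cudaError_t err, const char *msg) {
  if (err != cudaSuccess) {
    fprintf(stderr, "%s: %s\n", msg, cudaGetErrorString(err));
    exit(1);
  }
}

int main() {
  int dev = 0;
  check(cudaGetDevice(&dev), "cudaGetDevice");
  cudaDeviceProp prop;
  check(cudaGetDeviceProperties(&prop, dev), "cudaGetDeviceProperties");
  if (prop.computeMode == cudaComputeModeProhibited) {
    fprintf(stderr, "device in prohibited compute mode\n");
    return 1;
  }
  if (!prop.canMapHostMemory) {
    fprintf(stderr, "device cannot map host memory\n");
    return 1;
  }
  // Texture objects (used unless DISABLE_CUDA_TEXTURE_OBJECTS is defined)
  // require compute capability 3.0 or higher.
  if (prop.major < 3) {
    fprintf(stderr, "compute %d.%d < 3.0: texture objects unavailable\n",
            prop.major, prop.minor);
    return 1;
  }
  printf("device %d OK (compute %d.%d)\n", dev, prop.major, prop.minor);
  return 0;
}
```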
| // | // |
| // Class destructor | // Class destructor |
| // | // |
| DeviceCUDA::~DeviceCUDA() { | DeviceCUDA::~DeviceCUDA() { |
| | if (deviceProps != NULL) delete [] deviceProps; |
| | if (devices != NULL) delete [] devices; |
| delete [] pesSharingDevice; | delete [] pesSharingDevice; |
| } | } |
| | |
| // | // |
| | // Returns the device ID for pe. Assumes all nodes have the same device configuration |
| | // |
| | int DeviceCUDA::getDeviceIDforPe(int pe) { |
| | return deviceIDList[CkRankOf(pe) % CkMyNodeSize()]; |
| | } |
| | |
| | // |
| | // Returns master PE for the device ID, or -1 if device not found |
| | // |
| | int DeviceCUDA::getMasterPeForDeviceID(int deviceID) { |
| | return masterPeList[deviceID % deviceCount]; |
| | } |
| | |
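The two new accessors compose: to find which PE owns the device some other PE is using, look up the device first, then its master. A standalone sketch reusing the node-wide tables from the first example (plain C++; `masterForRank` is a hypothetical helper, and real NAMD indexes `deviceIDList` by `CkRankOf(pe) % CkMyNodeSize()`):

```cpp
#include <cstdio>

#define MAX_NUM_RANKS 2048
#define MAX_NUM_DEVICES 256

static int deviceIDList[MAX_NUM_RANKS];   // rank -> device ID
static int masterPeList[MAX_NUM_DEVICES]; // device ID -> master PE, -1 if absent

// Two-step lookup mirroring getDeviceIDforPe + getMasterPeForDeviceID.
int masterForRank(int rank) {
  int dev = deviceIDList[rank];
  return masterPeList[dev]; // -1 means the device does not exist
}

int main() {
  deviceIDList[0] = 0; deviceIDList[1] = 0; deviceIDList[2] = 1;
  masterPeList[0] = 0; masterPeList[1] = 2;
  printf("rank 1's device is owned by PE %d\n", masterForRank(1)); // PE 0
  return 0;
}
```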
| | // |
| // Returns true if process "pe" shares this device | // Returns true if process "pe" shares this device |
| // | // |
| bool DeviceCUDA::device_shared_with_pe(int pe) { | bool DeviceCUDA::device_shared_with_pe(int pe) { |
| ... | ... |
| return ( numPesOnNodeSharingDevice == CkMyNodeSize() ); | return ( numPesOnNodeSharingDevice == CkMyNodeSize() ); |
| } | } |
| | |
| | int DeviceCUDA::getMaxNumThreads() { |
| | int dev; |
| | cudaCheck(cudaGetDevice(&dev)); |
| | return deviceProps[dev].maxThreadsPerBlock; |
| | } |
| | |
| | int DeviceCUDA::getMaxNumBlocks() { |
| | int dev; |
| | cudaCheck(cudaGetDevice(&dev)); |
| | return deviceProps[dev].maxGridSize[0]; |
| | } |
| | |
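`getMaxNumThreads()` and `getMaxNumBlocks()` expose the cached `deviceProps` limits so kernel launches can be sized without re-querying the driver each time. A sketch of that use (CUDA C++; the `fill` kernel and the sizing logic are illustrative, and error checking is omitted for brevity):

```cpp
#include <cstdio>
#include <cuda_runtime.h>

__global__ void fill(float *x, int n) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n) x[i] = 1.0f;
}

int main() {
  int dev = 0;
  cudaGetDevice(&dev);
  cudaDeviceProp prop;
  cudaGetDeviceProperties(&prop, dev);  // NAMD caches this once per device

  const int n = 1 << 20;
  int threads = prop.maxThreadsPerBlock < 256 ? prop.maxThreadsPerBlock : 256;
  int blocks = (n + threads - 1) / threads;
  if (blocks > prop.maxGridSize[0]) blocks = prop.maxGridSize[0]; // clamp like getMaxNumBlocks()

  float *x;
  cudaMalloc(&x, n * sizeof(float));
  fill<<<blocks, threads>>>(x, n);
  cudaDeviceSynchronize();
  printf("launched %d blocks of %d threads\n", blocks, threads);
  cudaFree(x);
  return 0;
}
```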
| /* | /* |
| BASE | BASE |
| 2 types (remote & local) | 2 types (remote & local) |