00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043 #include "CUDAKernels.h"
00044 #define CUDAMARCHINGCUBES_INTERNAL 1
00045 #include "CUDAMarchingCubes.h"
00046 #include <thrust/scan.h>
00047 #include <thrust/functional.h>
00048
00049
00050
00051 #define NTHREADS 48
00052
00053
00054
00055
00056
00057
00058 inline __host__ __device__ float3 operator+(float3 a, float3 b) {
00059 return make_float3(a.x + b.x, a.y + b.y, a.z + b.z);
00060 }
00061 inline __host__ __device__ uint3 operator+(uint3 a, uint3 b) {
00062 return make_uint3(a.x + b.x, a.y + b.y, a.z + b.z);
00063 }
00064 inline __host__ __device__ uint2 operator+(uint2 a, uint2 b) {
00065 return make_uint2(a.x + b.x, a.y + b.y);
00066 }
00067
00068
00069 inline __host__ __device__ float3 operator-(float3 a, float3 b) {
00070 return make_float3(a.x - b.x, a.y - b.y, a.z - b.z);
00071 }
00072 inline __host__ __device__ uint3 operator-(uint3 a, uint3 b) {
00073 return make_uint3(a.x - b.x, a.y - b.y, a.z - b.z);
00074 }
00075
00076
00077 inline __host__ __device__ float3 operator*(float b, float3 a) {
00078 return make_float3(b * a.x, b * a.y, b * a.z);
00079 }
00080
00081
00082 inline __host__ __device__ float dot(float3 a, float3 b) {
00083 return a.x * b.x + a.y * b.y + a.z * b.z;
00084 }
00085
00086
00087 inline __device__ __host__ float3 lerp(float3 a, float3 b, float t) {
00088 return a + t*(b-a);
00089 }
00090
00091
00092 inline __host__ __device__ float length(float3 v) {
00093 return sqrtf(dot(v, v));
00094 }
00095
00096
00097
00098
00099
00100
00101 texture<int, 1, cudaReadModeElementType> triTex;
00102 texture<unsigned int, 1, cudaReadModeElementType> numVertsTex;
00103
00104 texture<float, 3, cudaReadModeElementType> volumeTex;
00105
00106
00107 __device__ float sampleVolume(float *data, uint3 p, uint3 gridSize) {
00108
00109
00110
00111
00112 unsigned int i = (p.z*gridSize.x*gridSize.y) + (p.y*gridSize.x) + p.x;
00113 return data[i];
00114 }
00115
00116
00117
00118 __device__ float3 sampleColors(float3 *data, uint3 p, uint3 gridSize) {
00119
00120
00121
00122
00123 unsigned int i = (p.z*gridSize.x*gridSize.y) + (p.y*gridSize.x) + p.x;
00124 return data[i];
00125 }
00126
00127
00128
00129 __device__ uint3 calcGridPos(unsigned int i, uint3 gridSize) {
00130 uint3 gridPos;
00131 unsigned int gridsizexy = gridSize.x * gridSize.y;
00132 gridPos.z = i / gridsizexy;
00133 unsigned int tmp1 = i - (gridsizexy * gridPos.z);
00134 gridPos.y = tmp1 / gridSize.x;
00135 gridPos.x = tmp1 - (gridSize.x * gridPos.y);
00136 return gridPos;
00137 }
00138
00139
00140
00141 __device__ float3 vertexInterp(float isolevel, float3 p0, float3 p1, float f0, float f1) {
00142 float t = (isolevel - f0) / (f1 - f0);
00143 return lerp(p0, p1, t);
00144 }
00145
00146
00147
00148 template <int gridis3d, int subgrid>
00149 __global__ void classifyVoxel(uint2* voxelVerts, float *volume,
00150 uint3 gridSize, unsigned int numVoxels, float3 voxelSize,
00151 uint3 subGridStart, uint3 subGridEnd,
00152 float isoValue) {
00153 uint3 gridPos;
00154 unsigned int i;
00155
00156
00157
00158 if (gridis3d) {
00159
00160
00161 gridPos.x = blockIdx.x * blockDim.x + threadIdx.x;
00162 gridPos.y = blockIdx.y * blockDim.y + threadIdx.y;
00163 gridPos.z = blockIdx.z * blockDim.z + threadIdx.z;
00164
00165
00166 if (gridPos.x >= gridSize.x ||
00167 gridPos.y >= gridSize.y ||
00168 gridPos.z >= gridSize.z)
00169 return;
00170
00171
00172 i = gridPos.z*gridSize.x*gridSize.y + gridPos.y*gridSize.x + gridPos.x;
00173 } else {
00174
00175 unsigned int blockId = (blockIdx.y * gridDim.x) + blockIdx.x;
00176 i = (blockId * blockDim.x) + threadIdx.x;
00177
00178
00179 if (i >= numVoxels)
00180 return;
00181
00182
00183 gridPos = calcGridPos(i, gridSize);
00184 }
00185
00186
00187
00188
00189 uint2 numVerts = make_uint2(0, 0);
00190 if (subgrid) {
00191 if (gridPos.x < subGridStart.x ||
00192 gridPos.y < subGridStart.y ||
00193 gridPos.z < subGridStart.z ||
00194 gridPos.x >= subGridEnd.x ||
00195 gridPos.y >= subGridEnd.y ||
00196 gridPos.z >= subGridEnd.z) {
00197 voxelVerts[i] = numVerts;
00198 return;
00199 }
00200 } else {
00201 if (gridPos.x > (gridSize.x - 2) || gridPos.y > (gridSize.y - 2) || gridPos.z > (gridSize.z - 2)) {
00202 voxelVerts[i] = numVerts;
00203 return;
00204 }
00205 }
00206
00207
00208 float field[8];
00209 field[0] = sampleVolume(volume, gridPos, gridSize);
00210
00211
00212
00213
00214
00215 field[1] = sampleVolume(volume, gridPos + make_uint3(1, 0, 0), gridSize);
00216 field[2] = sampleVolume(volume, gridPos + make_uint3(1, 1, 0), gridSize);
00217 field[3] = sampleVolume(volume, gridPos + make_uint3(0, 1, 0), gridSize);
00218 field[4] = sampleVolume(volume, gridPos + make_uint3(0, 0, 1), gridSize);
00219 field[5] = sampleVolume(volume, gridPos + make_uint3(1, 0, 1), gridSize);
00220 field[6] = sampleVolume(volume, gridPos + make_uint3(1, 1, 1), gridSize);
00221 field[7] = sampleVolume(volume, gridPos + make_uint3(0, 1, 1), gridSize);
00222
00223
00224 unsigned int cubeindex;
00225 cubeindex = ((unsigned int) (field[0] < isoValue));
00226 cubeindex += ((unsigned int) (field[1] < isoValue))*2;
00227 cubeindex += ((unsigned int) (field[2] < isoValue))*4;
00228 cubeindex += ((unsigned int) (field[3] < isoValue))*8;
00229 cubeindex += ((unsigned int) (field[4] < isoValue))*16;
00230 cubeindex += ((unsigned int) (field[5] < isoValue))*32;
00231 cubeindex += ((unsigned int) (field[6] < isoValue))*64;
00232 cubeindex += ((unsigned int) (field[7] < isoValue))*128;
00233
00234
00235 numVerts.x = tex1Dfetch(numVertsTex, cubeindex);
00236 numVerts.y = (numVerts.x > 0);
00237
00238 voxelVerts[i] = numVerts;
00239 }
00240
00241
00242
00243 __global__ void compactVoxels(unsigned int *compactedVoxelArray, uint2 *voxelOccupied, unsigned int lastVoxel, unsigned int numVoxels, unsigned int numVoxelsp1) {
00244 unsigned int blockId = (blockIdx.y * gridDim.x) + blockIdx.x;
00245 unsigned int i = (blockId * blockDim.x) + threadIdx.x;
00246
00247 if ((i < numVoxels) && ( (i < numVoxelsp1) ? voxelOccupied[i].y < voxelOccupied[i+1].y : lastVoxel) ) {
00248 compactedVoxelArray[ voxelOccupied[i].y ] = i;
00249 }
00250 }
00251
00252
00253
00254 __global__ void generateTriangleVerticesSMEM(float3 *pos, unsigned int *compactedVoxelArray, uint2 *numVertsScanned, float *volume,
00255 uint3 gridSize, float3 voxelSize, float isoValue, unsigned int activeVoxels, unsigned int maxVertsM3) {
00256 unsigned int zOffset = blockIdx.z * gridDim.x * gridDim.y;
00257 unsigned int blockId = (blockIdx.y * gridDim.x) + blockIdx.x;
00258 unsigned int i = zOffset * ( blockDim.x * blockDim.y) + (blockId * blockDim.x) + threadIdx.x;
00259
00260 if (i >= activeVoxels )
00261 return;
00262
00263 unsigned int voxel = compactedVoxelArray[i];
00264
00265
00266 uint3 gridPos = calcGridPos(voxel, gridSize);
00267
00268 float3 p;
00269 p.x = gridPos.x * voxelSize.x;
00270 p.y = gridPos.y * voxelSize.y;
00271 p.z = gridPos.z * voxelSize.z;
00272
00273
00274 float3 v[8];
00275 v[0] = p;
00276 v[1] = p + make_float3(voxelSize.x, 0, 0);
00277 v[2] = p + make_float3(voxelSize.x, voxelSize.y, 0);
00278 v[3] = p + make_float3(0, voxelSize.y, 0);
00279 v[4] = p + make_float3(0, 0, voxelSize.z);
00280 v[5] = p + make_float3(voxelSize.x, 0, voxelSize.z);
00281 v[6] = p + make_float3(voxelSize.x, voxelSize.y, voxelSize.z);
00282 v[7] = p + make_float3(0, voxelSize.y, voxelSize.z);
00283
00284 float field[8];
00285 field[0] = sampleVolume(volume, gridPos, gridSize);
00286 field[1] = sampleVolume(volume, gridPos + make_uint3(1, 0, 0), gridSize);
00287 field[2] = sampleVolume(volume, gridPos + make_uint3(1, 1, 0), gridSize);
00288 field[3] = sampleVolume(volume, gridPos + make_uint3(0, 1, 0), gridSize);
00289 field[4] = sampleVolume(volume, gridPos + make_uint3(0, 0, 1), gridSize);
00290 field[5] = sampleVolume(volume, gridPos + make_uint3(1, 0, 1), gridSize);
00291 field[6] = sampleVolume(volume, gridPos + make_uint3(1, 1, 1), gridSize);
00292 field[7] = sampleVolume(volume, gridPos + make_uint3(0, 1, 1), gridSize);
00293
00294
00295 unsigned int cubeindex;
00296 cubeindex = ((unsigned int)(field[0] < isoValue));
00297 cubeindex += ((unsigned int)(field[1] < isoValue))*2;
00298 cubeindex += ((unsigned int)(field[2] < isoValue))*4;
00299 cubeindex += ((unsigned int)(field[3] < isoValue))*8;
00300 cubeindex += ((unsigned int)(field[4] < isoValue))*16;
00301 cubeindex += ((unsigned int)(field[5] < isoValue))*32;
00302 cubeindex += ((unsigned int)(field[6] < isoValue))*64;
00303 cubeindex += ((unsigned int)(field[7] < isoValue))*128;
00304
00305
00306
00307
00308
00309
00310
00311
00312
00313
00314 __shared__ float3 vertlist[12*NTHREADS];
00315
00316 vertlist[threadIdx.x] = vertexInterp(isoValue, v[0], v[1], field[0], field[1]);
00317 vertlist[NTHREADS+threadIdx.x] = vertexInterp(isoValue, v[1], v[2], field[1], field[2]);
00318 vertlist[(NTHREADS*2)+threadIdx.x] = vertexInterp(isoValue, v[2], v[3], field[2], field[3]);
00319 vertlist[(NTHREADS*3)+threadIdx.x] = vertexInterp(isoValue, v[3], v[0], field[3], field[0]);
00320 vertlist[(NTHREADS*4)+threadIdx.x] = vertexInterp(isoValue, v[4], v[5], field[4], field[5]);
00321 vertlist[(NTHREADS*5)+threadIdx.x] = vertexInterp(isoValue, v[5], v[6], field[5], field[6]);
00322 vertlist[(NTHREADS*6)+threadIdx.x] = vertexInterp(isoValue, v[6], v[7], field[6], field[7]);
00323 vertlist[(NTHREADS*7)+threadIdx.x] = vertexInterp(isoValue, v[7], v[4], field[7], field[4]);
00324 vertlist[(NTHREADS*8)+threadIdx.x] = vertexInterp(isoValue, v[0], v[4], field[0], field[4]);
00325 vertlist[(NTHREADS*9)+threadIdx.x] = vertexInterp(isoValue, v[1], v[5], field[1], field[5]);
00326 vertlist[(NTHREADS*10)+threadIdx.x] = vertexInterp(isoValue, v[2], v[6], field[2], field[6]);
00327 vertlist[(NTHREADS*11)+threadIdx.x] = vertexInterp(isoValue, v[3], v[7], field[3], field[7]);
00328
00329
00330 unsigned int numVerts = tex1Dfetch(numVertsTex, cubeindex);
00331 for(int i=0; i<numVerts; i+=3) {
00332 unsigned int index = numVertsScanned[voxel].x + i;
00333
00334 float3 *vert[3];
00335 int edge;
00336 edge = tex1Dfetch(triTex, (cubeindex*16) + i);
00337 vert[0] = &vertlist[(edge*NTHREADS)+threadIdx.x];
00338
00339 edge = tex1Dfetch(triTex, (cubeindex*16) + i + 1);
00340 vert[1] = &vertlist[(edge*NTHREADS)+threadIdx.x];
00341
00342 edge = tex1Dfetch(triTex, (cubeindex*16) + i + 2);
00343 vert[2] = &vertlist[(edge*NTHREADS)+threadIdx.x];
00344
00345 if (index < maxVertsM3) {
00346 pos[index ] = *vert[0];
00347 pos[index+1] = *vert[1];
00348 pos[index+2] = *vert[2];
00349 }
00350 }
00351 }
00352
00353
00354 __global__ void offsetTriangleVertices(float3 *pos, float3 origin, unsigned int numVertsM1) {
00355 unsigned int zOffset = blockIdx.z * gridDim.x * gridDim.y;
00356 unsigned int blockId = (blockIdx.y * gridDim.x) + blockIdx.x;
00357 unsigned int i = zOffset + (blockId * blockDim.x) + threadIdx.x;
00358
00359 if (i > numVertsM1)
00360 return;
00361
00362 float3 p = pos[i];
00363 p.x += origin.x;
00364 p.y += origin.y;
00365 p.z += origin.z;
00366 pos[i] = p;
00367 }
00368
00369
00370
00371 __global__ void generateTriangleNormals(float3 *pos, float3 *norm, float3 gridSizeInv, float3 bBoxInv, unsigned int numVerts) {
00372 unsigned int zOffset = blockIdx.z * gridDim.x * gridDim.y;
00373 unsigned int blockId = (blockIdx.y * gridDim.x) + blockIdx.x;
00374 unsigned int i = zOffset + (blockId * blockDim.x) + threadIdx.x;
00375
00376 if (i > numVerts - 1)
00377 return;
00378
00379 float3 n;
00380 float3 p, p1, p2;
00381
00382
00383
00384 p = pos[i];
00385 p.x *= bBoxInv.x;
00386 p.y *= bBoxInv.y;
00387 p.z *= bBoxInv.z;
00388 p1 = p + make_float3( gridSizeInv.x * 1.0f, 0.0f, 0.0f);
00389 p2 = p - make_float3( gridSizeInv.x * 1.0f, 0.0f, 0.0f);
00390 n.x = tex3D( volumeTex, p2.x, p2.y, p2.z) - tex3D( volumeTex, p1.x, p1.y, p1.z);
00391 p1 = p + make_float3( 0.0f, gridSizeInv.y * 1.0f, 0.0f);
00392 p2 = p - make_float3( 0.0f, gridSizeInv.y * 1.0f, 0.0f);
00393 n.y = tex3D( volumeTex, p2.x, p2.y, p2.z) - tex3D( volumeTex, p1.x, p1.y, p1.z);
00394 p1 = p + make_float3( 0.0f, 0.0f, gridSizeInv.z * 1.0f);
00395 p2 = p - make_float3( 0.0f, 0.0f, gridSizeInv.z * 1.0f);
00396 n.z = tex3D( volumeTex, p2.x, p2.y, p2.z) - tex3D( volumeTex, p1.x, p1.y, p1.z);
00397
00398 norm[i] = n;
00399 }
00400
00401
00402
00403 __global__ void generateTriangleColorNormal(float3 *pos, float3 *col, float3 *norm, float3 *colors, uint3 gridSize, float3 gridSizeInv, float3 bBoxInv, unsigned int numVerts) {
00404 unsigned int zOffset = blockIdx.z * gridDim.x * gridDim.y;
00405 unsigned int blockId = (blockIdx.y * gridDim.x) + blockIdx.x;
00406 unsigned int i = zOffset + (blockId * blockDim.x) + threadIdx.x;
00407
00408 if (i > numVerts - 1)
00409 return;
00410
00411 float3 p = pos[i];
00412 p.x *= bBoxInv.x;
00413 p.y *= bBoxInv.y;
00414 p.z *= bBoxInv.z;
00415
00416 float3 gridPosF = p;
00417 gridPosF.x *= float( gridSize.x);
00418 gridPosF.y *= float( gridSize.y);
00419 gridPosF.z *= float( gridSize.z);
00420 float3 gridPosFloor;
00421
00422
00423 gridPosFloor.x = floorf(gridPosF.x + 0.0001f);
00424 gridPosFloor.y = floorf(gridPosF.y + 0.0001f);
00425 gridPosFloor.z = floorf(gridPosF.z + 0.0001f);
00426 float3 gridPosCeil;
00427
00428
00429 gridPosCeil.x = ceilf(gridPosF.x - 0.0001f);
00430 gridPosCeil.y = ceilf(gridPosF.y - 0.0001f);
00431 gridPosCeil.z = ceilf(gridPosF.z - 0.0001f);
00432 uint3 gridPos0;
00433 gridPos0.x = gridPosFloor.x;
00434 gridPos0.y = gridPosFloor.y;
00435 gridPos0.z = gridPosFloor.z;
00436 uint3 gridPos1;
00437 gridPos1.x = gridPosCeil.x;
00438 gridPos1.y = gridPosCeil.y;
00439 gridPos1.z = gridPosCeil.z;
00440
00441 float3 field[2];
00442 field[0] = sampleColors(colors, gridPos0, gridSize);
00443 field[1] = sampleColors(colors, gridPos1, gridSize);
00444
00445 float3 tmp = gridPosF - gridPosFloor;
00446 float a = max( max( tmp.x, tmp.y), tmp.z);
00447 float3 c = lerp( field[0], field[1], a);
00448
00449 float3 p1, p2;
00450
00451 float3 n;
00452 p1 = p + make_float3( gridSizeInv.x * 1.0f, 0.0f, 0.0f);
00453 p2 = p - make_float3( gridSizeInv.x * 1.0f, 0.0f, 0.0f);
00454 n.x = tex3D( volumeTex, p2.x, p2.y, p2.z) - tex3D( volumeTex, p1.x, p1.y, p1.z);
00455 p1 = p + make_float3( 0.0f, gridSizeInv.y * 1.0f, 0.0f);
00456 p2 = p - make_float3( 0.0f, gridSizeInv.y * 1.0f, 0.0f);
00457 n.y = tex3D( volumeTex, p2.x, p2.y, p2.z) - tex3D( volumeTex, p1.x, p1.y, p1.z);
00458 p1 = p + make_float3( 0.0f, 0.0f, gridSizeInv.z * 1.0f);
00459 p2 = p - make_float3( 0.0f, 0.0f, gridSizeInv.z * 1.0f);
00460 n.z = tex3D( volumeTex, p2.x, p2.y, p2.z) - tex3D( volumeTex, p1.x, p1.y, p1.z);
00461
00462 norm[i] = n;
00463 col[i] = c;
00464 }
00465
00466
00467 void allocateTextures(int **d_triTable, unsigned int **d_numVertsTable) {
00468 cudaChannelFormatDesc channelDescSigned = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindSigned);
00469 cudaMalloc((void**) d_triTable, 256*16*sizeof(int));
00470 cudaMemcpy((void *)*d_triTable, (void *)triTable, 256*16*sizeof(int), cudaMemcpyHostToDevice);
00471 cudaBindTexture(0, triTex, *d_triTable, channelDescSigned);
00472
00473 cudaChannelFormatDesc channelDescUnsigned = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindUnsigned);
00474 cudaMalloc((void**) d_numVertsTable, 256*sizeof(unsigned int));
00475 cudaMemcpy((void *)*d_numVertsTable, (void *)numVertsTable, 256*sizeof(unsigned int), cudaMemcpyHostToDevice);
00476 cudaBindTexture(0, numVertsTex, *d_numVertsTable, channelDescUnsigned);
00477 }
00478
00479
00480 void bindVolumeTexture( cudaArray *d_vol, cudaChannelFormatDesc desc) {
00481
00482 volumeTex.normalized = 1;
00483 volumeTex.filterMode = cudaFilterModeLinear;
00484
00485 volumeTex.addressMode[0] = cudaAddressModeClamp;
00486 volumeTex.addressMode[1] = cudaAddressModeClamp;
00487 volumeTex.addressMode[2] = cudaAddressModeClamp;
00488
00489 cudaBindTextureToArray( volumeTex, d_vol, desc);
00490 }
00491
00492
00493 #if 0
00494 void ThrustScanWrapper(unsigned int* output, unsigned int* input, unsigned int numElements) {
00495 thrust::exclusive_scan(thrust::device_ptr<unsigned int>(input),
00496 thrust::device_ptr<unsigned int>(input + numElements),
00497 thrust::device_ptr<unsigned int>(output));
00498 }
00499 #endif
00500
00501 void ThrustScanWrapperUint2(uint2* output, uint2* input, unsigned int numElements) {
00502 const uint2 zero = make_uint2(0, 0);
00503 thrust::exclusive_scan(thrust::device_ptr<uint2>(input),
00504 thrust::device_ptr<uint2>(input + numElements),
00505 thrust::device_ptr<uint2>(output),
00506 zero);
00507 }
00508
00509
00510 void ThrustScanWrapperArea(float* output, float* input, unsigned int numElements) {
00511 thrust::inclusive_scan(thrust::device_ptr<float>(input),
00512 thrust::device_ptr<float>(input + numElements),
00513 thrust::device_ptr<float>(output));
00514 }
00515
00516
00517 __global__ void computeTriangleAreas( float3 *pos, float *area, unsigned int maxTria) {
00518 unsigned int zOffset = blockIdx.z * gridDim.x * gridDim.y;
00519 unsigned int blockId = (blockIdx.y * gridDim.x) + blockIdx.x;
00520 unsigned int i = zOffset + (blockId * blockDim.x) + threadIdx.x;
00521
00522
00523 if (i >= maxTria)
00524 return;
00525
00526
00527 float3 v0 = pos[3*i];
00528 float3 v1 = pos[3*i+1];
00529 float3 v2 = pos[3*i+2];
00530
00531
00532 float a = length( v0 - v1);
00533 float b = length( v0 - v2);
00534 float c = length( v1 - v2);
00535
00536
00537 float rad = ( a + b + c) * ( a + b - c) * ( b + c - a) * ( c + a - b);
00538
00539 rad = rad > 0.0f ? rad : 0.0f;
00540 area[i] = 0.25f * sqrt( rad);
00541 }
00542
00543
00545
00546
00547
00549
00550 CUDAMarchingCubes::CUDAMarchingCubes() {
00551
00552 isoValue = 0.5f;
00553 maxNumVoxels = 0;
00554 numVoxels = 0;
00555 activeVoxels = 0;
00556 totalVerts = 0;
00557 d_volume = 0;
00558 d_colors = 0;
00559 d_volumeArray = 0;
00560 d_voxelVerts = 0;
00561 d_numVertsTable = 0;
00562 d_triTable = 0;
00563 useColor = false;
00564 useSubGrid = false;
00565 initialized = false;
00566 setdata = false;
00567 cudadevice = 0;
00568 cudacomputemajor = 0;
00569
00570
00571 cudaDeviceProp deviceProp;
00572 memset(&deviceProp, 0, sizeof(cudaDeviceProp));
00573
00574 if (cudaGetDevice(&cudadevice) != cudaSuccess) {
00575
00576 }
00577
00578 if (cudaGetDeviceProperties(&deviceProp, cudadevice) != cudaSuccess) {
00579
00580 }
00581
00582 cudacomputemajor = deviceProp.major;
00583 }
00584
00585
00586 CUDAMarchingCubes::~CUDAMarchingCubes() {
00587 Cleanup();
00588 }
00589
00590
00591 void CUDAMarchingCubes::Cleanup() {
00592 if( d_triTable ) cudaFree(d_triTable);
00593 if( d_numVertsTable ) cudaFree(d_numVertsTable);
00594 if( d_voxelVerts ) cudaFree(d_voxelVerts);
00595 if( d_compVoxelArray ) cudaFree(d_compVoxelArray);
00596 if( ownCudaDataArrays ) {
00597 if( d_volume ) cudaFree(d_volume);
00598 if( d_colors ) cudaFree(d_colors);
00599 }
00600 if( d_volumeArray) cudaFreeArray(d_volumeArray);
00601
00602 maxNumVoxels = 0;
00603 numVoxels = 0;
00604 d_triTable = 0;
00605 d_numVertsTable = 0;
00606 d_voxelVerts = 0;
00607 d_compVoxelArray = 0;
00608 d_volume = 0;
00609 d_colors = 0;
00610 ownCudaDataArrays = false;
00611 d_volumeArray = 0;
00612 initialized = false;
00613 setdata = false;
00614 }
00615
00616
00620 void CUDAMarchingCubes::computeIsosurface( float3* vertOut, float3* normOut, float3* colOut, unsigned int maxverts) {
00621
00622 if( !this->setdata ) return;
00623
00624
00625
00626
00627
00628
00629
00630 int threads = 256;
00631
00632 dim3 grid((unsigned int) (ceil(float(numVoxels) / float(threads))), 1, 1);
00633
00634 if (grid.x > 65535) {
00635 grid.y = (unsigned int) (ceil(float( grid.x) / 32768.0f));
00636 grid.x = 32768;
00637 }
00638
00639 dim3 threads3D( 256, 1, 1);
00640 dim3 grid3D((unsigned int) (ceil(float(gridSize.x) / float(threads3D.x))),
00641 (unsigned int) (ceil(float(gridSize.y) / float(threads3D.y))),
00642 (unsigned int) (ceil(float(gridSize.z) / float(threads3D.z))));
00643
00644
00645
00646 cudaThreadSynchronize();
00647
00648
00649 if (cudacomputemajor >= 2) {
00650
00651 if (useSubGrid) {
00652 classifyVoxel<1,1><<<grid3D, threads3D>>>(d_voxelVerts, d_volume,
00653 gridSize, numVoxels, voxelSize,
00654 subGridStart, subGridEnd, isoValue);
00655 } else {
00656 classifyVoxel<1,0><<<grid3D, threads3D>>>(d_voxelVerts, d_volume,
00657 gridSize, numVoxels, voxelSize,
00658 subGridStart, subGridEnd, isoValue);
00659 }
00660 } else {
00661
00662 if (useSubGrid) {
00663 classifyVoxel<0,1><<<grid, threads>>>(d_voxelVerts, d_volume,
00664 gridSize, numVoxels, voxelSize,
00665 subGridStart, subGridEnd, isoValue);
00666 } else {
00667 classifyVoxel<0,0><<<grid, threads>>>(d_voxelVerts, d_volume,
00668 gridSize, numVoxels, voxelSize,
00669 subGridStart, subGridEnd, isoValue);
00670 }
00671 }
00672
00673
00674 cudaThreadSynchronize();
00675
00676
00677
00678
00679
00680
00681
00682
00683
00684
00685 uint2 lastElement, lastScanElement;
00686 cudaMemcpy((void *) &lastElement, (void *)(d_voxelVerts + numVoxels-1), sizeof(uint2), cudaMemcpyDeviceToHost);
00687 cudaThreadSynchronize();
00688 ThrustScanWrapperUint2(d_voxelVerts, d_voxelVerts, numVoxels);
00689
00690 cudaThreadSynchronize();
00691
00692
00693
00694
00695
00696
00697
00698
00699
00700
00701 cudaMemcpy((void *) &lastScanElement, (void *) (d_voxelVerts + numVoxels-1), sizeof(uint2), cudaMemcpyDeviceToHost);
00702 activeVoxels = lastElement.y + lastScanElement.y;
00703
00704 totalVerts = lastElement.x + lastScanElement.x;
00705 totalVerts = totalVerts < maxverts ? totalVerts : maxverts;
00706
00707 if (activeVoxels==0) {
00708
00709 totalVerts = 0;
00710 return;
00711 }
00712
00713 grid.x = (unsigned int) (ceil(float(numVoxels) / float(threads)));
00714 grid.y = grid.z = 1;
00715
00716 if (grid.x > 65535) {
00717 grid.y = (unsigned int) (ceil(float(grid.x) / 32768.0f));
00718 grid.x = 32768;
00719 }
00720
00721
00722
00723 compactVoxels<<<grid, threads>>>(d_compVoxelArray, d_voxelVerts, lastElement.y, numVoxels, numVoxels + 1);
00724
00725
00726
00727
00728
00729
00730
00731
00732
00733
00734 dim3 grid2((unsigned int) (ceil(float(activeVoxels) / (float) NTHREADS)), 1, 1);
00735 while(grid2.x > 65535) {
00736 grid2.x = (unsigned int) (ceil(float(grid2.x) / 2.0f));
00737 grid2.y *= 2;
00738 }
00739
00740 dim3 grid3((unsigned int) (ceil(float(totalVerts) / (float) threads)), 1, 1);
00741 while(grid3.x > 65535) {
00742 grid3.x = (unsigned int) (ceil(float(grid3.x) / 2.0f));
00743 grid3.y *= 2;
00744 }
00745 while(grid3.y > 65535) {
00746 grid3.y = (unsigned int) (ceil(float(grid3.y) / 2.0f));
00747 grid3.z *= 2;
00748 }
00749
00750
00751
00752
00753 generateTriangleVerticesSMEM<<<grid2, NTHREADS>>>( vertOut,
00754 d_compVoxelArray, d_voxelVerts, d_volume, gridSize, voxelSize,
00755 isoValue, activeVoxels, maxverts - 3);
00756
00757 cudaThreadSynchronize();
00758
00759
00760 float3 gridSizeInv = make_float3( 1.0f / float( gridSize.x), 1.0f / float( gridSize.y), 1.0f / float( gridSize.z));
00761 float3 bBoxInv = make_float3( 1.0f / bBox.x, 1.0f / bBox.y, 1.0f / bBox.z);
00762 if( this->useColor ) {
00763 generateTriangleColorNormal<<<grid3, threads>>>(vertOut, colOut, normOut, (float3*)d_colors, this->gridSize, gridSizeInv, bBoxInv, totalVerts);
00764 } else {
00765 generateTriangleNormals<<<grid3, threads>>>(vertOut, normOut, gridSizeInv, bBoxInv, totalVerts);
00766 }
00767
00768
00769 cudaThreadSynchronize();
00770
00771
00772 offsetTriangleVertices<<<grid3, threads>>>(vertOut, this->origin, totalVerts - 1);
00773
00774
00775 cudaThreadSynchronize();
00776
00777
00778
00779
00780
00781
00782
00783
00784
00785
00786
00787
00788 }
00789
00790
00791 bool CUDAMarchingCubes::Initialize(uint3 maxgridsize) {
00792
00793 if (initialized) return false;
00794
00795
00796 maxGridSize = maxgridsize;
00797 gridSize = maxGridSize;
00798 maxNumVoxels = gridSize.x*gridSize.y*gridSize.z;
00799 numVoxels = maxNumVoxels;
00800
00801
00802 subGridStart = make_uint3(0, 0, 0);
00803 subGridEnd = gridSize - make_uint3(1, 1, 1);
00804
00805
00806 allocateTextures(&d_triTable, &d_numVertsTable);
00807
00808
00809 if (cudaMalloc((void**) &d_voxelVerts, sizeof(uint2) * numVoxels) != cudaSuccess) {
00810 Cleanup();
00811 return false;
00812 }
00813 if (cudaMalloc((void**) &d_compVoxelArray, sizeof(unsigned int) * numVoxels) != cudaSuccess) {
00814 Cleanup();
00815 return false;
00816 }
00817
00818
00819 initialized = true;
00820 return true;
00821 }
00822
00823
00824 bool CUDAMarchingCubes::SetVolumeData(float *volume, float *colors, uint3 gridsize,
00825 float3 gridOrigin, float3 boundingBox, bool cudaArray) {
00826 bool newgridsize = false;
00827
00828
00829 if (!initialized) return false;
00830
00831 #if 0
00832
00833 if (setdata) return false;
00834 if (d_volume != NULL) return false;
00835 #endif
00836
00837
00838 if (gridsize.x != gridSize.x ||
00839 gridsize.y != gridSize.y ||
00840 gridsize.z != gridSize.z) {
00841 newgridsize = true;
00842 int nv = gridsize.x*gridsize.y*gridsize.z;
00843 if (nv > maxNumVoxels)
00844 return false;
00845
00846 gridSize = gridsize;
00847 numVoxels = nv;
00848
00849
00850 subGridStart = make_uint3(0, 0, 0);
00851 subGridEnd = gridSize - make_uint3(1, 1, 1);
00852 }
00853
00854
00855
00856 origin = gridOrigin;
00857 bBox = boundingBox;
00858 voxelSize = make_float3(bBox.x / gridSize.x,
00859 bBox.y / gridSize.y,
00860 bBox.z / gridSize.z);
00861
00862
00863 useColor = colors ? true : false;
00864
00865
00866 if (cudaArray) {
00867
00868 if (ownCudaDataArrays) {
00869 if (d_volume) cudaFree(d_volume);
00870 d_volume = NULL;
00871
00872 if (d_colors) cudaFree(d_colors);
00873 d_colors = NULL;
00874 }
00875
00876
00877 d_volume = volume;
00878 d_colors = colors;
00879
00880
00881 ownCudaDataArrays = false;
00882 } else {
00883
00884 unsigned int size = numVoxels * sizeof(float);
00885 unsigned int maxsize = maxNumVoxels * sizeof(float);
00886
00887
00888 if (!d_volume) {
00889 if (cudaMalloc((void**) &d_volume, maxsize) != cudaSuccess) {
00890 Cleanup();
00891 return false;
00892 }
00893 if (cudaMemcpy(d_volume, volume, size, cudaMemcpyHostToDevice) != cudaSuccess) {
00894 Cleanup();
00895 return false;
00896 }
00897 }
00898
00899 if (colors) {
00900 if (!d_colors) {
00901
00902 if (cudaMalloc((void**) &d_colors, maxsize*3) != cudaSuccess) {
00903 Cleanup();
00904 return false;
00905 }
00906 if (cudaMemcpy(d_colors, colors, size*3, cudaMemcpyHostToDevice) != cudaSuccess ) {
00907 Cleanup();
00908 return false;
00909 }
00910 }
00911 }
00912
00913
00914 ownCudaDataArrays = true;
00915 }
00916
00917
00918 cudaExtent gridExtent = make_cudaExtent(gridSize.x, gridSize.y, gridSize.z);
00919 cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
00920
00921
00922
00923 if (d_volumeArray && newgridsize) {
00924 cudaFreeArray(d_volumeArray);
00925 d_volumeArray = NULL;
00926 }
00927
00928
00929 if (!d_volumeArray) {
00930
00931 if (cudaMalloc3DArray(&d_volumeArray, &channelDesc, gridExtent) != cudaSuccess) {
00932 Cleanup();
00933 return false;
00934 }
00935 }
00936
00937
00938 cudaMemcpy3DParms copyParams = {0};
00939 copyParams.srcPtr = make_cudaPitchedPtr((void*)d_volume, 0, 0, 0);
00940 copyParams.dstArray = d_volumeArray;
00941 copyParams.extent = gridExtent;
00942 copyParams.kind = cudaMemcpyDeviceToDevice;
00943 if (cudaMemcpy3D(©Params) != cudaSuccess) {
00944 Cleanup();
00945 return false;
00946 }
00947
00948
00949 bindVolumeTexture(d_volumeArray, channelDesc);
00950
00951
00952 setdata = true;
00953
00954 return true;
00955 }
00956
00957
00958 void CUDAMarchingCubes::SetSubVolume(uint3 start, uint3 end) {
00959 subGridStart = start;
00960 subGridEnd = end;
00961 useSubGrid = true;
00962
00963 if (subGridEnd.x >= gridSize.x)
00964 subGridEnd.x = gridSize.x - 1;
00965 if (subGridEnd.y >= gridSize.y)
00966 subGridEnd.y = gridSize.y - 1;
00967 if (subGridEnd.z >= gridSize.z)
00968 subGridEnd.z = gridSize.z - 1;
00969 }
00970
00971
00972 bool CUDAMarchingCubes::computeIsosurface(float *volume, float *colors,
00973 uint3 gridsize, float3 gridOrigin,
00974 float3 boundingBox, bool cudaArray,
00975 float3* vertOut, float3* normOut,
00976 float3* colOut, unsigned int maxverts) {
00977
00978 if (!Initialize(gridsize)) {
00979 return false;
00980 }
00981
00982 if (!SetVolumeData(volume, colors, gridsize, gridOrigin, boundingBox, cudaArray)) {
00983 return false;
00984 }
00985
00986
00987 computeIsosurface(vertOut, normOut, colOut, maxverts);
00988
00989
00990 Cleanup();
00991
00992 return true;
00993 }
00994
00995
00996 float CUDAMarchingCubes::computeSurfaceArea( float3 *verts, unsigned int triaCount) {
00997
00998 if(triaCount <= 0) return 0.0f;
00999
01000
01001 float *d_areas;
01002 unsigned long memSize = sizeof(float) * triaCount;
01003 if (cudaMalloc((void**) &d_areas, memSize) != cudaSuccess) {
01004 return -1.0f;
01005 }
01006
01007
01008 cudaThreadSynchronize();
01009
01010
01011
01012 int threads = 256;
01013 dim3 grid(int(ceil(float(triaCount) / float(threads))), 1, 1);
01014
01015
01016 while(grid.x > 65535) {
01017 grid.x = (unsigned int) (ceil(float(grid.x) / 2.0f));
01018 grid.y *= 2;
01019 }
01020 while(grid.y > 65535) {
01021 grid.y = (unsigned int) (ceil(float(grid.y) / 2.0f));
01022 grid.z *= 2;
01023 }
01024
01025 computeTriangleAreas<<<grid, threads>>>(verts, d_areas, triaCount);
01026
01027 cudaThreadSynchronize();
01028
01029
01030
01031 ThrustScanWrapperArea( d_areas, d_areas, triaCount);
01032
01033 cudaThreadSynchronize();
01034
01035
01036
01037 float totalArea;
01038 cudaMemcpy((void *)&totalArea, (void *)(d_areas + triaCount - 1), sizeof(float), cudaMemcpyDeviceToHost);
01039
01040 cudaThreadSynchronize();
01041
01042
01043
01044 cudaFree(d_areas);
01045
01046 cudaThreadSynchronize();
01047
01048
01049
01050 return totalArea;
01051 }
01052
01053
01054