00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00060 #include "CUDAKernels.h"
00061 #define CUDAMARCHINGCUBES_INTERNAL 1
00062 #include "CUDAMarchingCubes.h"
00063 #include <thrust/device_ptr.h>
00064 #include <thrust/scan.h>
00065 #include <thrust/functional.h>
00066
00067 #include "CUDAParPrefixOps.h"
00068
00069
00070
00071
00072 #if 1 && (CUDART_VERSION >= 4000)
00073 #define RESTRICT __restrict__
00074 #else
00075 #define RESTRICT
00076 #endif
00077
00078
00079
00080 #define NTHREADS 48
00081
00082
00083 #define KERNTHREADS 256
00084
00085
00086
00087
00088
00089
00090 inline __host__ __device__ float3 operator+(float3 a, float3 b) {
00091 return make_float3(a.x + b.x, a.y + b.y, a.z + b.z);
00092 }
00093 inline __host__ __device__ uint3 operator+(uint3 a, uint3 b) {
00094 return make_uint3(a.x + b.x, a.y + b.y, a.z + b.z);
00095 }
00096 inline __host__ __device__ uint2 operator+(uint2 a, uint2 b) {
00097 return make_uint2(a.x + b.x, a.y + b.y);
00098 }
00099
00100
00101 inline __host__ __device__ float3 operator-(float3 a, float3 b) {
00102 return make_float3(a.x - b.x, a.y - b.y, a.z - b.z);
00103 }
00104 inline __host__ __device__ uint3 operator-(uint3 a, uint3 b) {
00105 return make_uint3(a.x - b.x, a.y - b.y, a.z - b.z);
00106 }
00107
00108
00109 inline __host__ __device__ float3 operator*(float b, float3 a) {
00110 return make_float3(b * a.x, b * a.y, b * a.z);
00111 }
00112
00113
00114 inline __host__ __device__ float dot(float3 a, float3 b) {
00115 return a.x * b.x + a.y * b.y + a.z * b.z;
00116 }
00117
00118
00119 inline __host__ __device__ float3 fmaf3(float x, float3 y, float3 z) {
00120 return make_float3(fmaf(x, y.x, z.x),
00121 fmaf(x, y.y, z.y),
00122 fmaf(x, y.z, z.z));
00123 }
00124
00125
00126
00127 inline __device__ __host__ float3 lerp(float3 a, float3 b, float t) {
00128 #if 0
00129 return fmaf3(t, b, fmaf3(-t, a, a));
00130 #elif 1
00131 return a + t*(b-a);
00132 #else
00133 return (1-t)*a + t*b;
00134 #endif
00135 }
00136
00137
00138 inline __host__ __device__ float length(float3 v) {
00139 return sqrtf(dot(v, v));
00140 }
00141
00142
00143
00144
00145 inline __device__ void convert_color(float3 & cf, float3 cf2) {
00146 cf = cf2;
00147 }
00148
00149 inline __device__ void convert_color(uchar4 & cu, float3 cf) {
00150
00151
00152 cu = make_uchar4(fminf(cf.x * 255.0f, 255.0f),
00153 fminf(cf.y * 255.0f, 255.0f),
00154 fminf(cf.z * 255.0f, 255.0f),
00155 255);
00156 }
00157
00158 inline __device__ void convert_color(float3 & cf, uchar4 cu) {
00159 const float i2f = 1.0f / 255.0f;
00160 cf.x = cu.x * i2f;
00161 cf.y = cu.y * i2f;
00162 cf.z = cu.z * i2f;
00163 }
00164
00165
00166
00167
00168 inline __device__ void convert_normal(float3 & nf, float3 nf2) {
00169 nf = nf2;
00170 }
00171
00172
00173 inline __device__ void convert_normal(char3 & cc, float3 cf) {
00174
00175 float invlen = rsqrtf(cf.x*cf.x + cf.y*cf.y + cf.z*cf.z);
00176
00177
00178 cc = make_char3(cf.x * invlen * 127.5f - 0.5f,
00179 cf.y * invlen * 127.5f - 0.5f,
00180 cf.z * invlen * 127.5f - 0.5f);
00181 }
00182
00183
00184
00185
00186
00187
00188
00189
00190 __device__ float sampleVolume(const float * RESTRICT data,
00191 uint3 p, uint3 gridSize) {
00192 return data[(p.z*gridSize.x*gridSize.y) + (p.y*gridSize.x) + p.x];
00193 }
00194
00195
00196
00197 __device__ float3 sampleColors(const float3 * RESTRICT data,
00198 uint3 p, uint3 gridSize) {
00199 return data[(p.z*gridSize.x*gridSize.y) + (p.y*gridSize.x) + p.x];
00200 }
00201
00202
00203 __device__ float3 sampleColors(const uchar4 * RESTRICT data,
00204 uint3 p, uint3 gridSize) {
00205 uchar4 cu = data[(p.z*gridSize.x*gridSize.y) + (p.y*gridSize.x) + p.x];
00206 float3 cf;
00207 convert_color(cf, cu);
00208 return cf;
00209 }
00210
00211
00212
00213 __device__ uint3 calcGridPos(unsigned int i, uint3 gridSize) {
00214 uint3 gridPos;
00215 unsigned int gridsizexy = gridSize.x * gridSize.y;
00216 gridPos.z = i / gridsizexy;
00217 unsigned int tmp1 = i - (gridsizexy * gridPos.z);
00218 gridPos.y = tmp1 / gridSize.x;
00219 gridPos.x = tmp1 - (gridSize.x * gridPos.y);
00220 return gridPos;
00221 }
00222
00223
00224
00225 __device__ float3 vertexInterp(float isolevel, float3 p0, float3 p1, float f0, float f1) {
00226 float t = (isolevel - f0) / (f1 - f0);
00227 return lerp(p0, p1, t);
00228 }
00229
00230
00231
00232
00233 template <int GRIDIS3D, int SUBGRID>
00234 __global__ void
00235
00236 classifyVoxel(uint2 * RESTRICT voxelVerts,
00237 cudaTextureObject_t numVertsTexObj,
00238 const float * RESTRICT volume,
00239 uint3 gridSize, unsigned int numVoxels, float3 voxelSize,
00240 uint3 subGridStart, uint3 subGridEnd,
00241 float isoValue) {
00242 uint3 gridPos;
00243 unsigned int i;
00244
00245
00246
00247 if (GRIDIS3D) {
00248
00249 gridPos.x = blockIdx.x * blockDim.x + threadIdx.x;
00250 gridPos.y = blockIdx.y * blockDim.y + threadIdx.y;
00251 gridPos.z = blockIdx.z * blockDim.z + threadIdx.z;
00252
00253
00254 if (gridPos.x >= gridSize.x ||
00255 gridPos.y >= gridSize.y ||
00256 gridPos.z >= gridSize.z)
00257 return;
00258
00259
00260 i = gridPos.z*gridSize.x*gridSize.y + gridPos.y*gridSize.x + gridPos.x;
00261 } else {
00262
00263 unsigned int blockId = (blockIdx.y * gridDim.x) + blockIdx.x;
00264 i = (blockId * blockDim.x) + threadIdx.x;
00265
00266
00267 if (i >= numVoxels)
00268 return;
00269
00270
00271 gridPos = calcGridPos(i, gridSize);
00272 }
00273
00274
00275
00276
00277 uint2 numVerts = make_uint2(0, 0);
00278 if (SUBGRID) {
00279 if (gridPos.x < subGridStart.x ||
00280 gridPos.y < subGridStart.y ||
00281 gridPos.z < subGridStart.z ||
00282 gridPos.x >= subGridEnd.x ||
00283 gridPos.y >= subGridEnd.y ||
00284 gridPos.z >= subGridEnd.z) {
00285 voxelVerts[i] = numVerts;
00286 return;
00287 }
00288 } else {
00289 if (gridPos.x > (gridSize.x - 2) || gridPos.y > (gridSize.y - 2) || gridPos.z > (gridSize.z - 2)) {
00290 voxelVerts[i] = numVerts;
00291 return;
00292 }
00293 }
00294
00295
00296 float field[8];
00297 field[0] = sampleVolume(volume, gridPos, gridSize);
00298
00299
00300
00301
00302
00303 field[1] = sampleVolume(volume, gridPos + make_uint3(1, 0, 0), gridSize);
00304 field[2] = sampleVolume(volume, gridPos + make_uint3(1, 1, 0), gridSize);
00305 field[3] = sampleVolume(volume, gridPos + make_uint3(0, 1, 0), gridSize);
00306 field[4] = sampleVolume(volume, gridPos + make_uint3(0, 0, 1), gridSize);
00307 field[5] = sampleVolume(volume, gridPos + make_uint3(1, 0, 1), gridSize);
00308 field[6] = sampleVolume(volume, gridPos + make_uint3(1, 1, 1), gridSize);
00309 field[7] = sampleVolume(volume, gridPos + make_uint3(0, 1, 1), gridSize);
00310
00311
00312 unsigned int cubeindex;
00313 cubeindex = ((unsigned int) (field[0] < isoValue));
00314 cubeindex += ((unsigned int) (field[1] < isoValue))*2;
00315 cubeindex += ((unsigned int) (field[2] < isoValue))*4;
00316 cubeindex += ((unsigned int) (field[3] < isoValue))*8;
00317 cubeindex += ((unsigned int) (field[4] < isoValue))*16;
00318 cubeindex += ((unsigned int) (field[5] < isoValue))*32;
00319 cubeindex += ((unsigned int) (field[6] < isoValue))*64;
00320 cubeindex += ((unsigned int) (field[7] < isoValue))*128;
00321
00322
00323 numVerts.x = tex1Dfetch<unsigned int>(numVertsTexObj, int(cubeindex));
00324 numVerts.y = (numVerts.x > 0);
00325
00326 voxelVerts[i] = numVerts;
00327 }
00328
00329
00330
00331 __global__ void
00332
00333 compactVoxels(unsigned int * RESTRICT compactedVoxelArray,
00334 const uint2 * RESTRICT voxelOccupied,
00335 unsigned int lastVoxel, unsigned int numVoxels,
00336 unsigned int numVoxelsm1) {
00337 unsigned int blockId = (blockIdx.y * gridDim.x) + blockIdx.x;
00338 unsigned int i = (blockId * blockDim.x) + threadIdx.x;
00339
00340 if ((i < numVoxels) && ((i < numVoxelsm1) ? voxelOccupied[i].y < voxelOccupied[i+1].y : lastVoxel)) {
00341 compactedVoxelArray[ voxelOccupied[i].y ] = i;
00342 }
00343 }
00344
00345
00346
00347 __global__ void
00348
00349 generateTriangleVerticesSMEM(float3 * RESTRICT pos,
00350 const unsigned int * RESTRICT compactedVoxelArray,
00351 const uint2 * RESTRICT numVertsScanned,
00352 cudaTextureObject_t triTexObj,
00353 cudaTextureObject_t numVertsTexObj,
00354 const float * RESTRICT volume,
00355 uint3 gridSize, float3 voxelSize,
00356 float isoValue, unsigned int activeVoxels,
00357 unsigned int maxVertsM3) {
00358 unsigned int zOffset = blockIdx.z * gridDim.x * gridDim.y;
00359 unsigned int blockId = (blockIdx.y * gridDim.x) + blockIdx.x;
00360 unsigned int i = zOffset * (blockDim.x * blockDim.y) + (blockId * blockDim.x) + threadIdx.x;
00361
00362 if (i >= activeVoxels)
00363 return;
00364
00365 unsigned int voxel = compactedVoxelArray[i];
00366
00367
00368 uint3 gridPos = calcGridPos(voxel, gridSize);
00369
00370 float3 p;
00371 p.x = gridPos.x * voxelSize.x;
00372 p.y = gridPos.y * voxelSize.y;
00373 p.z = gridPos.z * voxelSize.z;
00374
00375
00376 float3 v[8];
00377 v[0] = p;
00378 v[1] = p + make_float3(voxelSize.x, 0, 0);
00379 v[2] = p + make_float3(voxelSize.x, voxelSize.y, 0);
00380 v[3] = p + make_float3(0, voxelSize.y, 0);
00381 v[4] = p + make_float3(0, 0, voxelSize.z);
00382 v[5] = p + make_float3(voxelSize.x, 0, voxelSize.z);
00383 v[6] = p + make_float3(voxelSize.x, voxelSize.y, voxelSize.z);
00384 v[7] = p + make_float3(0, voxelSize.y, voxelSize.z);
00385
00386 float field[8];
00387 field[0] = sampleVolume(volume, gridPos, gridSize);
00388 field[1] = sampleVolume(volume, gridPos + make_uint3(1, 0, 0), gridSize);
00389 field[2] = sampleVolume(volume, gridPos + make_uint3(1, 1, 0), gridSize);
00390 field[3] = sampleVolume(volume, gridPos + make_uint3(0, 1, 0), gridSize);
00391 field[4] = sampleVolume(volume, gridPos + make_uint3(0, 0, 1), gridSize);
00392 field[5] = sampleVolume(volume, gridPos + make_uint3(1, 0, 1), gridSize);
00393 field[6] = sampleVolume(volume, gridPos + make_uint3(1, 1, 1), gridSize);
00394 field[7] = sampleVolume(volume, gridPos + make_uint3(0, 1, 1), gridSize);
00395
00396
00397 unsigned int cubeindex;
00398 cubeindex = ((unsigned int)(field[0] < isoValue));
00399 cubeindex += ((unsigned int)(field[1] < isoValue))*2;
00400 cubeindex += ((unsigned int)(field[2] < isoValue))*4;
00401 cubeindex += ((unsigned int)(field[3] < isoValue))*8;
00402 cubeindex += ((unsigned int)(field[4] < isoValue))*16;
00403 cubeindex += ((unsigned int)(field[5] < isoValue))*32;
00404 cubeindex += ((unsigned int)(field[6] < isoValue))*64;
00405 cubeindex += ((unsigned int)(field[7] < isoValue))*128;
00406
00407
00408
00409
00410
00411
00412
00413
00414
00415
00416 __shared__ float3 vertlist[12*NTHREADS];
00417
00418 vertlist[threadIdx.x] = vertexInterp(isoValue, v[0], v[1], field[0], field[1]);
00419 vertlist[NTHREADS+threadIdx.x] = vertexInterp(isoValue, v[1], v[2], field[1], field[2]);
00420 vertlist[(NTHREADS*2)+threadIdx.x] = vertexInterp(isoValue, v[2], v[3], field[2], field[3]);
00421 vertlist[(NTHREADS*3)+threadIdx.x] = vertexInterp(isoValue, v[3], v[0], field[3], field[0]);
00422 vertlist[(NTHREADS*4)+threadIdx.x] = vertexInterp(isoValue, v[4], v[5], field[4], field[5]);
00423 vertlist[(NTHREADS*5)+threadIdx.x] = vertexInterp(isoValue, v[5], v[6], field[5], field[6]);
00424 vertlist[(NTHREADS*6)+threadIdx.x] = vertexInterp(isoValue, v[6], v[7], field[6], field[7]);
00425 vertlist[(NTHREADS*7)+threadIdx.x] = vertexInterp(isoValue, v[7], v[4], field[7], field[4]);
00426 vertlist[(NTHREADS*8)+threadIdx.x] = vertexInterp(isoValue, v[0], v[4], field[0], field[4]);
00427 vertlist[(NTHREADS*9)+threadIdx.x] = vertexInterp(isoValue, v[1], v[5], field[1], field[5]);
00428 vertlist[(NTHREADS*10)+threadIdx.x] = vertexInterp(isoValue, v[2], v[6], field[2], field[6]);
00429 vertlist[(NTHREADS*11)+threadIdx.x] = vertexInterp(isoValue, v[3], v[7], field[3], field[7]);
00430
00431
00432 unsigned int numVerts = tex1Dfetch<unsigned int>(numVertsTexObj, cubeindex);
00433 for(int i=0; i<numVerts; i+=3) {
00434 unsigned int index = numVertsScanned[voxel].x + i;
00435
00436 float3 *vert[3];
00437 int edge;
00438 edge = tex1Dfetch<int>(triTexObj, (cubeindex*16) + i);
00439 vert[0] = &vertlist[(edge*NTHREADS)+threadIdx.x];
00440
00441 edge = tex1Dfetch<int>(triTexObj, (cubeindex*16) + i + 1);
00442 vert[1] = &vertlist[(edge*NTHREADS)+threadIdx.x];
00443
00444 edge = tex1Dfetch<int>(triTexObj, (cubeindex*16) + i + 2);
00445 vert[2] = &vertlist[(edge*NTHREADS)+threadIdx.x];
00446
00447 if (index < maxVertsM3) {
00448 pos[index ] = *vert[0];
00449 pos[index+1] = *vert[1];
00450 pos[index+2] = *vert[2];
00451 }
00452 }
00453 }
00454
00455
00456 __global__ void
00457
00458 offsetTriangleVertices(float3 * RESTRICT pos,
00459 float3 origin, unsigned int numVertsM1) {
00460 unsigned int zOffset = blockIdx.z * gridDim.x * gridDim.y;
00461 unsigned int blockId = (blockIdx.y * gridDim.x) + blockIdx.x;
00462 unsigned int i = zOffset + (blockId * blockDim.x) + threadIdx.x;
00463
00464 if (i > numVertsM1)
00465 return;
00466
00467 float3 p = pos[i];
00468 p.x += origin.x;
00469 p.y += origin.y;
00470 p.z += origin.z;
00471 pos[i] = p;
00472 }
00473
00474
00475
00476 template <class NORMAL>
00477 __global__ void
00478
00479 generateTriangleNormals(const float3 * RESTRICT pos, NORMAL *norm,
00480 cudaTextureObject_t volTexObj,
00481 float3 gridSizeInv,
00482 float3 bBoxInv, unsigned int numVerts) {
00483 unsigned int zOffset = blockIdx.z * gridDim.x * gridDim.y;
00484 unsigned int blockId = (blockIdx.y * gridDim.x) + blockIdx.x;
00485 unsigned int i = zOffset + (blockId * blockDim.x) + threadIdx.x;
00486
00487 if (i > numVerts - 1)
00488 return;
00489
00490 float3 n;
00491 float3 p, p1, p2;
00492
00493
00494
00495 p = pos[i];
00496 p.x *= bBoxInv.x;
00497 p.y *= bBoxInv.y;
00498 p.z *= bBoxInv.z;
00499
00500 p1 = p + make_float3(gridSizeInv.x, 0.0f, 0.0f);
00501 p2 = p - make_float3(gridSizeInv.x, 0.0f, 0.0f);
00502 n.x = tex3D<float>(volTexObj, p2.x, p2.y, p2.z) - tex3D<float>(volTexObj, p1.x, p1.y, p1.z);
00503 p1 = p + make_float3(0.0f, gridSizeInv.y, 0.0f);
00504 p2 = p - make_float3(0.0f, gridSizeInv.y, 0.0f);
00505 n.y = tex3D<float>(volTexObj, p2.x, p2.y, p2.z) - tex3D<float>(volTexObj, p1.x, p1.y, p1.z);
00506 p1 = p + make_float3(0.0f, 0.0f, gridSizeInv.z);
00507 p2 = p - make_float3(0.0f, 0.0f, gridSizeInv.z);
00508 n.z = tex3D<float>(volTexObj, p2.x, p2.y, p2.z) - tex3D<float>(volTexObj, p1.x, p1.y, p1.z);
00509
00510 NORMAL no;
00511 convert_normal(no, n);
00512 norm[i] = no;
00513 }
00514
00515
00516
00517 template <class VERTEXCOL, class VOLTEX, class NORMAL>
00518 __global__ void
00519
00520 generateTriangleColorNormal(const float3 * RESTRICT pos,
00521 VERTEXCOL * RESTRICT col,
00522 NORMAL * RESTRICT norm,
00523 const VOLTEX * RESTRICT colors,
00524 cudaTextureObject_t volTexObj,
00525 uint3 gridSize, float3 gridSizeInv,
00526 float3 bBoxInv, unsigned int numVerts) {
00527 unsigned int zOffset = blockIdx.z * gridDim.x * gridDim.y;
00528 unsigned int blockId = (blockIdx.y * gridDim.x) + blockIdx.x;
00529 unsigned int i = zOffset + (blockId * blockDim.x) + threadIdx.x;
00530
00531 if (i > numVerts - 1)
00532 return;
00533
00534 float3 p = pos[i];
00535 p.x *= bBoxInv.x;
00536 p.y *= bBoxInv.y;
00537 p.z *= bBoxInv.z;
00538
00539 float3 gridPosF = p;
00540 gridPosF.x *= float(gridSize.x);
00541 gridPosF.y *= float(gridSize.y);
00542 gridPosF.z *= float(gridSize.z);
00543 float3 gridPosFloor;
00544
00545
00546 gridPosFloor.x = floorf(gridPosF.x + 0.0001f);
00547 gridPosFloor.y = floorf(gridPosF.y + 0.0001f);
00548 gridPosFloor.z = floorf(gridPosF.z + 0.0001f);
00549 float3 gridPosCeil;
00550
00551
00552 gridPosCeil.x = ceilf(gridPosF.x - 0.0001f);
00553 gridPosCeil.y = ceilf(gridPosF.y - 0.0001f);
00554 gridPosCeil.z = ceilf(gridPosF.z - 0.0001f);
00555 uint3 gridPos0;
00556 gridPos0.x = gridPosFloor.x;
00557 gridPos0.y = gridPosFloor.y;
00558 gridPos0.z = gridPosFloor.z;
00559 uint3 gridPos1;
00560 gridPos1.x = gridPosCeil.x;
00561 gridPos1.y = gridPosCeil.y;
00562 gridPos1.z = gridPosCeil.z;
00563
00564
00565 float3 field[2];
00566 field[0] = sampleColors(colors, gridPos0, gridSize);
00567 field[1] = sampleColors(colors, gridPos1, gridSize);
00568 float3 tmp = gridPosF - gridPosFloor;
00569 float a = fmaxf(fmaxf(tmp.x, tmp.y), tmp.z);
00570 VERTEXCOL c;
00571 convert_color(c, lerp(field[0], field[1], a));
00572 col[i] = c;
00573
00574
00575 float3 p1, p2, n;
00576 p1 = p + make_float3(gridSizeInv.x, 0.0f, 0.0f);
00577 p2 = p - make_float3(gridSizeInv.x, 0.0f, 0.0f);
00578 n.x = tex3D<float>(volTexObj, p2.x, p2.y, p2.z) - tex3D<float>(volTexObj, p1.x, p1.y, p1.z);
00579 p1 = p + make_float3(0.0f, gridSizeInv.y, 0.0f);
00580 p2 = p - make_float3(0.0f, gridSizeInv.y, 0.0f);
00581 n.y = tex3D<float>(volTexObj, p2.x, p2.y, p2.z) - tex3D<float>(volTexObj, p1.x, p1.y, p1.z);
00582 p1 = p + make_float3(0.0f, 0.0f, gridSizeInv.z);
00583 p2 = p - make_float3(0.0f, 0.0f, gridSizeInv.z);
00584 n.z = tex3D<float>(volTexObj, p2.x, p2.y, p2.z) - tex3D<float>(volTexObj, p1.x, p1.y, p1.z);
00585
00586 NORMAL no;
00587 convert_normal(no, n);
00588 norm[i] = no;
00589 }
00590
00591
00592
00593 #if 0
00594
00595 void ThrustScanWrapperUint2(uint2* output, uint2* input, unsigned int numElements) {
00596 const uint2 zero = make_uint2(0, 0);
00597 long scanwork_sz = 0;
00598 scanwork_sz = dev_excl_scan_sum_tmpsz(input, ((long) numElements), output, zero);
00599 void *scanwork_d = NULL;
00600 cudaMalloc(&scanwork_d, scanwork_sz);
00601 dev_excl_scan_sum(input, ((long) numElements), output, scanwork_d, scanwork_sz, zero);
00602 cudaFree(scanwork_d);
00603 }
00604
00605 #elif CUDART_VERSION >= 9000
00606
00607
00608
00609
00610
00611
00612
00613
00614 struct myuint2 : uint2 {
00615 __host__ __device__ myuint2() : uint2(make_uint2(0, 0)) {}
00616 __host__ __device__ myuint2(int val) : uint2(make_uint2(val, val)) {}
00617 __host__ __device__ myuint2(uint2 val) : uint2(make_uint2(val.x, val.y)) {}
00618 };
00619
00620 void ThrustScanWrapperUint2(uint2* output, uint2* input, unsigned int numElements) {
00621 const uint2 zero = make_uint2(0, 0);
00622 thrust::exclusive_scan(thrust::device_ptr<myuint2>((myuint2*)input),
00623 thrust::device_ptr<myuint2>((myuint2*)input + numElements),
00624 thrust::device_ptr<myuint2>((myuint2*)output),
00625 (myuint2) zero);
00626 }
00627
00628 #else
00629
00630 void ThrustScanWrapperUint2(uint2* output, uint2* input, unsigned int numElements) {
00631 const uint2 zero = make_uint2(0, 0);
00632 thrust::exclusive_scan(thrust::device_ptr<uint2>(input),
00633 thrust::device_ptr<uint2>(input + numElements),
00634 thrust::device_ptr<uint2>(output),
00635 zero);
00636 }
00637
00638 #endif
00639
00640 void ThrustScanWrapperArea(float* output, float* input, unsigned int numElements) {
00641 thrust::inclusive_scan(thrust::device_ptr<float>(input),
00642 thrust::device_ptr<float>(input + numElements),
00643 thrust::device_ptr<float>(output));
00644 }
00645
00646
00647 __global__ void
00648
00649 computeTriangleAreas(const float3 * RESTRICT pos,
00650 float * RESTRICT area, unsigned int maxTria) {
00651 unsigned int zOffset = blockIdx.z * gridDim.x * gridDim.y;
00652 unsigned int blockId = (blockIdx.y * gridDim.x) + blockIdx.x;
00653 unsigned int i = zOffset + (blockId * blockDim.x) + threadIdx.x;
00654
00655
00656 if (i >= maxTria)
00657 return;
00658
00659
00660 float3 v0 = pos[3*i];
00661 float3 v1 = pos[3*i+1];
00662 float3 v2 = pos[3*i+2];
00663
00664
00665 float a = length(v0 - v1);
00666 float b = length(v0 - v2);
00667 float c = length(v1 - v2);
00668
00669
00670 float rad = (a + b + c) * (a + b - c) * (b + c - a) * (c + a - b);
00671
00672 rad = rad > 0.0f ? rad : 0.0f;
00673 area[i] = 0.25f * sqrtf(rad);
00674 }
00675
00676
00678
00679
00680
00682
00683 CUDAMarchingCubes::CUDAMarchingCubes() {
00684
00685 isoValue = 0.5f;
00686 maxNumVoxels = 0;
00687 numVoxels = 0;
00688 activeVoxels = 0;
00689 totalVerts = 0;
00690 d_volume = 0;
00691 d_colors = 0;
00692 texformat = RGB3F;
00693 d_volumeArray = 0;
00694 d_voxelVerts = 0;
00695 d_numVertsTable = 0;
00696 d_triTable = 0;
00697 useColor = false;
00698 useSubGrid = false;
00699 initialized = false;
00700 setdata = false;
00701 cudadevice = 0;
00702 cudacomputemajor = 0;
00703
00704
00705 triTexObj = 0;
00706 numVertsTexObj = 0;
00707 volTexObj = 0;
00708
00709
00710 cudaDeviceProp deviceProp;
00711 memset(&deviceProp, 0, sizeof(cudaDeviceProp));
00712
00713 if (cudaGetDevice(&cudadevice) != cudaSuccess) {
00714
00715 }
00716
00717 if (cudaGetDeviceProperties(&deviceProp, cudadevice) != cudaSuccess) {
00718
00719 }
00720
00721 cudacomputemajor = deviceProp.major;
00722 }
00723
00724
00725 CUDAMarchingCubes::~CUDAMarchingCubes() {
00726 Cleanup();
00727 }
00728
00729
00730 void CUDAMarchingCubes::Cleanup() {
00731 if (d_triTable) {
00732
00733 cudaDestroyTextureObject(triTexObj);
00734 triTexObj = 0;
00735
00736 cudaFree(d_triTable);
00737 d_triTable = 0;
00738 }
00739
00740 if (d_numVertsTable) {
00741
00742 cudaDestroyTextureObject(numVertsTexObj);
00743 numVertsTexObj = 0;
00744
00745 cudaFree(d_numVertsTable);
00746 d_numVertsTable = 0;
00747 }
00748
00749 if (d_voxelVerts) {
00750 cudaFree(d_voxelVerts);
00751 d_voxelVerts = 0;
00752 }
00753
00754 if (d_compVoxelArray) {
00755 cudaFree(d_compVoxelArray);
00756 d_compVoxelArray = 0;
00757 }
00758
00759 if (ownCudaDataArrays) {
00760 if (d_volume) {
00761 cudaFree(d_volume);
00762 d_volume = 0;
00763 }
00764
00765 if (d_colors) {
00766 cudaFree(d_colors);
00767 d_colors = 0;
00768 }
00769 }
00770
00771 if (d_volumeArray) {
00772
00773 cudaDestroyTextureObject(volTexObj);
00774 volTexObj = 0;
00775
00776 cudaFreeArray(d_volumeArray);
00777 d_volumeArray = 0;
00778 }
00779
00780 maxNumVoxels = 0;
00781 numVoxels = 0;
00782 ownCudaDataArrays = false;
00783 initialized = false;
00784 setdata = false;
00785 }
00786
00787
00788
00789
00790
00791
00792 void CUDAMarchingCubes::allocateTextures(int **d_triTab, unsigned int **d_numVertsTab) {
00793
00794
00795
00796
00797
00798 long tritexsz = 256*16*sizeof(int);
00799 cudaMalloc((void**) d_triTab, tritexsz);
00800 cudaMemcpy((void *)*d_triTab, (void *)triTable, tritexsz, cudaMemcpyHostToDevice);
00801 cudaResourceDesc resDescSigned;
00802 memset(&resDescSigned, 0, sizeof(resDescSigned));
00803 resDescSigned.resType = cudaResourceTypeLinear;
00804 resDescSigned.res.linear.desc.f = cudaChannelFormatKindSigned;
00805 resDescSigned.res.linear.desc.x = 32;
00806 resDescSigned.res.linear.sizeInBytes = tritexsz;
00807 resDescSigned.res.linear.devPtr = *d_triTab;
00808
00809 cudaTextureDesc texDesc;
00810 memset(&texDesc, 0, sizeof(texDesc));
00811 texDesc.readMode = cudaReadModeElementType;
00812
00813 cudaCreateTextureObject(&triTexObj, &resDescSigned, &texDesc, NULL);
00814
00815
00816
00817 long numvertsz = 256*sizeof(unsigned int);
00818 cudaMalloc((void**) d_numVertsTab, numvertsz);
00819 cudaMemcpy((void *)*d_numVertsTab, (void *)numVertsTable, numvertsz, cudaMemcpyHostToDevice);
00820 cudaResourceDesc resDescUnsigned;
00821 memset(&resDescUnsigned, 0, sizeof(resDescUnsigned));
00822 resDescUnsigned.resType = cudaResourceTypeLinear;
00823 resDescUnsigned.res.linear.desc.f = cudaChannelFormatKindUnsigned;
00824 resDescUnsigned.res.linear.desc.x = 32;
00825 resDescUnsigned.res.linear.sizeInBytes = numvertsz;
00826 resDescUnsigned.res.linear.devPtr = *d_numVertsTab;
00827
00828 cudaTextureDesc numVertsDesc;
00829 memset(&numVertsDesc, 0, sizeof(numVertsDesc));
00830 numVertsDesc.readMode = cudaReadModeElementType;
00831
00832 cudaCreateTextureObject(&numVertsTexObj, &resDescUnsigned, &numVertsDesc, NULL);
00833 }
00834
00835
00836
00840 void CUDAMarchingCubes::computeIsosurfaceVerts(float3* vertOut, unsigned int maxverts, dim3 & grid3) {
00841
00842 if (!this->setdata)
00843 return;
00844
00845 int threads = 256;
00846 dim3 grid((unsigned int) (ceil(float(numVoxels) / float(threads))), 1, 1);
00847
00848 if (grid.x > 65535) {
00849 grid.y = (unsigned int) (ceil(float(grid.x) / 32768.0f));
00850 grid.x = 32768;
00851 }
00852
00853 dim3 threads3D(256, 1, 1);
00854 dim3 grid3D((unsigned int) (ceil(float(gridSize.x) / float(threads3D.x))),
00855 (unsigned int) (ceil(float(gridSize.y) / float(threads3D.y))),
00856 (unsigned int) (ceil(float(gridSize.z) / float(threads3D.z))));
00857
00858
00859 if (cudacomputemajor >= 2) {
00860
00861 if (useSubGrid) {
00862 classifyVoxel<1,1><<<grid3D, threads3D>>>(d_voxelVerts, numVertsTexObj,
00863 d_volume, gridSize, numVoxels, voxelSize,
00864 subGridStart, subGridEnd, isoValue);
00865 } else {
00866 classifyVoxel<1,0><<<grid3D, threads3D>>>(d_voxelVerts, numVertsTexObj,
00867 d_volume, gridSize, numVoxels, voxelSize,
00868 subGridStart, subGridEnd, isoValue);
00869 }
00870 } else {
00871
00872 if (useSubGrid) {
00873 classifyVoxel<0,1><<<grid, threads>>>(d_voxelVerts, numVertsTexObj,
00874 d_volume, gridSize, numVoxels, voxelSize,
00875 subGridStart, subGridEnd, isoValue);
00876 } else {
00877 classifyVoxel<0,0><<<grid, threads>>>(d_voxelVerts, numVertsTexObj,
00878 d_volume, gridSize, numVoxels, voxelSize,
00879 subGridStart, subGridEnd, isoValue);
00880 }
00881 }
00882
00883
00884 uint2 lastElement, lastScanElement;
00885 cudaMemcpy((void *) &lastElement, (void *)(d_voxelVerts + numVoxels-1), sizeof(uint2), cudaMemcpyDeviceToHost);
00886
00887 ThrustScanWrapperUint2(d_voxelVerts, d_voxelVerts, numVoxels);
00888
00889
00890
00891
00892 cudaMemcpy((void *) &lastScanElement, (void *) (d_voxelVerts + numVoxels-1), sizeof(uint2), cudaMemcpyDeviceToHost);
00893 activeVoxels = lastElement.y + lastScanElement.y;
00894
00895 totalVerts = lastElement.x + lastScanElement.x;
00896 totalVerts = totalVerts < maxverts ? totalVerts : maxverts;
00897
00898 if (activeVoxels==0) {
00899
00900 totalVerts = 0;
00901 return;
00902 }
00903
00904 grid.x = (unsigned int) (ceil(float(numVoxels) / float(threads)));
00905 grid.y = grid.z = 1;
00906
00907 if (grid.x > 65535) {
00908 grid.y = (unsigned int) (ceil(float(grid.x) / 32768.0f));
00909 grid.x = 32768;
00910 }
00911
00912
00913 compactVoxels<<<grid, threads>>>(d_compVoxelArray, d_voxelVerts, lastElement.y, numVoxels, numVoxels - 1);
00914
00915 dim3 grid2((unsigned int) (ceil(float(activeVoxels) / (float) NTHREADS)), 1, 1);
00916 while(grid2.x > 65535) {
00917 grid2.x = (unsigned int) (ceil(float(grid2.x) / 2.0f));
00918 grid2.y *= 2;
00919 }
00920
00921 grid3 = dim3((unsigned int) (ceil(float(totalVerts) / (float) threads)), 1, 1);
00922 while(grid3.x > 65535) {
00923 grid3.x = (unsigned int) (ceil(float(grid3.x) / 2.0f));
00924 grid3.y *= 2;
00925 }
00926 while(grid3.y > 65535) {
00927 grid3.y = (unsigned int) (ceil(float(grid3.y) / 2.0f));
00928 grid3.z *= 2;
00929 }
00930
00931
00932 generateTriangleVerticesSMEM<<<grid2, NTHREADS>>>(vertOut,
00933 d_compVoxelArray, d_voxelVerts,
00934 triTexObj, numVertsTexObj,
00935 d_volume, gridSize, voxelSize,
00936 isoValue, activeVoxels, maxverts - 3);
00937 }
00938
00939
00940
00941
00942
00943 void CUDAMarchingCubes::computeIsosurface(float3* vertOut, float3* normOut, float3* colOut, unsigned int maxverts) {
00944
00945 if (!this->setdata)
00946 return;
00947
00948 int threads = 256;
00949 dim3 grid3;
00950 computeIsosurfaceVerts(vertOut, maxverts, grid3);
00951
00952 float3 gridSizeInv = make_float3(1.0f / float(gridSize.x), 1.0f / float(gridSize.y), 1.0f / float(gridSize.z));
00953 float3 bBoxInv = make_float3(1.0f / bBox.x, 1.0f / bBox.y, 1.0f / bBox.z);
00954 if (this->useColor) {
00955 switch (texformat) {
00956 case RGB3F:
00957 generateTriangleColorNormal<float3, float3, float3><<<grid3, threads>>>(vertOut, colOut, normOut, (float3 *) d_colors,
00958 volTexObj, this->gridSize, gridSizeInv, bBoxInv, totalVerts);
00959 break;
00960
00961 case RGB4U:
00962 generateTriangleColorNormal<float3, uchar4, float3><<<grid3, threads>>>(vertOut, colOut, normOut, (uchar4 *) d_colors,
00963 volTexObj, this->gridSize, gridSizeInv, bBoxInv, totalVerts);
00964 break;
00965 }
00966 } else {
00967 generateTriangleNormals<float3><<<grid3, threads>>>(vertOut, normOut,
00968 volTexObj, gridSizeInv, bBoxInv, totalVerts);
00969 }
00970
00971 offsetTriangleVertices<<<grid3, threads>>>(vertOut, this->origin, totalVerts - 1);
00972 }
00973
00974
00975
00976
00977
00978 void CUDAMarchingCubes::computeIsosurface(float3* vertOut, float3* normOut, uchar4* colOut, unsigned int maxverts) {
00979
00980 if (!this->setdata)
00981 return;
00982
00983 int threads = 256;
00984 dim3 grid3;
00985 computeIsosurfaceVerts(vertOut, maxverts, grid3);
00986
00987 float3 gridSizeInv = make_float3(1.0f / float(gridSize.x), 1.0f / float(gridSize.y), 1.0f / float(gridSize.z));
00988 float3 bBoxInv = make_float3(1.0f / bBox.x, 1.0f / bBox.y, 1.0f / bBox.z);
00989 if (this->useColor) {
00990 switch (texformat) {
00991 case RGB3F:
00992 generateTriangleColorNormal<uchar4, float3, float3><<<grid3, threads>>>(vertOut, colOut, normOut, (float3 *) d_colors,
00993 volTexObj, this->gridSize, gridSizeInv, bBoxInv, totalVerts);
00994 break;
00995
00996 case RGB4U:
00997 generateTriangleColorNormal<uchar4, uchar4, float3><<<grid3, threads>>>(vertOut, colOut, normOut, (uchar4 *) d_colors,
00998 volTexObj, this->gridSize, gridSizeInv, bBoxInv, totalVerts);
00999 break;
01000 }
01001 } else {
01002 generateTriangleNormals<float3><<<grid3, threads>>>(vertOut, normOut,
01003 volTexObj, gridSizeInv, bBoxInv, totalVerts);
01004 }
01005
01006 offsetTriangleVertices<<<grid3, threads>>>(vertOut, this->origin, totalVerts - 1);
01007 }
01008
01009
01010
01011
01012
01013
01014 void CUDAMarchingCubes::computeIsosurface(float3* vertOut, char3* normOut, uchar4* colOut, unsigned int maxverts) {
01015
01016 if (!this->setdata)
01017 return;
01018
01019 int threads = 256;
01020 dim3 grid3;
01021 computeIsosurfaceVerts(vertOut, maxverts, grid3);
01022
01023 float3 gridSizeInv = make_float3(1.0f / float(gridSize.x), 1.0f / float(gridSize.y), 1.0f / float(gridSize.z));
01024 float3 bBoxInv = make_float3(1.0f / bBox.x, 1.0f / bBox.y, 1.0f / bBox.z);
01025 if (this->useColor) {
01026 switch (texformat) {
01027 case RGB3F:
01028 generateTriangleColorNormal<uchar4, float3, char3><<<grid3, threads>>>(vertOut, colOut, normOut, (float3 *) d_colors,
01029 volTexObj, this->gridSize, gridSizeInv, bBoxInv, totalVerts);
01030 break;
01031
01032 case RGB4U:
01033 generateTriangleColorNormal<uchar4, uchar4, char3><<<grid3, threads>>>(vertOut, colOut, normOut, (uchar4 *) d_colors,
01034 volTexObj, this->gridSize, gridSizeInv, bBoxInv, totalVerts);
01035 break;
01036 }
01037 } else {
01038 generateTriangleNormals<char3><<<grid3, threads>>>(vertOut, normOut,
01039 volTexObj, gridSizeInv, bBoxInv, totalVerts);
01040 }
01041
01042 offsetTriangleVertices<<<grid3, threads>>>(vertOut, this->origin, totalVerts - 1);
01043 }
01044
01045
01046 bool CUDAMarchingCubes::Initialize(uint3 maxgridsize) {
01047
01048 if (initialized) return false;
01049
01050
01051 maxGridSize = maxgridsize;
01052 gridSize = maxGridSize;
01053 maxNumVoxels = gridSize.x*gridSize.y*gridSize.z;
01054 numVoxels = maxNumVoxels;
01055
01056
01057 subGridStart = make_uint3(0, 0, 0);
01058 subGridEnd = gridSize - make_uint3(1, 1, 1);
01059
01060
01061 allocateTextures(&d_triTable, &d_numVertsTable);
01062
01063
01064 if (cudaMalloc((void**) &d_voxelVerts, sizeof(uint2) * numVoxels) != cudaSuccess) {
01065 Cleanup();
01066 return false;
01067 }
01068 if (cudaMalloc((void**) &d_compVoxelArray, sizeof(unsigned int) * numVoxels) != cudaSuccess) {
01069 Cleanup();
01070 return false;
01071 }
01072
01073
01074 initialized = true;
01075 return true;
01076 }
01077
01078
01079 bool CUDAMarchingCubes::SetVolumeData(float *volume, void *colors,
01080 VolTexFormat vtexformat, uint3 gridsize,
01081 float3 gridOrigin, float3 boundingBox,
01082 bool cudaArray) {
01083 bool newgridsize = false;
01084
01085
01086 if (!initialized) return false;
01087
01088
01089 if (gridsize.x != gridSize.x ||
01090 gridsize.y != gridSize.y ||
01091 gridsize.z != gridSize.z) {
01092 newgridsize = true;
01093 int nv = gridsize.x*gridsize.y*gridsize.z;
01094 if (nv > maxNumVoxels)
01095 return false;
01096
01097 gridSize = gridsize;
01098 numVoxels = nv;
01099
01100
01101 subGridStart = make_uint3(0, 0, 0);
01102 subGridEnd = gridSize - make_uint3(1, 1, 1);
01103 }
01104
01105
01106
01107 origin = gridOrigin;
01108 bBox = boundingBox;
01109 voxelSize = make_float3(bBox.x / gridSize.x,
01110 bBox.y / gridSize.y,
01111 bBox.z / gridSize.z);
01112
01113
01114 useColor = colors ? true : false;
01115
01116
01117 if (cudaArray) {
01118
01119 if (ownCudaDataArrays) {
01120 if (d_volume) cudaFree(d_volume);
01121 d_volume = NULL;
01122
01123 if (d_colors) cudaFree(d_colors);
01124 d_colors = NULL;
01125 }
01126
01127
01128 d_volume = volume;
01129 d_colors = colors;
01130 texformat = vtexformat;
01131
01132
01133 ownCudaDataArrays = false;
01134 } else {
01135
01136 unsigned int size = numVoxels * sizeof(float);
01137 unsigned int maxsize = maxNumVoxels * sizeof(float);
01138
01139
01140 if (!d_volume) {
01141 if (cudaMalloc((void**) &d_volume, maxsize) != cudaSuccess) {
01142 Cleanup();
01143 return false;
01144 }
01145 if (cudaMemcpy(d_volume, volume, size, cudaMemcpyHostToDevice) != cudaSuccess) {
01146 Cleanup();
01147 return false;
01148 }
01149 }
01150
01151 if (colors) {
01152 if (!d_colors) {
01153
01154 unsigned int maxtxsize = 0;
01155 unsigned int txsize = 0;
01156 texformat = vtexformat;
01157 switch (texformat) {
01158 case RGB3F:
01159 txsize = numVoxels * sizeof(float3);
01160 maxtxsize = maxNumVoxels * sizeof(float3);
01161 break;
01162
01163 case RGB4U:
01164 txsize = numVoxels * sizeof(uchar4);
01165 maxtxsize = maxNumVoxels * sizeof(uchar4);
01166 break;
01167 }
01168
01169 if (cudaMalloc((void**) &d_colors, maxtxsize) != cudaSuccess) {
01170 Cleanup();
01171 return false;
01172 }
01173 if (cudaMemcpy(d_colors, colors, txsize, cudaMemcpyHostToDevice) != cudaSuccess) {
01174 Cleanup();
01175 return false;
01176 }
01177 }
01178 }
01179
01180
01181 ownCudaDataArrays = true;
01182 }
01183
01184
01185 cudaExtent gridExtent = make_cudaExtent(gridSize.x, gridSize.y, gridSize.z);
01186 cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
01187
01188
01189
01190 if (d_volumeArray && newgridsize) {
01191
01192 cudaDestroyTextureObject(volTexObj);
01193 volTexObj = 0;
01194
01195 cudaFreeArray(d_volumeArray);
01196 d_volumeArray = NULL;
01197 }
01198
01199
01200 if (!d_volumeArray) {
01201
01202 if (cudaMalloc3DArray(&d_volumeArray, &channelDesc, gridExtent) != cudaSuccess) {
01203 Cleanup();
01204 return false;
01205 }
01206 }
01207
01208
01209
01210
01211 cudaMemcpy3DParms copyParams = {0};
01212 copyParams.srcPtr = make_cudaPitchedPtr((void*)d_volume,
01213 gridExtent.width*sizeof(float),
01214 gridExtent.width,
01215 gridExtent.height);
01216 copyParams.dstArray = d_volumeArray;
01217 copyParams.extent = gridExtent;
01218 copyParams.kind = cudaMemcpyDeviceToDevice;
01219 if (cudaMemcpy3D(©Params) != cudaSuccess) {
01220 Cleanup();
01221 return false;
01222 }
01223
01224 cudaResourceDesc resDescVol;
01225 memset(&resDescVol, 0, sizeof(resDescVol));
01226 resDescVol.resType = cudaResourceTypeArray;
01227 resDescVol.res.array.array = d_volumeArray;
01228
01229 cudaTextureDesc volDesc;
01230 memset(&volDesc, 0, sizeof(volDesc));
01231 volDesc.normalizedCoords = 1;
01232 volDesc.filterMode = cudaFilterModeLinear;
01233 volDesc.addressMode[0] = cudaAddressModeClamp;
01234 volDesc.addressMode[1] = cudaAddressModeClamp;
01235 volDesc.addressMode[2] = cudaAddressModeClamp;
01236 volDesc.readMode = cudaReadModeElementType;
01237
01238 cudaCreateTextureObject(&volTexObj, &resDescVol, &volDesc, NULL);
01239
01240
01241 setdata = true;
01242
01243 return true;
01244 }
01245
01246
01247 bool CUDAMarchingCubes::SetVolumeData(float *volume, float3 *colors, uint3 gridsize, float3 gridOrigin, float3 boundingBox, bool cudaArray) {
01248 return SetVolumeData(volume, colors, RGB3F, gridsize, gridOrigin,
01249 boundingBox, cudaArray);
01250 }
01251
01252
01253 bool CUDAMarchingCubes::SetVolumeData(float *volume, uchar4 *colors, uint3 gridsize, float3 gridOrigin, float3 boundingBox, bool cudaArray) {
01254 return SetVolumeData(volume, colors, RGB4U, gridsize, gridOrigin,
01255 boundingBox, cudaArray);
01256 }
01257
01258
01259
01260 void CUDAMarchingCubes::SetSubVolume(uint3 start, uint3 end) {
01261 subGridStart = start;
01262 subGridEnd = end;
01263 useSubGrid = true;
01264
01265 if (subGridEnd.x >= gridSize.x)
01266 subGridEnd.x = gridSize.x - 1;
01267 if (subGridEnd.y >= gridSize.y)
01268 subGridEnd.y = gridSize.y - 1;
01269 if (subGridEnd.z >= gridSize.z)
01270 subGridEnd.z = gridSize.z - 1;
01271 }
01272
01273
01274 bool CUDAMarchingCubes::computeIsosurface(float *volume, void *colors,
01275 VolTexFormat vtexformat,
01276 uint3 gridsize, float3 gridOrigin,
01277 float3 boundingBox, bool cudaArray,
01278 float3* vertOut, float3* normOut,
01279 float3* colOut, unsigned int maxverts) {
01280
01281 if (!Initialize(gridsize))
01282 return false;
01283
01284 if (!SetVolumeData(volume, colors, vtexformat, gridsize, gridOrigin, boundingBox, cudaArray))
01285 return false;
01286
01287
01288 computeIsosurface(vertOut, normOut, colOut, maxverts);
01289
01290
01291 Cleanup();
01292
01293 return true;
01294 }
01295
01296
01297 float CUDAMarchingCubes::computeSurfaceArea(float3 *verts, unsigned int triaCount) {
01298
01299 if(triaCount <= 0) return 0.0f;
01300
01301
01302 float *d_areas;
01303 unsigned long memSize = sizeof(float) * triaCount;
01304 if (cudaMalloc((void**) &d_areas, memSize) != cudaSuccess) {
01305 return -1.0f;
01306 }
01307
01308
01309 const int threads = 256;
01310 dim3 grid(int(ceil(float(triaCount) / float(threads))), 1, 1);
01311
01312
01313 while(grid.x > 65535) {
01314 grid.x = (unsigned int) (ceil(float(grid.x) / 2.0f));
01315 grid.y *= 2;
01316 }
01317 while(grid.y > 65535) {
01318 grid.y = (unsigned int) (ceil(float(grid.y) / 2.0f));
01319 grid.z *= 2;
01320 }
01321
01322 computeTriangleAreas<<<grid, threads>>>(verts, d_areas, triaCount);
01323
01324
01325 ThrustScanWrapperArea(d_areas, d_areas, triaCount);
01326
01327
01328 float totalArea;
01329 cudaMemcpy((void *)&totalArea, (void *)(d_areas + triaCount - 1), sizeof(float), cudaMemcpyDeviceToHost);
01330
01331 cudaFree(d_areas);
01332 return totalArea;
01333 }
01334
01335
01336