Watershed.C Source File

00001 /***************************************************************************
00002  *cr
00003  *cr            (C) Copyright 1995-2019 The Board of Trustees of the
00004  *cr                        University of Illinois
00005  *cr                         All Rights Reserved
00006  *cr
00007  ***************************************************************************/
00008 
00009 /***************************************************************************
00010  * RCS INFORMATION:
00011  *
00012  *      $RCSfile: Watershed.C,v $
00013  *      $Author: johns $        $Locker:  $             $State: Exp $
00014  *      $Revision: 1.38 $        $Date: 2020/10/15 17:51:01 $
00015  *
00016  ***************************************************************************
00017  * DESCRIPTION:
00018  *   Watershed image segmentation
00019  ***************************************************************************/
00020 
00021 #include <stdlib.h>
00022 #include <string.h>
00023 #include <math.h>
00024 #include <stdio.h>
00025 #include <climits>
00026 #include "WKFUtils.h"
00027 #include "ProfileHooks.h"
00028 
00029 #define WATERSHED_INTERNAL 1
00030 #include "Watershed.h"
00031 #if defined(VMDCUDA)
00032 #include "CUDAWatershed.h"
00033 #endif
00034 
00035 #define UPDATE_VOXEL(and_value, idx, curr_group, smallest_value, smallest_offset) {\
00036   if (n_eq & and_value) {\
00037     const int idx_offset = idx + and_value##_offset;\
00038     if (current_state.value[idx_offset] + FLOAT_DIFF < smallest_value) {\
00039       smallest_value = current_state.value[idx_offset];\
00040       offset_number = and_value##_idx;\
00041       smallest_offset = and_value##_offset;\
00042     }\
00043     curr_group = current_state.group[idx_offset] < curr_group ?\
00044     current_state.group[idx_offset] : curr_group;\
00045   }}
00046 
00047 #define CALCULATE_NEIGHBORS(offset_str) {\
00048   const int idx_offset = idx + offset_str##_offset;\
00049   slope = float(curr_intensity - image[idx_offset]);\
00050   if (slope < smallest_slope) {\
00051     smallest_slope = slope;\
00052     curr_lower = offset_str##_idx;\
00053   } else if (slope >= -FLOAT_DIFF && slope <= FLOAT_DIFF) {\
00054     curr_n_eq |= offset_str;\
00055     if (idx_offset < min_group) {\
00056       min_group = idx_offset;\
00057     }\
00058   } }
00059 
00060 
00062 template <typename GROUP_T, typename IMAGE_T>
00063 Watershed<GROUP_T, IMAGE_T>::Watershed(unsigned int h, unsigned int w, unsigned int d, bool cuda) {
00064   height = h;
00065   width = w;
00066   depth = d;
00067   heightWidth = height * width;
00068   nVoxels = long(heightWidth) * long(depth);
00069 
00070 #ifdef VMDCUDA
00071   use_cuda = cuda; //this defaults to true unless overridden
00072 #else
00073   use_cuda = false;
00074 #endif
00075   memset(&gpu_state, 0, sizeof(gpu_state));
00076 
00077   current_state.group = new GROUP_T[nVoxels];
00078   current_state.value = new IMAGE_T[nVoxels];
00079   next_state.group = new GROUP_T[nVoxels];
00080   next_state.value = new IMAGE_T[nVoxels];
00081   equal_and_lower = new int[nVoxels];
00082   current_update = NULL;
00083   next_update = NULL;
00084 
00085   // If using block updates then allocate the current/next_update arrays
00086 #ifdef BLOCK_UPDATES
00087   update_width = ((width+UPDATE_SIZE) / UPDATE_SIZE) * UPDATE_SIZE;
00088   int buf_size = (depth * height * update_width) >> UPDATE_SHIFT;
00089   update_offset = 0;
00090 #ifdef FAST_UPDATES
00091   update_offset = update_width * height;
00092 #endif // FAST_UPDATES
00093   current_update = new unsigned char[buf_size + 2L * update_offset];
00094   next_update = new unsigned char[buf_size + 2L * update_offset];
00095   memset(current_update, 1, (2L*update_offset + buf_size) * sizeof(unsigned char));
00096   memset(next_update, 0, (2L*update_offset + buf_size) * sizeof(unsigned char));
00097 #endif //BLOCK_UPDATES
00098 
00099   //memcpy(intensity, data, nVoxels * sizeof(float));
00100 }
00101 
00102 
00103 template <typename GROUP_T, typename IMAGE_T>
00104 Watershed<GROUP_T, IMAGE_T>::~Watershed() {
00105 #if defined(VMDCUDA)
00106   destroy_gpu(gpu_state);
00107 #endif
00108   if (current_state.group != NULL) {
00109     delete [] current_state.group;
00110     delete [] current_state.value;
00111     current_state.group = NULL;
00112   }
00113   if (next_state.group != NULL) {
00114     delete [] next_state.group;
00115     delete [] next_state.value;
00116     next_state.group = NULL;
00117   }
00118   if (equal_and_lower == NULL) {
00119     delete [] equal_and_lower;
00120     equal_and_lower = NULL;
00121   }
00122   if (current_update != NULL) {
00123     delete [] current_update;
00124     current_update = NULL;
00125   }
00126   if (next_update != NULL) {
00127     delete [] next_update;
00128     next_update = NULL;
00129   }
00130 }
00131 
00132 
00133 template <typename GROUP_T, typename IMAGE_T>
00134 void Watershed<GROUP_T, IMAGE_T>::watershed(IMAGE_T* image, int imageongpu, GROUP_T* segments, bool verbose) { 
00135   if (imageongpu)
00136     PROFILE_PUSH_RANGE("Watershed::watershed(Image on GPU)", 0);
00137   else 
00138     PROFILE_PUSH_RANGE("Watershed::watershed(Image on host)", 0);
00139 
00140   // XXX we need to change this launch so that we always use the
00141   // GPU unless forbidden, and that we fallback to the CPU if the GPU
00142   // is forbidden, or we ran out of GPU memory, or an error occured
00143   // during execution on the GPU, so the calculation always proceeds.
00144 #ifdef TIMER
00145   wkf_timerhandle init_timer = wkf_timer_create();    
00146   wkf_timerhandle total_timer = wkf_timer_create();    
00147   wkf_timerhandle update_timer = wkf_timer_create();    
00148   wkf_timer_start(total_timer);
00149   wkf_timer_start(init_timer);
00150 #endif // TIMER
00151 
00152 #if defined(VMDCUDA)
00153   if (use_cuda) {
00154     init_gpu_on_device(gpu_state, image, imageongpu, width, height, depth);
00155   } else 
00156 #endif
00157   {
00158     // XXX deal with imageongpu if necessary, or eliminate possibility
00159     init(image);
00160   }
00161 
00162 #ifdef TIMER
00163   wkf_timer_stop(init_timer);
00164   wkf_timer_start(update_timer);
00165 #endif // TIMER
00166 
00167 #if defined(VMDCUDA)
00168   if (use_cuda) {
00169     watershed_gpu(segments);
00170   } else 
00171 #endif
00172     watershed_cpu(segments);
00173 
00174 #ifdef TIMER
00175   wkf_timer_stop(update_timer);
00176   wkf_timer_stop(total_timer);
00177   double update_time_sec = wkf_timer_time(update_timer);
00178   double total_time_sec = wkf_timer_time(total_timer);
00179   double init_time_sec = wkf_timer_time(init_timer);
00180   wkf_timer_destroy(init_timer);
00181   wkf_timer_destroy(update_timer);
00182   wkf_timer_destroy(total_timer);
00183   if (verbose) {
00184     printf("Watershed init:   %f seconds\n", init_time_sec);
00185     printf("Watershed update: %f seconds\n", update_time_sec);
00186     printf("Watershed total:  %f seconds\n", total_time_sec);
00187   }
00188 #endif
00189 
00190   PROFILE_POP_RANGE();
00191 }
00192 
00193 
00194 #if defined(VMDCUDA)
00195 
00196 template <typename GROUP_T, typename IMAGE_T>
00197 void Watershed<GROUP_T, IMAGE_T>::watershed_gpu(GROUP_T* segments_d) {
00198   PROFILE_PUSH_RANGE("Watershed::watershed_gpu()", 0);
00199 
00200   // XXX we may want to unify the timing approach between
00201   // both the CPU and GPU code paths at some point.  We can
00202   // also change this to do runtime tests for verbose timing output,
00203   // as is done in many other cases in VMD.
00204   update_cuda(gpu_state, segments_d);
00205 
00206   PROFILE_POP_RANGE();
00207 }
00208 
00209 #endif
00210 
00211 
00212 template <typename GROUP_T, typename IMAGE_T>
00213 void Watershed<GROUP_T, IMAGE_T>::watershed_cpu(GROUP_T* segments) {
00214   PROFILE_PUSH_RANGE("Watershed::watershed_cpu()", 1);
00215 
00216   unsigned int changes;
00217 #ifdef STATS
00218   unsigned int num_updates;
00219   unsigned int num_block_updates;
00220   unsigned long int total_updates = 0;
00221   unsigned long int total_blocks = 0;
00222   unsigned long int total_block_updates = 0;
00223   unsigned int numRounds = 0;
00224 #endif // STATS
00225 
00226   // keep performing update step of watershed algorithm until there are no changes
00227   do {
00228 #ifdef STATS
00229     changes = update_blocks_stats(num_updates, num_block_updates);
00230     total_updates += num_updates;
00231     ++numRounds;
00232     total_block_updates += num_block_updates;
00233     total_blocks += height * depth * (width / UPDATE_SIZE);
00234 #else // NOT STATS
00235 
00236 #ifdef BLOCK_UPDATES
00237     changes = update_blocks();
00238 #else // NOT BLOCK_UPDATES
00239     changes = update();
00240 #endif //BLOCK_UPDATES
00241 
00242 #endif // STATS
00243     SWAP(current_state.group, next_state.group, GROUP_T*);
00244     SWAP(current_state.value, next_state.value, IMAGE_T*);
00245   } while (changes);
00246 
00247 #ifdef STATS
00248   printf("Total_updates: %llu\n", total_updates);
00249   printf("Total_voxels: %llu\n", nVoxels);
00250   printf("Total_block_updates: %llu\n", total_block_updates);
00251   printf("Total_blocks: %llu\n", total_blocks);
00252   printf("Number of watershed rounds: %d\n", numRounds);
00253   double update_percent = 100.0 * total_updates / (double)(numRounds * nVoxels);
00254   double block_percent = 100.0 * total_block_updates / (double)total_blocks;
00255   printf("Block update percentage: %f %% \n", block_percent);
00256   printf("Update percentage: %f %% \n", update_percent);
00257 #endif //STATS
00258 
00259   memcpy(segments, current_state.group, nVoxels * sizeof(GROUP_T));
00260 
00261   PROFILE_POP_RANGE();
00262 }
00263 
00264 
00265 template <typename GROUP_T, typename IMAGE_T>
00266 void Watershed<GROUP_T, IMAGE_T>::init_neighbor_offsets() {
00267   int neighbor_offsets_t[19] = { 0,
00268     px_offset,
00269     py_offset,
00270     pz_offset,
00271     nx_offset,
00272     ny_offset,
00273     nz_offset,
00274 
00275     nx_ny_offset,
00276     nx_py_offset,
00277     px_py_offset,
00278     px_ny_offset,
00279 
00280     px_pz_offset,
00281     nx_nz_offset,
00282     nx_pz_offset,
00283     px_nz_offset,
00284 
00285     py_pz_offset,
00286     ny_nz_offset,
00287     ny_pz_offset,
00288     py_nz_offset,
00289   };
00290   memcpy(neighbor_offsets, neighbor_offsets_t, sizeof(neighbor_offsets_t));
00291 }
00292 
00293 // XXX TODO Description of scheme to incrementally initialize watershed:
00294 // create an init function with signature: partial_init(IMAGE_T* image, long start_idx, long stop_idx, long startGroupNum)
00295 // Call the function multiple times looping the start_idx - stop_idx over the image, where startGroupNum is
00296 // the number of groups that have been initialized so far.
00297 // The function is similar to the init() function below, except long min_group = idx + startGroupNum
00298 // After each time the function runs, we should call a modified sequentialize groups that only acts on indexes 0-stop_idx.
00299 // This scheme will work as described assuming a sequential CPU initialization because the only dependency is on
00300 // group numbers < the current group, so if those are already initialized there is no problem.
00301 //
00302 // One way to make this scheme work in parallel is to save an offset number (look at the min_group var for an example).
00303 // We would save an offset number in one kernel, and then immediately run a second kernel that 'chases' these offset numbers
00304 // until it finds a 0, and sets the group == to the group number at that index.
00305 //
00306 
00307 template <typename GROUP_T, typename IMAGE_T>
00308 void Watershed<GROUP_T, IMAGE_T>::init(IMAGE_T* image) {
00309   // XXX the initialization method accounts for about 50%
00310   //     of the total runtime for a CUDA-accelerated segmentation 
00311   PROFILE_PUSH_RANGE("Watershed::init()", 1);
00312 
00313 
00314   init_neighbor_offsets();
00315   memset(equal_and_lower, 0, nVoxels * sizeof(int));
00316 #ifdef BLOCK_UPDATES
00317   if (!use_cuda) {
00318     unsigned int buf_size = (depth * height * update_width) >> UPDATE_SHIFT;
00319     // 1 update offset on each end of update buffer
00320     memset(current_update, 1, (2*update_offset + buf_size) * sizeof(unsigned char));
00321     memset(next_update, 0, (2*update_offset + buf_size) * sizeof(unsigned char));
00322   }
00323 #endif // BLOCK_UPDATES
00324 
00325 
00326 #ifdef USE_OMP
00327 #pragma omp parallel for schedule(static)
00328 #endif
00329   for (int z = 0; z < depth; ++z) {
00330     for (int y = 0; y < height; ++y) {
00331       unsigned long yz = long(heightWidth) * z + long(width)*y;
00332       for (int x = 0; x < width; ++x) {
00333         float slope;
00334         float smallest_slope = -FLOAT_DIFF;
00335         long idx = yz + x;
00336         IMAGE_T curr_intensity = image[idx];
00337         long min_group = idx; // Group number = idx to start
00338         int curr_n_eq = 0;
00339         long curr_lower = 0;
00340         //TODO change to be actual offset instead of index into offset array
00341 
00342 #ifdef CONNECTED_18
00343         if (x > 0) {
00344           CALCULATE_NEIGHBORS(nx);
00345         }
00346         if (x < width-1) {
00347           CALCULATE_NEIGHBORS(px);
00348         }
00349 
00350         /* Calculate n_lower and n_eq */
00351         if (y > 0) {
00352           CALCULATE_NEIGHBORS(ny);
00353           if (x < width - 1) {
00354             CALCULATE_NEIGHBORS(px_ny);
00355           }
00356         }
00357         if (y < height-1) {
00358           CALCULATE_NEIGHBORS(py);
00359           if (x < width - 1) {
00360             CALCULATE_NEIGHBORS(px_py);
00361           }
00362         }
00363 
00364         if (z > 0) {
00365           CALCULATE_NEIGHBORS(nz);
00366           if (x < width - 1) {
00367             CALCULATE_NEIGHBORS(px_nz);
00368           }
00369           if (x > 0) {
00370             CALCULATE_NEIGHBORS(nx_nz);
00371           }
00372           if (y > 0 ) {
00373             CALCULATE_NEIGHBORS(ny_nz);
00374           }
00375           if (y < height - 1) {
00376             CALCULATE_NEIGHBORS(py_nz);
00377           }
00378         }
00379         if (z < depth-1) {
00380           CALCULATE_NEIGHBORS(pz);
00381           if (x < width - 1) {
00382             CALCULATE_NEIGHBORS(px_pz);
00383           }
00384           if (x > 0) {
00385             CALCULATE_NEIGHBORS(nx_pz);
00386           }
00387           if (y < height - 1) {
00388             CALCULATE_NEIGHBORS(py_pz);
00389           }
00390           if (y > 0) {
00391             CALCULATE_NEIGHBORS(ny_pz);
00392           }
00393         }
00394 #else // 6 connected neighbors
00395         if (x > 0) {
00396           CALCULATE_NEIGHBORS(nx);
00397         }
00398         if (x < width-1) {
00399           CALCULATE_NEIGHBORS(px);
00400         }
00401 
00402         /* Calculate n_lower and n_eq */
00403         if (y > 0) {
00404           CALCULATE_NEIGHBORS(ny);
00405         }
00406         if (y < height-1) {
00407           CALCULATE_NEIGHBORS(py);
00408         }
00409 
00410         if (z > 0) {
00411           CALCULATE_NEIGHBORS(nz);
00412         }
00413         if (z < depth-1) {
00414           CALCULATE_NEIGHBORS(pz);
00415         }
00416 #endif // CONNECTED_18
00417 
00418         /* Update current voxel */
00419         equal_and_lower[idx] = MAKE_EQUAL_AND_LOWER(curr_n_eq, curr_lower);
00420         if (curr_lower == 0) {
00421           /* Minimum */
00422           next_state.group[idx] = GROUP_T(min_group);
00423           next_state.value[idx] = image[idx];
00424         } else {
00425           /* Not minimum */
00426           const long nIdx = idx + neighbor_offsets[curr_lower];
00427           next_state.value[idx] = image[nIdx];
00428           next_state.group[idx] = GROUP_T(nIdx);
00429         }
00430       }
00431     }
00432   }
00433 
00434 #if defined(VMDCUDA)
00435   if (use_cuda) {
00436     use_cuda = init_gpu(next_state, equal_and_lower, gpu_state, width, height, depth);
00437     if (!use_cuda)
00438       printf("Warning: Could not initialize GPU. Falling back to host.\n");
00439   } 
00440 #endif
00441   if (!use_cuda){
00442     memcpy(current_state.group, next_state.group, nVoxels * sizeof(GROUP_T));
00443     memcpy(current_state.value, next_state.value, nVoxels * sizeof(IMAGE_T));
00444   }
00445 
00446   PROFILE_POP_RANGE();
00447 }
00448 
00449 
00450 template <typename GROUP_T, typename IMAGE_T>
00451 unsigned int Watershed<GROUP_T, IMAGE_T>::update_blocks() {
00452   unsigned int changes = 0;
00453   const unsigned long update_heightWidth = height * update_width;
00454   const unsigned long width_shift = update_width >> UPDATE_SHIFT;
00455   const unsigned long heightWidth_shift = update_heightWidth >> UPDATE_SHIFT;
00456 #ifdef USE_OMP
00457 #pragma omp parallel for schedule(guided)
00458 #endif
00459   for (int z = 0; z < depth; ++z) {
00460     for (int y = 0; y < height; ++y) {
00461       unsigned int local_changes = 0;
00462       const unsigned int update_yz = z * update_heightWidth + y * update_width + update_offset;
00463       const unsigned long yz = z * long(heightWidth) + y * long(width);
00464       for (int update_x = 0; update_x < width; update_x += UPDATE_SIZE) {
00465 
00466         const unsigned long update_idx = (update_yz + update_x) >> UPDATE_SHIFT;
00467         // only need to look at current block if it was marked as needing an update in prev. round
00468         if (current_update[update_idx]) {
00469           bool update = false;
00470           const int x_max = update_x + UPDATE_SIZE < width ? update_x + UPDATE_SIZE : width;
00471           for (int x = update_x; x < x_max; ++x) {
00472             const unsigned long idx = yz + x;
00473             const int curr_eq_lower = equal_and_lower[idx]; // packed equal Ns and lower Ns
00474             const int curr_offset_number = GET_N_LOWER(curr_eq_lower);
00475 
00476             if (curr_offset_number) {
00477               /* Not minimum */
00478               const unsigned int nidx = idx + neighbor_offsets[curr_offset_number];
00479               if ((next_state.group[idx] != current_state.group[nidx]) ||
00480                    next_state.value[idx] != current_state.value[nidx]) {
00481                 next_state.group[idx] = current_state.group[nidx];
00482                 next_state.value[idx] = current_state.value[nidx];
00483                 update = true;
00484               }
00485             } else {
00486               /* Minimum */
00487               unsigned int n_eq = GET_N_EQ(curr_eq_lower);
00488               IMAGE_T smallest_value = current_state.value[idx];
00489               GROUP_T curr_group = current_state.group[idx];
00490               int smallest_offset = 0;
00491               int offset_number = 0;
00492 
00493 #ifdef CONNECTED_18
00494               /* +- x */
00495               UPDATE_VOXEL(nx, idx, curr_group, smallest_value, smallest_offset);
00496               UPDATE_VOXEL(px, idx, curr_group, smallest_value, smallest_offset);
00497 
00498               /* +- x, -y*/
00499               UPDATE_VOXEL(nx_ny, idx, curr_group, smallest_value, smallest_offset);
00500               UPDATE_VOXEL(   ny, idx, curr_group, smallest_value, smallest_offset);
00501               UPDATE_VOXEL(px_ny, idx, curr_group, smallest_value, smallest_offset);
00502 
00503               /* +- x, +y*/
00504               UPDATE_VOXEL(nx_py, idx, curr_group, smallest_value, smallest_offset);
00505               UPDATE_VOXEL(   py, idx, curr_group, smallest_value, smallest_offset);
00506               UPDATE_VOXEL(px_py, idx, curr_group, smallest_value, smallest_offset);
00507 
00508 
00509               /* +- x, +z*/
00510               UPDATE_VOXEL(px_pz, idx, curr_group, smallest_value, smallest_offset);
00511               UPDATE_VOXEL(nx_pz, idx, curr_group, smallest_value, smallest_offset);
00512               /* +- y, +z*/
00513               UPDATE_VOXEL(py_pz, idx, curr_group, smallest_value, smallest_offset);
00514               UPDATE_VOXEL(   pz, idx, curr_group, smallest_value, smallest_offset);
00515               UPDATE_VOXEL(ny_pz, idx, curr_group, smallest_value, smallest_offset);
00516 
00517               /* +- x, -z*/
00518               UPDATE_VOXEL(nx_nz, idx, curr_group, smallest_value, smallest_offset);
00519               UPDATE_VOXEL(   nz, idx, curr_group, smallest_value, smallest_offset);
00520               UPDATE_VOXEL(px_nz, idx, curr_group, smallest_value, smallest_offset);
00521               /* +- y, -z*/
00522               UPDATE_VOXEL(py_nz, idx, curr_group, smallest_value, smallest_offset);
00523               UPDATE_VOXEL(ny_nz, idx, curr_group, smallest_value, smallest_offset);
00524 
00525 #else
00526               UPDATE_VOXEL(nx, idx, curr_group, smallest_value, smallest_offset);
00527               UPDATE_VOXEL(px, idx, curr_group, smallest_value, smallest_offset);
00528               UPDATE_VOXEL(ny, idx, curr_group, smallest_value, smallest_offset);
00529               UPDATE_VOXEL(py, idx, curr_group, smallest_value, smallest_offset);
00530               UPDATE_VOXEL(nz, idx, curr_group, smallest_value, smallest_offset);
00531               UPDATE_VOXEL(pz, idx, curr_group, smallest_value, smallest_offset);
00532 #endif
00533 
00534               /* 
00535                * curr_group is now the minimum group of all the
00536                * equal neighbors and current group
00537                */
00538               if (smallest_offset != 0) {
00539                 equal_and_lower[idx] = MAKE_EQUAL_AND_LOWER(n_eq, offset_number);
00540                 next_state.value[idx] = smallest_value;
00541                 next_state.group[idx] = current_state.group[idx + smallest_offset];
00542                 update = true;
00543               } else if (curr_group != next_state.group[idx]) {
00544                 next_state.group[idx] = curr_group;
00545                 update = true;
00546               }
00547             }
00548           }
00549           current_update[update_idx] = 0;
00550           if (update) {
00551             local_changes = 1;
00552             next_update[update_idx] = 1;
00553 #ifdef FAST_UPDATES
00554             next_update[update_idx - 1] = 1;
00555             next_update[update_idx + 1] = 1;
00556             next_update[update_idx - width_shift] = 1;
00557             next_update[update_idx + width_shift] = 1;
00558             next_update[update_idx - heightWidth_shift] = 1;
00559             next_update[update_idx + heightWidth_shift] = 1;
00560 #else
00561             if (update_x > 0) {
00562               next_update[update_idx - 1] = 1;
00563             }
00564             if (update_x < update_width-UPDATE_SIZE) {
00565               next_update[update_idx + 1] = 1;
00566             }
00567             if (y > 0) {
00568               next_update[update_idx - width_shift] = 1;
00569             }
00570             if (y < height - 1) {
00571               next_update[update_idx + width_shift] = 1;
00572             }
00573             if (z > 0) {
00574               next_update[update_idx - heightWidth_shift] = 1;
00575             }
00576             if (z < depth-1) {
00577               next_update[update_idx + heightWidth_shift] = 1;
00578             }
00579 #endif // FAST_UPDATES
00580           }
00581         }
00582       }
00583       if (local_changes) {
00584         changes = 1;
00585       }
00586     }
00587   }
00588   SWAP(current_update, next_update, unsigned char*);
00589   return changes;
00590 }
00591 
00592 
00593 template <typename GROUP_T, typename IMAGE_T>
00594 unsigned int Watershed<GROUP_T, IMAGE_T>::update() {
00595   unsigned int changes = 0;
00596 #ifdef USE_OMP
00597 #pragma omp parallel for schedule(guided)
00598 #endif
00599   for (int z = 0; z < depth; ++z) {
00600     for (int y = 0; y < height; ++y) {
00601       const unsigned long yz = z * long(heightWidth) + y * long(width);
00602       bool update = false;
00603       for (int x = 0; x < width; ++x) {
00604         const unsigned int idx = yz + x;
00605         const int curr_eq_lower = equal_and_lower[idx];
00606         const int curr_offset_number = GET_N_LOWER(curr_eq_lower);
00607 
00608         if (curr_offset_number) {
00609           /* Not minimum */
00610           const unsigned int nidx = idx + neighbor_offsets[curr_offset_number];
00611           if (next_state.group[idx] != current_state.group[nidx]) {
00612             next_state.group[idx] = current_state.group[nidx];
00613             next_state.value[idx] = current_state.value[nidx];
00614             update = true;
00615           }
00616         } else {
00617           /* Minimum */
00618           int n_eq = GET_N_EQ(curr_eq_lower);
00619           IMAGE_T smallest_value = current_state.value[idx];
00620           GROUP_T curr_group = current_state.group[idx];
00621           int smallest_offset = 0;
00622           int offset_number = 0;
00623 
00624 #ifdef CONNECTED_18
00625           /* +- x */
00626           UPDATE_VOXEL(nx, idx, curr_group, smallest_value, smallest_offset);
00627           UPDATE_VOXEL(px, idx, curr_group, smallest_value, smallest_offset);
00628 
00629           /* +- x, -y*/
00630           UPDATE_VOXEL(nx_ny, idx, curr_group, smallest_value, smallest_offset);
00631           UPDATE_VOXEL(   ny, idx, curr_group, smallest_value, smallest_offset);
00632           UPDATE_VOXEL(px_ny, idx, curr_group, smallest_value, smallest_offset);
00633 
00634           /* +- x, +y*/
00635           UPDATE_VOXEL(nx_py, idx, curr_group, smallest_value, smallest_offset);
00636           UPDATE_VOXEL(   py, idx, curr_group, smallest_value, smallest_offset);
00637           UPDATE_VOXEL(px_py, idx, curr_group, smallest_value, smallest_offset);
00638 
00639 
00640           /* +- x, +z*/
00641           UPDATE_VOXEL(px_pz, idx, curr_group, smallest_value, smallest_offset);
00642           UPDATE_VOXEL(nx_pz, idx, curr_group, smallest_value, smallest_offset);
00643           /* +- y, +z*/
00644           UPDATE_VOXEL(py_pz, idx, curr_group, smallest_value, smallest_offset);
00645           UPDATE_VOXEL(   pz, idx, curr_group, smallest_value, smallest_offset);
00646           UPDATE_VOXEL(ny_pz, idx, curr_group, smallest_value, smallest_offset);
00647 
00648           /* +- x, -z*/
00649           UPDATE_VOXEL(nx_nz, idx, curr_group, smallest_value, smallest_offset);
00650           UPDATE_VOXEL(   nz, idx, curr_group, smallest_value, smallest_offset);
00651           UPDATE_VOXEL(px_nz, idx, curr_group, smallest_value, smallest_offset);
00652           /* +- y, -z*/
00653           UPDATE_VOXEL(py_nz, idx, curr_group, smallest_value, smallest_offset);
00654           UPDATE_VOXEL(ny_nz, idx, curr_group, smallest_value, smallest_offset);
00655 
00656 #else // 6 connected neighbors
00657           UPDATE_VOXEL(nx, idx, curr_group, smallest_value, smallest_offset);
00658           UPDATE_VOXEL(px, idx, curr_group, smallest_value, smallest_offset);
00659           UPDATE_VOXEL(ny, idx, curr_group, smallest_value, smallest_offset);
00660           UPDATE_VOXEL(py, idx, curr_group, smallest_value, smallest_offset);
00661           UPDATE_VOXEL(nz, idx, curr_group, smallest_value, smallest_offset);
00662           UPDATE_VOXEL(pz, idx, curr_group, smallest_value, smallest_offset);
00663 #endif // CONNECTED_18
00664 
00665           /* 
00666            * curr_group is now the minimum group of all the
00667            * equal neighbors and current group
00668            */
00669           if (smallest_offset) {
00670             equal_and_lower[idx] = MAKE_EQUAL_AND_LOWER(n_eq, offset_number);
00671             next_state.value[idx] = smallest_value;
00672             next_state.group[idx] = current_state.group[idx + smallest_offset];
00673             update = true;
00674           } else if (curr_group != next_state.group[idx]) {
00675             next_state.group[idx] = curr_group;
00676             update = true;
00677           }
00678         }
00679       }
00680       if (update) {
00681         changes = 1;
00682       }
00683     }
00684   }
00685   return changes;
00686 }
00687 
00688 #define INST_WATERSHED(G_T) \
00689 template class Watershed<G_T, float>;\
00690 template class Watershed<G_T, unsigned short>;\
00691 template class Watershed<G_T, unsigned char>;
00692 
00693 INST_WATERSHED(unsigned long)
00694 INST_WATERSHED(unsigned int)
00695 INST_WATERSHED(unsigned short)