aboutsummaryrefslogtreecommitdiff
path: root/skelftkernel.h
diff options
context:
space:
mode:
Diffstat (limited to 'skelftkernel.h')
-rw-r--r--skelftkernel.h468
1 files changed, 468 insertions, 0 deletions
diff --git a/skelftkernel.h b/skelftkernel.h
new file mode 100644
index 0000000..1182181
--- /dev/null
+++ b/skelftkernel.h
@@ -0,0 +1,468 @@
+// CUDA-based Skeletonization, Distance Transforms, Feature Transforms, Erosion
+// and Dilation, and Flood Filling Toolkit
+//
+//(c) Alexandru Telea, Univ. of Groningen, 2011
+//
+// This code is adapted from "Parallel Banding Algorithm to compute exact
+// distance transforms with the GPU",
+// T. Cao, K. Tang, A. Mohamed, T. Tan, Proc. ACM Symo. on Interactive 3D
+// Graphics and Games (I3D), 2010, 83-90
+//====================================================================================================================
+
+// MARKER is used to mark blank pixels in the texture. Any uncolored pixels will
+// have x = MARKER.
+// Input texture should have x = MARKER for all pixels other than sites
+#define MARKER -32768
+
+#define TOID(x, y, size) (__mul24((y), (size)) + (x))
+
+// Transpose a square matrix
+__global__ void kernelTranspose(short2 *data, int size) {
+ __shared__ short2 block1[TILE_DIM][TILE_DIM + 1];
+ __shared__ short2 block2[TILE_DIM][TILE_DIM + 1];
+
+ int blockIdx_y = blockIdx.x;
+ int blockIdx_x = blockIdx.x + blockIdx.y;
+
+ if (blockIdx_x >= gridDim.x)
+ return;
+
+ int blkX, blkY, x, y, id1, id2;
+ short2 pixel;
+
+ blkX = __mul24(blockIdx_x, TILE_DIM);
+ blkY = __mul24(blockIdx_y, TILE_DIM);
+
+ x = blkX + threadIdx.x;
+ y = blkY + threadIdx.y;
+ id1 = __mul24(y, size) + x;
+
+ x = blkY + threadIdx.x;
+ y = blkX + threadIdx.y;
+ id2 = __mul24(y, size) + x;
+
+ // read the matrix tile into shared memory
+ for (int i = 0; i < TILE_DIM; i += BLOCK_ROWS) {
+ int idx = __mul24(i, size);
+ block1[threadIdx.y + i][threadIdx.x] =
+ tex1Dfetch(pbaTexColor, id1 + idx);
+ block2[threadIdx.y + i][threadIdx.x] =
+ tex1Dfetch(pbaTexColor, id2 + idx);
+ }
+
+ __syncthreads();
+
+ // write the transposed matrix tile to global memory
+ for (int i = 0; i < TILE_DIM; i += BLOCK_ROWS) {
+ int idx = __mul24(i, size);
+ pixel = block1[threadIdx.x][threadIdx.y + i];
+ data[id2 + idx] = make_short2(pixel.y, pixel.x);
+ pixel = block2[threadIdx.x][threadIdx.y + i];
+ data[id1 + idx] = make_short2(pixel.y, pixel.x);
+ }
+}
+
+__global__ void kernelFloodDown(short2 *output, int size, int bandSize) {
+ int tx = blockIdx.x * blockDim.x + threadIdx.x;
+ int ty = blockIdx.y * bandSize;
+ int id = TOID(tx, ty, size);
+
+ short2 pixel1, pixel2;
+
+ pixel1 = make_short2(MARKER, MARKER);
+
+ for (int i = 0; i < bandSize; ++i, id += size) {
+ pixel2 = tex1Dfetch(pbaTexColor, id);
+
+ if (pixel2.x != MARKER)
+ pixel1 = pixel2;
+
+ output[id] = pixel1;
+ }
+}
+
+__global__ void kernelFloodUp(short2 *output, int size, int bandSize) {
+ int tx = blockIdx.x * blockDim.x + threadIdx.x;
+ int ty = (blockIdx.y + 1) * bandSize - 1;
+ int id = TOID(tx, ty, size);
+
+ short2 pixel1, pixel2;
+ int dist1, dist2;
+
+ pixel1 = make_short2(MARKER, MARKER);
+
+ for (int i = 0; i < bandSize; i++, id -= size) {
+ dist1 = abs(pixel1.y - ty + i);
+
+ pixel2 = tex1Dfetch(pbaTexColor, id);
+ dist2 = abs(pixel2.y - ty + i);
+
+ if (dist2 < dist1)
+ pixel1 = pixel2;
+
+ output[id] = pixel1;
+ }
+}
+
+__global__ void kernelPropagateInterband(short2 *output, int size,
+ int bandSize) {
+ int tx = blockIdx.x * blockDim.x + threadIdx.x;
+ int inc = __mul24(bandSize, size);
+ int ny, nid, nDist;
+ short2 pixel;
+
+ // Top row, look backward
+ int ty = __mul24(blockIdx.y, bandSize);
+ int topId = TOID(tx, ty, size);
+ int bottomId = TOID(tx, ty + bandSize - 1, size);
+
+ pixel = tex1Dfetch(pbaTexColor, topId);
+ int myDist = abs(pixel.y - ty);
+
+ for (nid = bottomId - inc; nid >= 0; nid -= inc) {
+ pixel = tex1Dfetch(pbaTexColor, nid);
+ if (pixel.x != MARKER) {
+ nDist = abs(pixel.y - ty);
+ if (nDist < myDist)
+ output[topId] = pixel;
+ break;
+ }
+ }
+
+ // Last row, look downward
+ ty = ty + bandSize - 1;
+ pixel = tex1Dfetch(pbaTexColor, bottomId);
+ myDist = abs(pixel.y - ty);
+
+ for (ny = ty + 1, nid = topId + inc; ny < size;
+ ny += bandSize, nid += inc) {
+ pixel = tex1Dfetch(pbaTexColor, nid);
+
+ if (pixel.x != MARKER) {
+ nDist = abs(pixel.y - ty);
+ if (nDist < myDist)
+ output[bottomId] = pixel;
+ break;
+ }
+ }
+}
+
+__global__ void kernelUpdateVertical(short2 *output, int size, int band,
+ int bandSize) {
+ int tx = blockIdx.x * blockDim.x + threadIdx.x;
+ int ty = blockIdx.y * bandSize;
+
+ short2 top = tex1Dfetch(pbaTexLinks, TOID(tx, ty, size));
+ short2 bottom = tex1Dfetch(pbaTexLinks, TOID(tx, ty + bandSize - 1, size));
+ short2 pixel;
+
+ int dist, myDist;
+
+ int id = TOID(tx, ty, size);
+
+ for (int i = 0; i < bandSize; i++, id += size) {
+ pixel = tex1Dfetch(pbaTexColor, id);
+ myDist = abs(pixel.y - (ty + i));
+
+ dist = abs(top.y - (ty + i));
+ if (dist < myDist) {
+ myDist = dist;
+ pixel = top;
+ }
+
+ dist = abs(bottom.y - (ty + i));
+ if (dist < myDist)
+ pixel = bottom;
+
+ output[id] = pixel;
+ }
+}
+
+// Input: y1 < y2
+__device__ float interpoint(int x1, int y1, int x2, int y2, int x0) {
+ float xM = float(x1 + x2) / 2.0f;
+ float yM = float(y1 + y2) / 2.0f;
+ float nx = x2 - x1;
+ float ny = y2 - y1;
+
+ return yM + nx * (xM - x0) / ny;
+}
+
+__global__ void kernelProximatePoints(short2 *stack, int size, int bandSize) {
+ int tx = __mul24(blockIdx.x, blockDim.x) + threadIdx.x;
+ int ty = __mul24(blockIdx.y, bandSize);
+ int id = TOID(tx, ty, size);
+ int lasty = -1;
+ short2 last1, last2, current;
+ float i1, i2;
+
+ last1.y = -1;
+ last2.y = -1;
+
+ for (int i = 0; i < bandSize; i++, id += size) {
+ current = tex1Dfetch(pbaTexColor, id);
+
+ if (current.x != MARKER) {
+ while (last2.y >= 0) {
+ i1 = interpoint(last1.x, last2.y, last2.x, lasty, tx);
+ i2 = interpoint(last2.x, lasty, current.x, current.y, tx);
+
+ if (i1 < i2)
+ break;
+
+ lasty = last2.y;
+ last2 = last1;
+
+ if (last1.y >= 0)
+ last1 = stack[TOID(tx, last1.y, size)];
+ }
+
+ last1 = last2;
+ last2 = make_short2(current.x, lasty);
+ lasty = current.y;
+
+ stack[id] = last2;
+ }
+ }
+
+ // Store the pointer to the tail at the last pixel of this band
+ if (lasty != ty + bandSize - 1)
+ stack[TOID(tx, ty + bandSize - 1, size)] = make_short2(MARKER, lasty);
+}
+
+__global__ void kernelCreateForwardPointers(short2 *output, int size,
+ int bandSize) {
+ int tx = __mul24(blockIdx.x, blockDim.x) + threadIdx.x;
+ int ty = __mul24(blockIdx.y + 1, bandSize) - 1;
+ int id = TOID(tx, ty, size);
+ int lasty = -1, nexty;
+ short2 current;
+
+ // Get the tail pointer
+ current = tex1Dfetch(pbaTexLinks, id);
+
+ if (current.x == MARKER)
+ nexty = current.y;
+ else
+ nexty = ty;
+
+ for (int i = 0; i < bandSize; i++, id -= size)
+ if (ty - i == nexty) {
+ current = make_short2(lasty, tex1Dfetch(pbaTexLinks, id).y);
+ output[id] = current;
+
+ lasty = nexty;
+ nexty = current.y;
+ }
+
+ // Store the pointer to the head at the first pixel of this band
+ if (lasty != ty - bandSize + 1)
+ output[id + size] = make_short2(lasty, MARKER);
+}
+
+__global__ void kernelMergeBands(short2 *output, int size, int bandSize) {
+ int tx = __mul24(blockIdx.x, blockDim.x) + threadIdx.x;
+ int band1 = blockIdx.y * 2;
+ int band2 = band1 + 1;
+ int firsty, lasty;
+ short2 last1, last2, current;
+ // last1 and last2: x component store the x coordinate of the site,
+ // y component store the backward pointer
+ // current: y component store the x coordinate of the site,
+ // x component store the forward pointer
+
+ // Get the two last items of the first list
+ lasty = __mul24(band2, bandSize) - 1;
+ last2 = make_short2(tex1Dfetch(pbaTexColor, TOID(tx, lasty, size)).x,
+ tex1Dfetch(pbaTexLinks, TOID(tx, lasty, size)).y);
+
+ if (last2.x == MARKER) {
+ lasty = last2.y;
+
+ if (lasty >= 0)
+ last2 =
+ make_short2(tex1Dfetch(pbaTexColor, TOID(tx, lasty, size)).x,
+ tex1Dfetch(pbaTexLinks, TOID(tx, lasty, size)).y);
+ else
+ last2 = make_short2(MARKER, MARKER);
+ }
+
+ if (last2.y >= 0) {
+ // Second item at the top of the stack
+ last1 = make_short2(tex1Dfetch(pbaTexColor, TOID(tx, last2.y, size)).x,
+ tex1Dfetch(pbaTexLinks, TOID(tx, last2.y, size)).y);
+ }
+
+ // Get the first item of the second band
+ firsty = __mul24(band2, bandSize);
+ current = make_short2(tex1Dfetch(pbaTexLinks, TOID(tx, firsty, size)).x,
+ tex1Dfetch(pbaTexColor, TOID(tx, firsty, size)).x);
+
+ if (current.y == MARKER) {
+ firsty = current.x;
+
+ if (firsty >= 0)
+ current =
+ make_short2(tex1Dfetch(pbaTexLinks, TOID(tx, firsty, size)).x,
+ tex1Dfetch(pbaTexColor, TOID(tx, firsty, size)).x);
+ else
+ current = make_short2(MARKER, MARKER);
+ }
+
+ float i1, i2;
+
+ // Count the number of item in the second band that survive so far.
+ // Once it reaches 2, we can stop.
+ int top = 0;
+
+ while (top < 2 && current.y >= 0) {
+ // While there's still something on the left
+ while (last2.y >= 0) {
+ i1 = interpoint(last1.x, last2.y, last2.x, lasty, tx);
+ i2 = interpoint(last2.x, lasty, current.y, firsty, tx);
+
+ if (i1 < i2)
+ break;
+
+ lasty = last2.y;
+ last2 = last1;
+ --top;
+
+ if (last1.y >= 0)
+ last1 = make_short2(
+ tex1Dfetch(pbaTexColor, TOID(tx, last1.y, size)).x,
+ output[TOID(tx, last1.y, size)].y);
+ }
+
+ // Update the current pointer
+ output[TOID(tx, firsty, size)] = make_short2(current.x, lasty);
+
+ if (lasty >= 0)
+ output[TOID(tx, lasty, size)] = make_short2(firsty, last2.y);
+
+ last1 = last2;
+ last2 = make_short2(current.y, lasty);
+ lasty = firsty;
+ firsty = current.x;
+
+ top = max(1, top + 1);
+
+ // Advance the current pointer to the next one
+ if (firsty >= 0)
+ current =
+ make_short2(tex1Dfetch(pbaTexLinks, TOID(tx, firsty, size)).x,
+ tex1Dfetch(pbaTexColor, TOID(tx, firsty, size)).x);
+ else
+ current = make_short2(MARKER, MARKER);
+ }
+
+ // Update the head and tail pointer.
+ firsty = __mul24(band1, bandSize);
+ lasty = __mul24(band2, bandSize);
+ current = tex1Dfetch(pbaTexLinks, TOID(tx, firsty, size));
+
+ if (current.y == MARKER && current.x < 0) { // No head?
+ last1 = tex1Dfetch(pbaTexLinks, TOID(tx, lasty, size));
+
+ if (last1.y == MARKER)
+ current.x = last1.x;
+ else
+ current.x = lasty;
+
+ output[TOID(tx, firsty, size)] = current;
+ }
+
+ firsty = __mul24(band1, bandSize) + bandSize - 1;
+ lasty = __mul24(band2, bandSize) + bandSize - 1;
+ current = tex1Dfetch(pbaTexLinks, TOID(tx, lasty, size));
+
+ if (current.x == MARKER && current.y < 0) { // No tail?
+ last1 = tex1Dfetch(pbaTexLinks, TOID(tx, firsty, size));
+
+ if (last1.x == MARKER)
+ current.y = last1.y;
+ else
+ current.y = firsty;
+
+ output[TOID(tx, lasty, size)] = current;
+ }
+}
+
+__global__ void kernelDoubleToSingleList(short2 *output, int size) {
+ int tx = __mul24(blockIdx.x, blockDim.x) + threadIdx.x;
+ int ty = blockIdx.y;
+ int id = TOID(tx, ty, size);
+
+ output[id] = make_short2(tex1Dfetch(pbaTexColor, id).x,
+ tex1Dfetch(pbaTexLinks, id).y);
+}
+
+__global__ void kernelColor(short2 *output, int size) {
+ __shared__ short2 s_last1[BLOCKSIZE], s_last2[BLOCKSIZE];
+ __shared__ int s_lasty[BLOCKSIZE];
+
+ int col = threadIdx.x;
+ int tid = threadIdx.y;
+ int tx = __mul24(blockIdx.x, blockDim.x) + col;
+ int dx, dy, lasty;
+ unsigned int best, dist;
+ short2 last1, last2;
+
+ if (tid == blockDim.y - 1) {
+ lasty = size - 1;
+
+ last2 = tex1Dfetch(pbaTexColor, __mul24(lasty, size) + tx);
+
+ if (last2.x == MARKER) {
+ lasty = last2.y;
+ last2 = tex1Dfetch(pbaTexColor, __mul24(lasty, size) + tx);
+ }
+
+ if (last2.y >= 0)
+ last1 = tex1Dfetch(pbaTexColor, __mul24(last2.y, size) + tx);
+
+ s_last1[col] = last1;
+ s_last2[col] = last2;
+ s_lasty[col] = lasty;
+ }
+
+ __syncthreads();
+
+ for (int ty = size - 1 - tid; ty >= 0; ty -= blockDim.y) {
+ last1 = s_last1[col];
+ last2 = s_last2[col];
+ lasty = s_lasty[col];
+
+ dx = last2.x - tx;
+ dy = lasty - ty;
+ best = dist = __mul24(dx, dx) + __mul24(dy, dy);
+
+ while (last2.y >= 0) {
+ dx = last1.x - tx;
+ dy = last2.y - ty;
+ dist = __mul24(dx, dx) + __mul24(dy, dy);
+
+ if (dist > best)
+ break;
+
+ best = dist;
+ lasty = last2.y;
+ last2 = last1;
+
+ if (last2.y >= 0)
+ last1 = tex1Dfetch(pbaTexColor, __mul24(last2.y, size) + tx);
+ }
+
+ __syncthreads();
+
+ output[TOID(tx, ty, size)] = make_short2(last2.x, lasty);
+
+ if (tid == blockDim.y - 1) {
+ s_last1[col] = last1;
+ s_last2[col] = last2;
+ s_lasty[col] = lasty;
+ }
+
+ __syncthreads();
+ }
+} \ No newline at end of file