aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSamuel Fadel <samuelfadel@gmail.com>2015-12-19 12:29:33 +0100
committerSamuel Fadel <samuelfadel@gmail.com>2015-12-19 12:29:33 +0100
commitd7dd95cd9eaa223ff8b86e84e6b1b488ff79bcd5 (patch)
tree89cfe3ccd458767a6836ac59a67b6ebd1ef8ddc1
parent20fb359d2a487f425463f2efe2390c1d34e724d9 (diff)
New rendering (VoronoiSplat) -- incomplete.
* Added voronoi-like splatting to points: the same technique from Messias et al., (2014) * It is now possible to change the projection technique during runtime (possible, but still requires some work)
-rw-r--r--main.cpp25
-rw-r--r--main.h18
-rw-r--r--main_view.qml88
-rw-r--r--pm.pro34
-rw-r--r--skelft.cu869
-rw-r--r--skelft.h106
-rw-r--r--skelft_core.cpp199
-rw-r--r--skelftkernel.h468
-rw-r--r--voronoisplat.cpp407
-rw-r--r--voronoisplat.h103
10 files changed, 2307 insertions, 10 deletions
diff --git a/main.cpp b/main.cpp
index 81498a7..64560d3 100644
--- a/main.cpp
+++ b/main.cpp
@@ -12,12 +12,14 @@
#include "mp.h"
#include "continuouscolorscale.h"
#include "scatterplot.h"
+#include "voronoisplat.h"
#include "historygraph.h"
#include "interactionhandler.h"
#include "selectionhandler.h"
#include "effectivenessobserver.h"
#include "distortionobserver.h"
#include "npdistortion.h"
+#include "skelft.h"
static QObject *mainProvider(QQmlEngine *engine, QJSEngine *scriptEngine)
{
@@ -155,12 +157,16 @@ int main(int argc, char **argv)
m->setSubsample(Ys);
qmlRegisterType<Scatterplot>("PM", 1, 0, "Scatterplot");
+ qmlRegisterType<VoronoiSplat>("PM", 1, 0, "VoronoiSplat");
qmlRegisterType<HistoryGraph>("PM", 1, 0, "HistoryGraph");
+ qmlRegisterType<InteractionHandler>("PM", 1, 0, "InteractionHandler");
qmlRegisterSingletonType<Main>("PM", 1, 0, "Main", mainProvider);
// Set up multisampling
QSurfaceFormat fmt;
fmt.setSamples(16);
+ fmt.setRenderableType(QSurfaceFormat::OpenGL);
+ fmt.setVersion(4, 5);
QSurfaceFormat::setDefaultFormat(fmt);
QQmlApplicationEngine engine(QUrl("qrc:///main_view.qml"));
@@ -184,6 +190,8 @@ int main(int argc, char **argv)
// subsamplePlot->setColorData(arma::zeros<arma::vec>(subsampleSize));
subsamplePlot->setColorScale(&colorScale);
Scatterplot *plot = engine.rootObjects()[0]->findChild<Scatterplot *>("plot");
+ VoronoiSplat *splat = engine.rootObjects()[0]->findChild<VoronoiSplat *>("splat");
+ skelft2DInitialization(splat->width());
// Keep track of the current subsample (in order to save them later, if requested)
QObject::connect(subsamplePlot, SIGNAL(xyChanged(const arma::mat &)),
@@ -191,15 +199,22 @@ int main(int argc, char **argv)
QObject::connect(subsamplePlot, SIGNAL(xyInteractivelyChanged(const arma::mat &)),
m, SLOT(setSubsample(const arma::mat &)));
- // Update LAMP projection as the subsample is modified
+ // Update projection as the subsample is modified
InteractionHandler interactionHandler(X, sampleIndices);
- interactionHandler.setTechnique(InteractionHandler::TECHNIQUE_LAMP);
+ m->setInteractionHandler(&interactionHandler);
QObject::connect(subsamplePlot, SIGNAL(xyChanged(const arma::mat &)),
&interactionHandler, SLOT(setSubsample(const arma::mat &)));
QObject::connect(subsamplePlot, SIGNAL(xyInteractivelyChanged(const arma::mat &)),
&interactionHandler, SLOT(setSubsample(const arma::mat &)));
QObject::connect(&interactionHandler, SIGNAL(subsampleChanged(const arma::mat &)),
plot, SLOT(setXY(const arma::mat &)));
+ m->setTechnique(InteractionHandler::TECHNIQUE_LAMP);
+
+ // Update splat whenever the main plot is also updated
+ QObject::connect(plot, SIGNAL(xyChanged(const arma::mat &)),
+ splat, SLOT(setPoints(const arma::mat &)));
+ QObject::connect(plot, SIGNAL(colorDataChanged(const arma::vec &)),
+ splat, SLOT(setValues(const arma::vec &)));
// Linking between selections in subsample plot and full dataset plot
SelectionHandler selectionHandler(sampleIndices);
@@ -237,7 +252,11 @@ int main(int argc, char **argv)
subsamplePlot->setXY(Ys);
subsamplePlot->setColorData(labels(sampleIndices));
plot->setColorScale(&colorScale);
+ splat->setColorScale(&colorScale);
plot->setColorData(labels);
- return app.exec();
+ auto ret = app.exec();
+
+ skelft2DDeinitialization();
+ return ret;
}
diff --git a/main.h b/main.h
index 01d62f7..474c7c2 100644
--- a/main.h
+++ b/main.h
@@ -4,6 +4,8 @@
#include <QObject>
#include <armadillo>
+#include "interactionhandler.h"
+
class Main : public QObject
{
Q_OBJECT
@@ -38,6 +40,16 @@ public:
Q_INVOKABLE void setSubsampleSavePath(const std::string &path) { m_subsampleSavePath = path; }
Q_INVOKABLE void setSubsampleSavePath(const QString &path) { setSubsampleSavePath(path.toStdString()); }
+ void setInteractionHandler(InteractionHandler *interactionHandler) {
+ m_interactionHandler = interactionHandler;
+ }
+
+ Q_INVOKABLE void setTechnique(int technique) {
+ if (m_interactionHandler) {
+ m_interactionHandler->setTechnique((InteractionHandler::Technique) technique);
+ }
+ }
+
arma::mat X() const { return m_dataset.cols(0, m_dataset.n_cols - 2); }
arma::vec labels() const { return m_dataset.col(m_dataset.n_cols - 1); }
@@ -53,13 +65,17 @@ public slots:
}
private:
- Main(QObject *parent = 0) : QObject(parent) {}
+ Main(QObject *parent = 0)
+ : QObject(parent)
+ , m_interactionHandler(0)
+ {}
~Main() {}
Q_DISABLE_COPY(Main)
arma::mat m_dataset, m_subsample;
arma::uvec m_subsampleIndices;
std::string m_indicesSavePath, m_subsampleSavePath;
+ InteractionHandler *m_interactionHandler;
};
#endif // MAIN_H
diff --git a/main_view.qml b/main_view.qml
index 0cd29b5..62c3a67 100644
--- a/main_view.qml
+++ b/main_view.qml
@@ -6,10 +6,13 @@ import QtQuick.Layouts 1.2
import PM 1.0
ApplicationWindow {
+ id: mainWindow
title: "Projection Manipulation"
visible: true
- width: 900
- height: 600
+ minimumWidth: 900
+ minimumHeight: 600
+ maximumWidth: minimumWidth
+ maximumHeight: minimumHeight
menuBar: MenuBar {
Menu {
@@ -19,6 +22,26 @@ ApplicationWindow {
}
Menu {
+ title: "Technique"
+ MenuItem {
+ action: lampTechniqueAction
+ exclusiveGroup: techniqueGroup
+ }
+ MenuItem {
+ action: lspTechniqueAction
+ exclusiveGroup: techniqueGroup
+ }
+ MenuItem {
+ action: plmpTechniqueAction
+ exclusiveGroup: techniqueGroup
+ }
+ MenuItem {
+ action: pekalskaTechniqueAction
+ exclusiveGroup: techniqueGroup
+ }
+ }
+
+ Menu {
title: "View"
MenuItem {
action: labelColorAction
@@ -68,10 +91,18 @@ ApplicationWindow {
border.width: 1
border.color: "#cccccc"
+ VoronoiSplat {
+ id: splat
+ objectName: "splat"
+ width: 256
+ height: 256
+ }
+
Scatterplot {
id: plot
objectName: "plot"
anchors.fill: parent
+ visible: false
}
}
}
@@ -108,6 +139,51 @@ ApplicationWindow {
}
ExclusiveGroup {
+ id: techniqueGroup
+
+ Action {
+ id: lampTechniqueAction
+ text: "LAMP"
+ shortcut: "Ctrl+1"
+ checked: true
+ checkable: true
+ onTriggered: {
+ Main.setTechnique(InteractionHandler.TECHNIQUE_LAMP)
+ }
+ }
+
+ Action {
+ id: lspTechniqueAction
+ text: "LSP"
+ shortcut: "Ctrl+2"
+ checkable: true
+ onTriggered: {
+ Main.setTechnique(InteractionHandler.TECHNIQUE_LSP)
+ }
+ }
+
+ Action {
+ id: plmpTechniqueAction
+ text: "PLMP"
+ shortcut: "Ctrl+3"
+ checkable: true
+ onTriggered: {
+ Main.setTechnique(InteractionHandler.TECHNIQUE_PLMP)
+ }
+ }
+
+ Action {
+ id: pekalskaTechniqueAction
+ text: "Pekalska"
+ shortcut: "Ctrl+4"
+ checkable: true
+ onTriggered: {
+ Main.setTechnique(InteractionHandler.TECHNIQUE_PEKALSKA)
+ }
+ }
+ }
+
+ ExclusiveGroup {
id: coloringGroup
Action {
@@ -116,7 +192,7 @@ ApplicationWindow {
shortcut: "Shift+L"
checked: true
checkable: true
- onTriggered: console.log("Labels")
+ onTriggered: console.log("stub: Labels")
}
Action {
@@ -124,7 +200,7 @@ ApplicationWindow {
text: "Neighborhood Preservation"
shortcut: "Shift+N"
checkable: true
- onTriggered: console.log("NP")
+ onTriggered: console.log("stub: NP")
}
Action {
@@ -132,7 +208,7 @@ ApplicationWindow {
text: "Stress"
shortcut: "Shift+S"
checkable: true
- onTriggered: console.log("Stress")
+ onTriggered: console.log("stub: Stress")
}
Action {
@@ -140,7 +216,7 @@ ApplicationWindow {
text: "Silhouette"
shortcut: "Shift+T"
checkable: true
- onTriggered: console.log("Silhouette")
+ onTriggered: console.log("stub: Silhouette")
}
}
}
diff --git a/pm.pro b/pm.pro
index 88a4b61..7cc6bdc 100644
--- a/pm.pro
+++ b/pm.pro
@@ -1,5 +1,7 @@
QT += qml quick widgets
+CONFIG += qt debug
+
QMAKE_CXXFLAGS += -std=c++11 -fopenmp
QMAKE_LIBS += -larmadillo -fopenmp
HEADERS += main.h \
@@ -8,6 +10,7 @@ HEADERS += main.h \
geometry.h \
scale.h \
scatterplot.h \
+ voronoisplat.h \
historygraph.h \
interactionhandler.h \
selectionhandler.h \
@@ -15,18 +18,22 @@ HEADERS += main.h \
distortionobserver.h \
distortionmeasure.h \
npdistortion.h \
+ skelft.h \
+ skelftkernel.h
mp.h
SOURCES += main.cpp \
colorscale.cpp \
continuouscolorscale.cpp \
geometry.cpp \
scatterplot.cpp \
+ voronoisplat.cpp \
historygraph.cpp \
interactionhandler.cpp \
selectionhandler.cpp \
effectivenessobserver.cpp \
distortionobserver.cpp \
npdistortion.cpp \
+ skelft_core.cpp \
lamp.cpp \
plmp.cpp \
knn.cpp \
@@ -34,6 +41,33 @@ SOURCES += main.cpp \
tsne.cpp \
measures.cpp \
dist.cpp
+
+OTHER_FILES += skelft.cu
+
+# Cuda settings
+CUDA_SOURCES += skelft.cu
+CUDA_DIR = "/opt/cuda"
+CUDA_ARCH = sm_30
+NVCC_OPTIONS += --use_fast_math
+SYSTEM_TYPE = 64 # Either 64 or empty
+INCLUDEPATH += $$CUDA_DIR/include
+QMAKE_LIBDIR += $$CUDA_DIR/lib$$SYSTEM_TYPE
+LIBS += -lcuda -lcudart
+
+CONFIG(debug, debug|release) {
+ cuda_dbg.input = CUDA_SOURCES
+ cuda_dbg.output = ${QMAKE_FILE_BASE}_cuda.o
+ cuda_dbg.commands = $$CUDA_DIR/bin/nvcc -D_DEBUG -g $$NVCC_OPTIONS -I$$INCLUDEPATH $$LIBS --machine $$SYSTEM_TYPE -arch=$$CUDA_ARCH -c -o ${QMAKE_FILE_OUT} ${QMAKE_FILE_NAME}
+ cuda_dbg.dependency_type = TYPE_C
+ QMAKE_EXTRA_COMPILERS += cuda_dbg
+} else {
+ cuda.input = CUDA_SOURCES
+ cuda.output = ${QMAKE_FILE_BASE}_cuda.o
+ cuda.commands = $$CUDA_DIR/bin/nvcc $$NVCC_OPTIONS -I$$INCLUDEPATH $$LIBS --machine $$SYSTEM_TYPE -arch=$$CUDA_ARCH -c -o ${QMAKE_FILE_OUT} ${QMAKE_FILE_NAME}
+ cuda.dependency_type = TYPE_C
+ QMAKE_EXTRA_COMPILERS += cuda
+}
+
RESOURCES += pm.qrc
target.path = .
diff --git a/skelft.cu b/skelft.cu
new file mode 100644
index 0000000..be7662c
--- /dev/null
+++ b/skelft.cu
@@ -0,0 +1,869 @@
+#include <device_functions.h>
+#include "skelft.h"
+#include <cstdio>
+#include <iostream>
+
+
+#include <thrust/host_vector.h>
+#include <thrust/device_vector.h>
+#include <thrust/sort.h>
+
+
+
+using namespace std;
+
+
+// Parameters for CUDA kernel executions; more or less optimized for a 1024x1024 image.
+#define BLOCKX 16
+#define BLOCKY 16
+#define BLOCKSIZE 64
+#define TILE_DIM 32
+#define BLOCK_ROWS 16
+
+
+
+/****** Global Variables *******/
+const int NB = 7; // Nr buffers we use and store in the entire framework
+short2 **pbaTextures; // Work buffers used to compute and store resident images
+// 0: work buffer
+// 1: FT
+// 2: thresholded DT or exact floating-point DT
+// 3: thresholded skeleton
+// 4: topology analysis
+// 5: work buffer for topology
+// 6: skeleton FT
+//
+
+float* pbaTexSiteParam; // Stores boundary parameterization
+float2* pbaTexAdvectSites;
+float* pbaTexDensityData; // The density map of the sites
+float2* pbaTexGradientData; // The gradient of the density map
+
+int pbaTexSize; // Texture size (squared) actually used in all computations
+int floodBand = 4, // Various FT computation parameters; defaults are good for an 1024x1024 image.
+ maurerBand = 4,
+ colorBand = 4;
+
+texture<short2> pbaTexColor; // 2D textures (bound to various buffers defined above as needed)
+texture<short2> pbaTexColor2;
+texture<short2> pbaTexLinks;
+texture<float> pbaTexParam; // 1D site parameterization texture (bound to pbaTexSiteParam)
+texture<unsigned char>
+ pbaTexGray; // 2D texture of unsigned char values, e.g. the binary skeleton
+texture<float2> pbaTexAdvect;
+texture<float,cudaTextureType2D,cudaReadModeElementType> pbaTexDensity;
+texture<float2,cudaTextureType2D,cudaReadModeElementType> pbaTexGradient;
+texture<float,cudaTextureType2D,cudaReadModeElementType> pbaTexDT;
+
+
+__device__ bool fill_gc; //Indicates if a fill-sweep did fill anything or not
+
+
+#if __CUDA_ARCH__ < 110 // We cannot use atomic intrinsics on SM10 or below. Thus, we define these as nop.
+#define atomicInc(a,b) 0 // The default will be that some code e.g. endpoint detection will thus not do anything.
+#endif
+
+
+
+/********* Kernels ********/
+#include "skelftkernel.h"
+
+
+
+// Initialize necessary memory (CPU/GPU sides)
+// - textureSize: The max size of any image we will process until re-initialization
+void skelft2DInitialization(int maxTexSize)
+{
+ int pbaMemSize = maxTexSize * maxTexSize * sizeof(short2); // A buffer has 2 shorts / pixel
+ pbaTextures = (short2**) malloc(NB * sizeof(short2*)); // We will use NB buffers
+
+ for(int i=0;i<NB;++i)
+ cudaMalloc((void **) &pbaTextures[i], pbaMemSize); // Allocate work buffer 'i' (on GPU)
+
+ cudaMalloc((void**) &pbaTexSiteParam, maxTexSize * maxTexSize * sizeof(float)); // Sites texture (for FT)
+ cudaMalloc((void**) &pbaTexAdvectSites, maxTexSize * maxTexSize * sizeof(float2)); // Sites 2D-coords
+ cudaMalloc((void**) &pbaTexDensityData, maxTexSize * maxTexSize * sizeof(float));
+ cudaMalloc((void**) &pbaTexGradientData, maxTexSize * maxTexSize * sizeof(float2));
+}
+
+
+
+
+// Deallocate all allocated memory
+void skelft2DDeinitialization()
+{
+ for(int i=0;i<NB;++i) cudaFree(pbaTextures[i]);
+ cudaFree(pbaTexSiteParam);
+ cudaFree(pbaTexAdvectSites);
+ cudaFree(pbaTexDensityData);
+ cudaFree(pbaTexGradientData);
+ free(pbaTextures);
+}
+
+
+// Initialize the Voronoi textures from the sites' encoding texture (parameterization)
+// REMARK: we interpret 'inputVoro' as a 2D texture, as it's much easier/faster like this
+__global__ void kernelSiteParamInit(short2* inputVoro, int size)
+{
+ int tx = blockIdx.x * blockDim.x + threadIdx.x;
+ int ty = blockIdx.y * blockDim.y + threadIdx.y;
+
+ if (tx<size && ty<size) // Careful not to go outside the image..
+ {
+ int i = TOID(tx,ty,size);
+ // The sites-param has non-zero (parameter) values precisely on boundary points
+ float param = tex1Dfetch(pbaTexParam,i);
+
+ short2& v = inputVoro[i];
+ // Non-boundary points are marked as 0 in the parameterization. Here we will compute the FT.
+ v.x = v.y = MARKER;
+ // These are points which define the 'sites' to compute the FT/skeleton (thus, have FT==identity)
+ // We could use an if-then-else here, but it's faster with an if-then
+ if (param>0)
+ {
+ v.x = tx;
+ v.y = ty;
+ }
+ }
+}
+
+
+
+void skelft2DInitializeInput(float* sites, int size) // Copy input sites from CPU to GPU; Also set up site param initialization in pbaTextures[0]
+{
+ pbaTexSize = size; // Size of the actual texture being used in this run; can be smaller than the max-tex-size
+ // which was used in skelft2DInitialization()
+
+ cudaMemcpy(pbaTexSiteParam, sites, pbaTexSize * pbaTexSize * sizeof(float), cudaMemcpyHostToDevice);
+ // Pass sites parameterization to CUDA. Must be done before calling the initialization
+ // kernel, since we use the sites-param as a texture in that kernel
+ cudaBindTexture(0, pbaTexParam, pbaTexSiteParam); // Bind the sites-param as a 1D texture so we can quickly index it next
+ dim3 block = dim3(BLOCKX,BLOCKY);
+ dim3 grid = dim3(pbaTexSize/block.x,pbaTexSize/block.y);
+
+ kernelSiteParamInit<<<grid,block>>>(pbaTextures[0],pbaTexSize); // Do the site param initialization. This sets up pbaTextures[0]
+ cudaUnbindTexture(pbaTexParam);
+}
+
+
+
+
+
+// In-place transpose a squared texture.
+// Block orders are modified to optimize memory access.
+// Point coordinates are also swapped.
+void pba2DTranspose(short2 *texture)
+{
+ dim3 block(TILE_DIM, BLOCK_ROWS);
+ dim3 grid(pbaTexSize / TILE_DIM, pbaTexSize / TILE_DIM);
+
+ cudaBindTexture(0, pbaTexColor, texture);
+ kernelTranspose<<< grid, block >>>(texture, pbaTexSize);
+ cudaUnbindTexture(pbaTexColor);
+}
+
+// Phase 1 of PBA. m1 must divides texture size
+void pba2DPhase1(int m1, short xm, short ym, short xM, short yM)
+{
+ dim3 block = dim3(BLOCKSIZE);
+ dim3 grid = dim3(pbaTexSize / block.x, m1);
+
+ // Flood vertically in their own bands
+ cudaBindTexture(0, pbaTexColor, pbaTextures[0]);
+ kernelFloodDown<<< grid, block >>>(pbaTextures[1], pbaTexSize, pbaTexSize / m1);
+ cudaUnbindTexture(pbaTexColor);
+
+ cudaBindTexture(0, pbaTexColor, pbaTextures[1]);
+ kernelFloodUp<<< grid, block >>>(pbaTextures[1], pbaTexSize, pbaTexSize / m1);
+
+ // Passing information between bands
+ grid = dim3(pbaTexSize / block.x, m1);
+ kernelPropagateInterband<<< grid, block >>>(pbaTextures[0], pbaTexSize, pbaTexSize / m1);
+
+ cudaBindTexture(0, pbaTexLinks, pbaTextures[0]);
+ kernelUpdateVertical<<< grid, block >>>(pbaTextures[1], pbaTexSize, m1, pbaTexSize / m1);
+ cudaUnbindTexture(pbaTexLinks);
+ cudaUnbindTexture(pbaTexColor);
+}
+
+// Phase 2 of PBA. m2 must divides texture size
+void pba2DPhase2(int m2)
+{
+ // Compute proximate points locally in each band
+ dim3 block = dim3(BLOCKSIZE);
+ dim3 grid = dim3(pbaTexSize / block.x, m2);
+ cudaBindTexture(0, pbaTexColor, pbaTextures[1]);
+ kernelProximatePoints<<< grid, block >>>(pbaTextures[0], pbaTexSize, pbaTexSize / m2);
+
+ cudaBindTexture(0, pbaTexLinks, pbaTextures[0]);
+ kernelCreateForwardPointers<<< grid, block >>>(pbaTextures[0], pbaTexSize, pbaTexSize / m2);
+
+ // Repeatly merging two bands into one
+ for (int noBand = m2; noBand > 1; noBand /= 2) {
+ grid = dim3(pbaTexSize / block.x, noBand / 2);
+ kernelMergeBands<<< grid, block >>>(pbaTextures[0], pbaTexSize, pbaTexSize / noBand);
+ }
+
+ // Replace the forward link with the X coordinate of the seed to remove
+ // the need of looking at the other texture. We need it for coloring.
+ grid = dim3(pbaTexSize / block.x, pbaTexSize);
+ kernelDoubleToSingleList<<< grid, block >>>(pbaTextures[0], pbaTexSize);
+ cudaUnbindTexture(pbaTexLinks);
+ cudaUnbindTexture(pbaTexColor);
+}
+
+// Phase 3 of PBA. m3 must divides texture size
+void pba2DPhase3(int m3)
+{
+ dim3 block = dim3(BLOCKSIZE / m3, m3);
+ dim3 grid = dim3(pbaTexSize / block.x);
+ cudaBindTexture(0, pbaTexColor, pbaTextures[0]);
+ kernelColor<<< grid, block >>>(pbaTextures[1], pbaTexSize);
+ cudaUnbindTexture(pbaTexColor);
+}
+
+
+
+void skel2DFTCompute(short xm, short ym, short xM, short yM, int floodBand, int maurerBand, int colorBand)
+{
+ pba2DPhase1(floodBand,xm,ym,xM,yM); //Vertical sweep
+
+ pba2DTranspose(pbaTextures[1]); //
+
+ pba2DPhase2(maurerBand); //Horizontal coloring
+
+ pba2DPhase3(colorBand); //Row coloring
+
+ pba2DTranspose(pbaTextures[1]);
+}
+
+
+
+
+
+__global__ void kernelThresholdDT(unsigned char* output, int size, float threshold2, short xm, short ym, short xM, short yM)
+//Input: pbaTexColor: closest-site-ids per pixel, i.e. FT
+//Output: output: thresholded DT
+{
+ int tx = blockIdx.x * blockDim.x + threadIdx.x;
+ int ty = blockIdx.y * blockDim.y + threadIdx.y;
+
+ if (tx>xm && ty>ym && tx<xM && ty<yM) //careful not to index outside the image..
+ {
+ int id = TOID(tx, ty, size);
+ short2 voroid = tex1Dfetch(pbaTexColor,id); //get the closest-site to tx,ty into voroid.x,.y
+ float d2 = (tx-voroid.x)*(tx-voroid.x)+(ty-voroid.y)*(ty-voroid.y);
+ output[id] = (d2<=threshold2); //threshold DT into binary image
+ }
+}
+
+
+
+__global__ void kernelDT(float* output, int size, short xm, short ym, short xM, short yM)
+//Input: pbaTexColor: closest-site-ids per pixel, i.e. FT
+//Output: output: DT
+{
+ int tx = blockIdx.x * blockDim.x + threadIdx.x;
+ int ty = blockIdx.y * blockDim.y + threadIdx.y;
+
+ if (tx>xm && ty>ym && tx<xM && ty<yM) //careful not to index outside the image..
+ {
+ int id = TOID(tx, ty, size);
+ short2 voroid = tex1Dfetch(pbaTexColor,id); //get the closest-site to tx,ty into voroid.x,.y
+ float d2 = (tx-voroid.x)*(tx-voroid.x)+(ty-voroid.y)*(ty-voroid.y);
+ output[id] = sqrtf(d2); //save the Euclidean DT
+ }
+}
+
+
+__global__ void kernelSkel(unsigned char* output, short xm, short ym,
+ short xM, short yM, short size, float threshold, float length)
+ //Input: pbaTexColor: closest-site-ids per pixel
+ // pbaTexParam: labels for sites (only valid at site locations)
+{ //Output: output: binary thresholded skeleton
+ int tx = blockIdx.x * blockDim.x + threadIdx.x;
+ int ty = blockIdx.y * blockDim.y + threadIdx.y;
+
+ if (tx>xm && ty>ym && tx<xM && ty<yM)
+ {
+ int id = TOID(tx, ty, size);
+ int Id = id;
+ short2 voroid = tex1Dfetch(pbaTexColor,id); //get the closest-site to tx,ty into voroid.x,.y
+ int id2 = TOID(voroid.x,voroid.y,size); //convert the site's coord to an index into pbaTexParam[], the site-label-texture
+ float imp = tex1Dfetch(pbaTexParam,id2); //get the site's label
+
+ ++id; //TOID(tx+1,ty,size)
+ voroid = tex1Dfetch(pbaTexColor,id); //
+ id2 = TOID(voroid.x,voroid.y,size); //
+ float imp_r = tex1Dfetch(pbaTexParam,id2); //
+
+ id += size-1; //TOID(tx,ty+1,size)
+ voroid = tex1Dfetch(pbaTexColor,id); //
+ id2 = TOID(voroid.x,voroid.y,size); //
+ float imp_u = tex1Dfetch(pbaTexParam,id2); //
+
+ float imp_dx = fabsf(imp_r-imp);
+ float imp_dy = fabsf(imp_u-imp);
+ float Imp = max(imp_dx,imp_dy);
+ Imp = min(Imp,fabsf(length-Imp));
+ if (Imp>=threshold) output[Id] = 1; //By filling only 1-values, we reduce memory access somehow (writing to output[] is expensive)
+ }
+
+ //WARNING: this kernel may sometimes creates 2-pixel-thick branches.. Study the AFMM original code to see if this is correct.
+}
+
+
+
+#define X 1
+
+__constant__ const //REMARK: put following constants (for kernelTopology) in CUDA constant-memory, as this gives a huge speed difference
+unsigned char topo_patterns[][9] = { {0,0,0, //These are the 3x3 templates that we use to detect skeleton endpoints
+ 0,X,0, //(with four 90-degree rotations for each)
+ 0,X,0},
+ {0,0,0,
+ 0,X,0,
+ 0,0,X},
+ {0,0,0,
+ 0,X,0,
+ 0,X,X},
+ {0,0,0,
+ 0,X,0,
+ X,X,0}
+ };
+
+#define topo_NPATTERNS 4 //Number of patterns we try to match (for kernelTopology)
+ //REMARK: #define faster than __constant__
+
+__constant__ const unsigned char topo_rot[][9] = { {0,1,2,3,4,5,6,7,8}, {2,5,8,1,4,7,0,3,6}, {8,7,6,5,4,3,2,1,0}, {6,3,0,7,4,1,8,5,2} };
+ //These encode the four 90-degree rotations of the patterns (for kernelTopology);
+
+__device__ unsigned int topo_gc = 0;
+__device__ unsigned int topo_gc_last = 0;
+
+
+__global__ void kernelTopology(unsigned char* output, short2* output_set, short xm, short ym, short xM, short yM, short size, int maxpts)
+{
+ const int tx = blockIdx.x * blockDim.x + threadIdx.x;
+ const int ty = blockIdx.y * blockDim.y + threadIdx.y;
+
+ unsigned char t[9];
+
+ if (tx>xm && ty>ym && tx<xM-1 && ty<yM-1) //careful not to index outside the image; take into account the template size too
+ {
+ int id = TOID(tx, ty, size);
+ unsigned char p = tex1Dfetch(pbaTexGray,id); //get the skeleton pixel at tx,ty
+ if (p) //if the pixel isn't skeleton, nothing to do
+ {
+ unsigned char idx=0;
+ for(int j=ty-1;j<=ty+1;++j) //read the template into t[] for easier use
+ {
+ int id = TOID(tx-1, j, size);
+ for(int i=0;i<=2;++i,++id,++idx)
+ t[idx] = tex1Dfetch(pbaTexGray,id); //get the 3x3 template centered at the skel point tx,ty
+ }
+
+ for(unsigned char r=0;r<4;++r) //try to match all rotations of a pattern:
+ {
+ const unsigned char* rr = topo_rot[r];
+ for(unsigned char p=0;p<topo_NPATTERNS;++p) //try to match all patterns:
+ {
+ const unsigned char* pat = topo_patterns[p];
+ unsigned char j = (p==0)? 0 : 7; //Speedup: for all patterns except 1st, check only last 3 entries, the first 6 are identical for all patterns
+ for(;j<9;++j) //try to match rotated pattern vs actual pattern
+ if (pat[j]!=t[rr[j]]) break; //this rotation failed
+ if (j<6) break; //Speedup: if we have a mismatch on the 1st 6 pattern entries, then none of the patterns can match
+ // since all templates have the same first 6 entries.
+
+ if (j==9) //this rotation succeeded: mark the pixel as a topology event and we're done
+ {
+ int crt_gc = atomicInc(&topo_gc,maxpts); //REMARK: this serializes (compacts) all detected endpoints in one array.
+ output_set[crt_gc] = make_short2(tx,ty); //To do this, we use an atomic read-increment-return on a global counter,
+ //which is guaranteed to give all threads unique consecutive indexes in the array.
+ output[id] = 1; //Also create the topology image
+ return;
+ }
+ }
+ }
+ }
+ }
+ else //Last thread: add zero-marker to the output point-set, so the reader knows how many points are really in there
+ if (tx==xM-1 && ty==yM-1) //Also reset the global vector counter topo_gc, for the next parallel-run of this function
+ { topo_gc_last = topo_gc; topo_gc = 0; } //We do this in the last thread so that no one modifies topo_gc from now on.
+ //REMARK: this seems to be the only way I can read a __device__ variable back to the CPU
+}
+
+
+
+
+void skelft2DParams(int floodBand_, int maurerBand_, int colorBand_) //Set up some params of the FT algorithm
+{
+ floodBand = floodBand_;
+ maurerBand = maurerBand_;
+ colorBand = colorBand_;
+}
+
+
+
+
+
+// Compute 2D FT / Voronoi diagram of a set of sites
+// siteParam: Site parameterization. 0 = non-site points; >0 = site parameter value.
+// output: FT. The (x,y) at (i,j) are the coords of the closest site to (i,j)
+// size: Texture size (pow 2)
+void skelft2DFT(short* output, float* siteParam, short xm, short ym, short xM, short yM, int size)
+{
+ skelft2DInitializeInput(siteParam,size); // Initialization of already-allocated data structures
+
+ skel2DFTCompute(xm, ym, xM, yM, floodBand, maurerBand, colorBand); // Compute FT
+
+ // Copy FT to CPU, if required
+ if (output) cudaMemcpy(output, pbaTextures[1], size*size*sizeof(short2), cudaMemcpyDeviceToHost);
+}
+
+
+
+
+
+
+void skelft2DDT(float* outputDT, short xm, short ym, short xM, short yM) //Compute exact DT from resident FT (in pbaTextures[1])
+{
+ dim3 block = dim3(BLOCKX,BLOCKY);
+ dim3 grid = dim3(pbaTexSize/block.x,pbaTexSize/block.y);
+
+
+ cudaBindTexture(0, pbaTexColor, pbaTextures[1]); //Used to read the FT from
+
+ xm = ym = 0; xM = yM = pbaTexSize-1;
+ kernelDT <<< grid, block >>>((float*)pbaTextures[2], pbaTexSize, xm-1, ym-1, xM+1, yM+1);
+ cudaUnbindTexture(pbaTexColor);
+
+ //Copy DT to CPU
+ if (outputDT) cudaMemcpy(outputDT, pbaTextures[2], pbaTexSize * pbaTexSize * sizeof(float), cudaMemcpyDeviceToHost);
+}
+
+
+
+void skelft2DDT(short* outputDT, float threshold, //Compute (thresholded) DT (into pbaTextures[2]) from resident FT (in pbaTextures[1])
+ short xm, short ym, short xM, short yM)
+{
+ dim3 block = dim3(BLOCKX,BLOCKY);
+ dim3 grid = dim3(pbaTexSize/block.x,pbaTexSize/block.y);
+
+ cudaBindTexture(0, pbaTexColor, pbaTextures[1]); //Used to read the FT from
+
+ if (threshold>=0)
+ {
+ xm -= (short)threshold; if (xm<0) xm=0;
+ ym -= (short)threshold; if (ym<0) ym=0;
+ xM += (short)threshold; if (xM>pbaTexSize-1) xM=pbaTexSize-1;
+ yM += (short)threshold; if (yM>pbaTexSize-1) yM=pbaTexSize-1;
+
+ kernelThresholdDT<<< grid, block >>>((unsigned char*)pbaTextures[2], pbaTexSize, threshold*threshold, xm-1, ym-1, xM+1, yM+1);
+ cudaUnbindTexture(pbaTexColor);
+
+ //Copy thresholded image to CPU
+ if (outputDT) cudaMemcpy(outputDT, (unsigned char*)pbaTextures[2], pbaTexSize * pbaTexSize * sizeof(unsigned char), cudaMemcpyDeviceToHost);
+ }
+}
+
+
+
+
+void skelft2DSkeleton(unsigned char* outputSkel, float length, float threshold, //Compute thresholded skeleton (into pbaTextures[3]) from resident FT (in pbaTextures[1])
+ short xm,short ym,short xM,short yM)
+{ //length: boundary length
+ dim3 block = dim3(BLOCKX,BLOCKY); //threshold: skeleton importance min-value (below this, we ignore branches)
+ dim3 grid = dim3(pbaTexSize/block.x,pbaTexSize/block.y);
+
+ cudaBindTexture(0, pbaTexColor, pbaTextures[1]); //Used to read the resident FT
+ cudaBindTexture(0, pbaTexParam, pbaTexSiteParam); //Used to read the resident boundary parameterization
+ cudaMemset(pbaTextures[3],0,sizeof(unsigned char)*pbaTexSize*pbaTexSize); //Faster to zero result and then fill only 1-values (see kernel)
+
+ kernelSkel<<< grid, block >>>((unsigned char*)pbaTextures[3], xm, ym, xM-1, yM-1, pbaTexSize, threshold, length);
+
+ cudaUnbindTexture(pbaTexColor);
+ cudaUnbindTexture(pbaTexParam);
+
+ //Copy skeleton to CPU
+ if (outputSkel) cudaMemcpy(outputSkel, pbaTextures[3], pbaTexSize * pbaTexSize * sizeof(unsigned char), cudaMemcpyDeviceToHost);
+}
+
+
+
+
+void skelft2DTopology(unsigned char* outputTopo, int* npts, short* outputPoints, //Compute topology-points of the resident skeleton (in pbaTextures[3])
+ short xm,short ym,short xM,short yM)
+{
+ int maxpts = (npts)? *npts : pbaTexSize*pbaTexSize; //This is the max # topo-points we are going to return in outputPoints[]
+
+ dim3 block = dim3(BLOCKX,BLOCKY);
+ dim3 grid = dim3(pbaTexSize/block.x,pbaTexSize/block.y);
+
+ cudaBindTexture(0, pbaTexGray, pbaTextures[3]); //Used to read the resident skeleton
+ cudaMemset(pbaTextures[4],0,sizeof(unsigned char)*pbaTexSize*pbaTexSize); //Faster to zero result and then fill only 1-values (see kernel)
+
+ unsigned int zero = 0;
+ cudaMemcpyToSymbol(topo_gc,&zero,sizeof(unsigned int),0,cudaMemcpyHostToDevice); //Set topo_gc to 0
+
+ kernelTopology<<< grid, block >>>((unsigned char*)pbaTextures[4], pbaTextures[5], xm, ym, xM, yM, pbaTexSize, maxpts+1);
+
+ cudaUnbindTexture(pbaTexGray);
+
+ if (outputPoints && maxpts) //If output-point vector desired, copy the end-points, put in pbaTexture[5] as a vector of short2's,
+ { //into caller space. We copy only 'maxpts' elements, as the user instructed us.
+ unsigned int num_pts;
+ cudaMemcpyFromSymbol(&num_pts,topo_gc_last,sizeof(unsigned int),0,cudaMemcpyDeviceToHost); //Get #topo-points we have detected from the device-var from CUDA
+ if (npts && num_pts) //Copy the topo-points to caller
+ cudaMemcpy(outputPoints,pbaTextures[5],num_pts*sizeof(short2),cudaMemcpyDeviceToHost);
+ if (npts) *npts = num_pts; //Return #detected topo-points to caller
+ }
+
+ if (outputTopo) //If topology image desired, copy it into user space
+ cudaMemcpy(outputTopo,pbaTextures[4],pbaTexSize*pbaTexSize*sizeof(unsigned char), cudaMemcpyDeviceToHost);
+}
+
+
+
+
+__global__ void kernelSiteFromSkeleton(short2* outputSites, int size) //Initialize the Voronoi textures from the sites' encoding texture (parameterization)
+{ //REMARK: we interpret 'inputVoro' as a 2D texture, as it's much easier/faster like this
+ int tx = blockIdx.x * blockDim.x + threadIdx.x;
+ int ty = blockIdx.y * blockDim.y + threadIdx.y;
+
+ if (tx<size && ty<size) //Careful not to go outside the image..
+ {
+ int i = TOID(tx,ty,size);
+ unsigned char param = tex1Dfetch(pbaTexGray,i); //The sites-param has non-zero (parameter) values precisely on non-boundary points
+
+ short2& v = outputSites[i];
+ v.x = v.y = MARKER; //Non-boundary points are marked as 0 in the parameterization. Here we will compute the FT.
+ if (param) //These are points which define the 'sites' to compute the FT/skeleton (thus, have FT==identity)
+ { //We could use an if-then-else here, but it's faster with an if-then
+ v.x = tx; v.y = ty;
+ }
+ }
+}
+
+
+
+
+__global__ void kernelSkelInterpolate(float* output, int size)
+{
+ int tx = blockIdx.x * blockDim.x + threadIdx.x;
+ int ty = blockIdx.y * blockDim.y + threadIdx.y;
+
+ if (tx<size && ty<size) //Careful not to go outside the image..
+ {
+ int id = TOID(tx, ty, size);
+ short2 vid = tex1Dfetch(pbaTexColor,id);
+ float T = sqrtf((tx-vid.x)*(tx-vid.x)+(ty-vid.y)*(ty-vid.y));
+ short2 vid2 = tex1Dfetch(pbaTexColor2,id);
+ float D = sqrtf((tx-vid2.x)*(tx-vid2.x)+(ty-vid2.y)*(ty-vid2.y));
+ float B = ((D)? min(T/2/D,0.5f):0.5) + 0.5*((T)? max(1-D/T,0.0f):0);
+ output[id] = pow(B,0.2f);
+ }
+}
+
+
+
+
+void skel2DSkeletonDT(float* outputSkelDT,short xm,short ym,short xM,short yM)
+{
+ dim3 block = dim3(BLOCKX,BLOCKY);
+ dim3 grid = dim3(pbaTexSize/block.x,pbaTexSize/block.y);
+
+ cudaBindTexture(0,pbaTexGray,pbaTextures[3]); //Used to read the resident binary skeleton
+ kernelSiteFromSkeleton<<<grid,block>>>(pbaTextures[0],pbaTexSize); //1. Init pbaTextures[0] with sites on skeleton i.e. from pbaTexGray
+ cudaUnbindTexture(pbaTexGray);
+
+ //Must first save pbaTextures[1] since we may need it later..
+ cudaMemcpy(pbaTextures[5],pbaTextures[1],pbaTexSize*pbaTexSize*sizeof(short2),cudaMemcpyDeviceToDevice);
+ skel2DFTCompute(xm, ym, xM, yM, floodBand, maurerBand, colorBand); //2. Compute FT of the skeleton into pbaTextures[6]
+ cudaMemcpy(pbaTextures[6],pbaTextures[1],pbaTexSize*pbaTexSize*sizeof(short2),cudaMemcpyDeviceToDevice);
+ cudaMemcpy(pbaTextures[1],pbaTextures[5],pbaTexSize*pbaTexSize*sizeof(short2),cudaMemcpyDeviceToDevice);
+
+ //Compute interpolation
+ cudaBindTexture(0,pbaTexColor,pbaTextures[1]); // FT of boundary
+ cudaBindTexture(0,pbaTexColor2,pbaTextures[6]); // FT of skeleton
+ kernelSkelInterpolate<<<grid,block>>>((float*)pbaTextures[0],pbaTexSize);
+ cudaUnbindTexture(pbaTexColor);
+ cudaUnbindTexture(pbaTexColor2);
+ if (outputSkelDT) cudaMemcpy(outputSkelDT, pbaTextures[0], pbaTexSize * pbaTexSize * sizeof(float), cudaMemcpyDeviceToHost);
+}
+
+
+
+
+
+__global__ void kernelFill(unsigned char* output, int size, unsigned char bg, unsigned char fg, short xm, short ym, short xM, short yM, bool ne)
+{
+ int tx = blockIdx.x * blockDim.x + threadIdx.x;
+ int ty = blockIdx.y * blockDim.y + threadIdx.y;
+
+ if (tx>xm && ty>ym && tx<xM && ty<yM) //careful not to index outside the image..
+ {
+ int id0 = TOID(tx, ty, size);
+ unsigned char val = tex1Dfetch(pbaTexGray,id0); //
+ if (val==fg) //do we have a filled pixel? Then fill all to left/top/up/bottom of it which is background
+ {
+ bool fill = false;
+ int id = id0;
+ if (ne) //fill in north-east direction:
+ {
+ for(short x=tx+1;x<xM;++x) //REMARK: here and below, the interesting thing is that it's faster, by about 10-15%, to fill a whole
+ { // scanline rather than oly until the current block's borders (+1). The reason is that filling a whole
+ // scanline decreases the total #sweeps, which seems to be the limiting speed factor
+ if (tex1Dfetch(pbaTexGray,++id)!=bg) break;
+ output[id] = fg; fill = true;
+ }
+
+ id = id0;
+ for(short y=ty-1;y>ym;--y)
+ {
+ if (tex1Dfetch(pbaTexGray,id-=size)!=bg) break;
+ output[id] = fg; fill = true;
+ }
+ }
+ else //fill in south-west direction:
+ {
+ for(short x=tx-1;x>xm;--x)
+ {
+ if (tex1Dfetch(pbaTexGray,--id)!=bg) break;
+ output[id] = fg; fill = true;
+ }
+
+ id = id0;
+ for(short y=ty+1;y<yM;++y)
+ {
+ if (tex1Dfetch(pbaTexGray,id+=size)!=bg) break;
+ output[id] = fg; fill = true;
+ }
+ }
+
+ if (fill) fill_gc = true; //if we filled anything, inform caller; we 'gather' this info from a local var into the
+ //global var here, since it's faster than writing the global var in the for loops
+ }
+ }
+}
+
+
+
+
+__global__ void kernelFillHoles(unsigned char* output, int size, unsigned char bg, unsigned char fg, unsigned char fill_fg)
+{
+ int tx = blockIdx.x * blockDim.x + threadIdx.x;
+ int ty = blockIdx.y * blockDim.y + threadIdx.y;
+
+ if (tx>=0 && ty>=0 && tx<size && ty<size) //careful not to index outside the image..
+ {
+ int id = TOID(tx, ty, size);
+ unsigned char val = tex1Dfetch(pbaTexGray,id); //
+ if (val==fill_fg)
+ output[id] = bg;
+ else if (val==bg)
+ output[id] = fg;
+ }
+}
+
+
+int skelft2DFill(unsigned char* outputFill, short sx, short sy, short xm, short ym, short xM, short yM, unsigned char fill_value)
+{
+ dim3 block = dim3(BLOCKX,BLOCKY);
+ dim3 grid = dim3(pbaTexSize/block.x,pbaTexSize/block.y);
+
+ unsigned char background;
+ int id = sy * pbaTexSize + sx;
+ cudaMemcpy(&background,(unsigned char*)pbaTextures[2]+id,sizeof(unsigned char),cudaMemcpyDeviceToHost); //See which is the value we have to fill from (sx,sy)
+
+ cudaMemset(((unsigned char*)pbaTextures[2])+id,fill_value,sizeof(unsigned char)); //Fill the seed (x,y) on the GPU
+
+ cudaBindTexture(0, pbaTexGray, pbaTextures[2]); //Used to read the thresholded DT
+
+ int iter=0;
+ bool xy = true; //Direction of filling for current sweep: either north-east or south-west
+ //This kind of balances the memory-accesses nicely over kernel calls
+ for(;;++iter,xy=!xy) //Keep filling a sweep at a time until we have no background pixels anymore
+ {
+ bool filled = false; //Initialize flag: we didn't fill anything in this sweep
+ cudaMemcpyToSymbol(fill_gc,&filled,sizeof(bool),0,cudaMemcpyHostToDevice); //Pass flag to CUDA
+ kernelFill<<<grid, block>>>((unsigned char*)pbaTextures[2],pbaTexSize,background,fill_value,xm,ym,xM,yM,xy);
+ //One fill sweep
+ cudaMemcpyFromSymbol(&filled,fill_gc,sizeof(bool),0,cudaMemcpyDeviceToHost); //See if we filled anything in this sweep
+ if (!filled) break; //Nothing filled? Then we're done, the image didn't change
+ }
+ cudaUnbindTexture(pbaTexGray);
+
+ if (outputFill) cudaMemcpy(outputFill, (unsigned char*)pbaTextures[2], pbaTexSize * pbaTexSize * sizeof(unsigned char), cudaMemcpyDeviceToHost);
+
+ return iter; //Return #iterations done for the fill - useful as a performance measure for caller
+}
+
+
+
+int skelft2DFillHoles(unsigned char* outputFill, short sx, short sy, unsigned char foreground)
+{
+ unsigned char background;
+ unsigned char fill_value = 128;
+ int id = sy * pbaTexSize + sx;
+ cudaMemcpy(&background,(unsigned char*)pbaTextures[2]+id,sizeof(unsigned char),cudaMemcpyDeviceToHost); //See which is the value at (sx,sy)
+
+ int iter = skelft2DFill(0,sx,sy,0,0,pbaTexSize,pbaTexSize,fill_value); //First, fill the background surrounding the image with some special value
+
+ dim3 block = dim3(BLOCKX,BLOCKY);
+ dim3 grid = dim3(pbaTexSize/block.x,pbaTexSize/block.y);
+
+
+ cudaBindTexture(0, pbaTexGray, pbaTextures[2]); //Used to read the thresholded DT
+
+ kernelFillHoles<<<grid, block>>>((unsigned char*)pbaTextures[2],pbaTexSize,background,foreground,fill_value);
+
+ cudaUnbindTexture(pbaTexGray);
+
+ if (outputFill) cudaMemcpy(outputFill, (unsigned char*)pbaTextures[2], pbaTexSize * pbaTexSize * sizeof(unsigned char), cudaMemcpyDeviceToHost);
+
+ return iter;
+}
+
+
+
+//-------- Advection-related code ----------------------------------------------------------------------
+
+
+void advect2DSetSites(float* sites, int nsites)
+{
+ cudaMemcpy(pbaTexAdvectSites, sites, nsites * sizeof(float2), cudaMemcpyHostToDevice); //Copy side-coords from CPU->GPU
+}
+
+//#define KERNEL(radius,dist) expf(-10*dist/radius)
+#define KERNEL(radius,dist) ((radius-dist)/radius)
+
+
+
+__global__ void kernelSplat(float* output, int size, int numpts, float d)
+//REMARK: This could be done tens of times faster using OpenGL (draw point-sprites with a 2D distance-kernel texture)
+{
+ int offs = blockIdx.x * blockDim.x + threadIdx.x;
+
+ if (offs < numpts) //careful not to index outside the site-vector..
+ {
+ float2 p = tex1Dfetch(pbaTexAdvect,offs); //splat the site whose coords are at location offs in pbaTexAdvect[]
+
+ float d2 = d*d;
+ short tx = p.x, ty = p.y; //REMARK: if I make these float, then something gets ccrewed up - why?
+ short jmin = ty-d;
+ short jmax = ty+d;
+
+ for(short j=jmin;j<=jmax;++j) //bounding-box of the splat at 'skel'
+ {
+ float dy = (j-ty)*(j-ty); //precalc this for speed
+ short imin = tx-d;
+ short imax = tx+d;
+
+ int offs = __mul24(int(j),size);
+
+ for(short i=imin;i<=imax;++i) //REMARK: this could be made ~1.5x faster by computing the x-scanline intersection (min,max) with the d2-circle..
+ {
+ float r2 = dy+(i-tx)*(i-tx);
+ if (r2<=d2) //check we're really inside the splat
+ {
+ int id = int(i)+offs;
+ float val = KERNEL(d2,r2);
+ output[id] += val; //Accumulate density (WARNING: hope this is thread-safe!!)
+ //If not, I need atomicAdd() on floats which requires CUDA 2.0
+ }
+ }
+ }
+ }
+}
+
+
+inline __device__ float2 computeGradient(const float2& p) //Compute -gradient of density at p
+{
+ float v_d = tex2D(pbaTexDensity,p.x,p.y-1);
+ float v_l = tex2D(pbaTexDensity,p.x-1,p.y);
+ float v_r = tex2D(pbaTexDensity,p.x+1,p.y);
+ float v_t = tex2D(pbaTexDensity,p.x,p.y+1);
+
+ return make_float2((v_l-v_r)/2,(v_d-v_t)/2);
+}
+
+
+inline __device__ float2 computeGradientExplicit(float* density, const float2& p, int size) //Compute gradient of density at p
+{
+ int x = p.x, y = p.y;
+ float v_d = density[TOID(x,y-1,size)];
+ float v_l = density[TOID(x-1,y,size)];
+ float v_r = density[TOID(x+1,y,size)];
+ float v_t = density[TOID(x,y+1,size)];
+
+ return make_float2((v_r-v_l)/2,(v_t-v_d)/2);
+}
+
+
+__global__ void kernelGradient(float2* grad, int size)
+{
+ int tx = blockIdx.x * blockDim.x + threadIdx.x;
+ int ty = blockIdx.y * blockDim.y + threadIdx.y;
+
+ const float eps = 0.0001;
+
+ if (tx<size && ty<size) //Careful not to go outside the image..
+ {
+ float2 p = make_float2(tx,ty);
+ float2 g = computeGradient(p);
+
+ float gn = sqrtf(g.x*g.x+g.y*g.y); //robustly normalize the gradient
+ g.x /= gn+eps; g.y /= gn+eps;
+
+ grad[TOID(tx,ty,size)] = g;
+ }
+}
+
+
+void advect2DSplat(float* output, int num_pts, float radius)
+{
+ dim3 block = dim3(BLOCKX*BLOCKY); //Prepare the splatting kernel: this operates on a vector of 2D sites
+ int numpts_b = (num_pts/block.x+1)*block.x; //Find higher multiple of blocksize than # sites
+ dim3 grid = dim3(numpts_b/block.x);
+
+ cudaMemset(pbaTexDensityData,0,sizeof(float)*pbaTexSize*pbaTexSize); //Zero the density texture
+ cudaBindTexture(0,pbaTexAdvect,pbaTexAdvectSites); //Bind sites to a texture
+
+ kernelSplat<<< grid, block >>>(pbaTexDensityData, pbaTexSize, num_pts, radius);
+ //Splat kernel
+ cudaThreadSynchronize();
+
+ cudaUnbindTexture(pbaTexAdvect); //Done with the sites
+ //Copy splat-map to CPU (if desired)
+ if (output) cudaMemcpy(output, pbaTexDensityData, pbaTexSize * pbaTexSize * sizeof(float), cudaMemcpyDeviceToHost);
+}
+
+
+
+void advect2DGetSites(float* output, int num_pts) //Get sites CUDA->CPU
+{
+ cudaMemcpy(output,pbaTexAdvectSites,num_pts*sizeof(float2),cudaMemcpyDeviceToHost);
+}
+
+
+void advect2DGradient() //Compute and save gradient of density map
+{
+ dim3 block = dim3(BLOCKX,BLOCKY);
+ dim3 grid = dim3(pbaTexSize/block.x,pbaTexSize/block.y);
+
+ cudaMemset(pbaTexGradientData,0,sizeof(float2)*pbaTexSize*pbaTexSize); //Zero the gradient texture
+
+ pbaTexDensity.filterMode = cudaFilterModeLinear; //The floating-point density map
+ pbaTexDensity.normalized = false;
+ cudaChannelFormatDesc channelDesc1 = cudaCreateChannelDesc(32,0,0,0,cudaChannelFormatKindFloat);
+ cudaBindTexture2D(0,pbaTexDensity,pbaTexDensityData,channelDesc1,pbaTexSize,pbaTexSize,4*pbaTexSize);
+
+ kernelGradient<<< grid, block >>>(pbaTexGradientData, pbaTexSize); //Gradient kernel
+
+ cudaUnbindTexture(pbaTexDensity);
+}
+
+
+
+
+
diff --git a/skelft.h b/skelft.h
new file mode 100644
index 0000000..e426053
--- /dev/null
+++ b/skelft.h
@@ -0,0 +1,106 @@
+// CUDA-based Skeletonization, Distance Transforms, Feature Transforms, Erosion
+// and Dilation, and Flood Filling Toolkit
+//
+//(c) Alexandru Telea, Univ. of Groningen, 2011
+//====================================================================================================================
+#pragma once
+
+// Given an image of size nx x ny that we want to process, CUDA may need to use
+// a larger image (e.g. pow 2) internally
+// to handle our image. This returns the size of this larger image. Since we get
+// back data from CUDA at this size,
+// we need to know about the size to allocate all our app-side buffers.
+int skelft2DSize(int nx, int ny);
+
+// Initialize CUDA and allocate memory
+// textureSize is 2^k with k >= 6
+void skelft2DInitialization(int textureSize);
+
+// Deallocate all memory on GPU
+void skelft2DDeinitialization();
+
+// Set various FT computation params. Can be called before each FT computation
+void skelft2DParams(int phase1Band, int phase2Band, int phase3Band);
+
+// Compute 2D feature transform (or Voronoi diagram)
+// siteParam: 2D texture site parameterization. 0 = non-site pixel; >0 = site
+// parameter at current pixel.
+// output: 2D texture FT. Gives coords (i,j) of closest site to each pixel.
+// If output==0, the FT is still computed and stored on CUDA, but not
+// passed back to CPU.
+// size:
+void skelft2DFT(short *output, float *siteParam, short xm, short ym, short xM,
+ short yM, int size);
+
+// Compute thresholded skeleton of in-CUDA-memory FT.
+// length: max value of site parameter (needed for normalization)
+// threshold: threshold for the skeleton importance, like in the AFMM algorithm
+// output: binary thresholded skeleton (0=background,1=skeleton)
+void skelft2DSkeleton(unsigned char *output, float length, float threshold,
+ short xm, short ym, short xM, short yM);
+
+// Compute thresholded DT of in-CUDA-memory FT.
+// threshold: upper value for DT
+// output: binary thresholded DT (1=larger than threshold,0=otherwise)
+void skelft2DDT(short *output, float threshold, short xm, short ym, short xM,
+ short yM);
+
+// Compute exact DT of in-CUDA-memory FT.
+// output: exact floating-point DT
+void skelft2DDT(float *outputDT, short xm, short ym, short xM, short yM);
+
+// Make an arc-length parameterization of a binary shape, used for
+// skeletonization input
+// input: binary image whose boundary we parameterize
+// dx,dy:
+// param: arc-length parameterization image
+// size:
+// return: the boundary length
+float skelft2DMakeBoundary(const unsigned char *input, int xm, int ym, int xM,
+ int yM, float *param, int size, short iso = 1,
+ bool thr_upper = true);
+float skelft2DMakeBoundary(const float *input, int xm, int ym, int xM, int yM,
+ float *param, int size, float iso = 1,
+ bool thr_upper = true);
+
+// Compute topology events (skeleton endpoints) for an in-CUDA-memory
+// thresholded skeleton
+// topo: binary image (1=skel endpoints,0=otherwise), optional. If not
+// supplied, not returned
+// npts: on input, this gives the max #points we will return; on output,
+// set to the #points detected and returned
+// points: array of (x,y) pairs of the detected points
+extern "C" void skelft2DTopology(unsigned char *topo, int *npts, short *points,
+ short xm, short ym, short xM, short yM);
+
+// Utility: save given image to pgm file
+void skelft2DSave(short *outputFT, int dx, int dy, const char *f);
+
+// Compute DT of in-CUDA-memory skeleton
+void skel2DSkeletonDT(float *outputSkelDT, short xm, short ym, short xM,
+ short yM);
+
+// Fills all same-value 4-connected pixels from (seedx,seedy) with value
+// 'fill_val' in the in-CUDA-memory thresholded DT
+// outputFill: filled image
+// <return>: #iterations done for the fill, useful as a performance measure
+// (lower=better)
+int skelft2DFill(unsigned char *outputFill, short seedx, short seedy, short xm,
+ short ym, short xM, short yM, unsigned char foreground);
+
+int skelft2DFillHoles(unsigned char *outputFill, short x, short y,
+ unsigned char fill_val);
+
+//-----------------------
+
+void advect2DSetSites(float *sites, int nsites, int inside_sites);
+
+void advectSetImage(int *image, int nx, int ny);
+
+void advect2DGetSites(float *sites, int nsites);
+
+void advect2DSplat(float *density, int num_pts, float radius);
+
+void advectAttract(int num_pts, float step);
+
+void advectRelax(int num_pts, float map_avg, float radius);
diff --git a/skelft_core.cpp b/skelft_core.cpp
new file mode 100644
index 0000000..e4cec20
--- /dev/null
+++ b/skelft_core.cpp
@@ -0,0 +1,199 @@
+#include <cstdlib>
+#include <cmath>
+#include <cstring>
+#include <stack>
+#include <vector>
+#include <iostream>
+
+const float MYINFINITY = 1.0e7f;
+
+inline float SQR(float x) { return x * x; }
+inline int SQR(int x) { return x * x; }
+inline float INTERP(float a, float b, float t) { return a * (1 - t) + b * t; }
+
+struct Coord {
+ int i, j;
+ Coord(int i_, int j_)
+ : i(i_)
+ , j(j_){};
+ Coord() {}
+ int operator==(const Coord &c) const { return i == c.i && j == c.j; }
+};
+
+using namespace std;
+
+const float ALIVE = -10, NARROW_BAND = ALIVE + 1,
+ FAR_AWAY = ALIVE + 2; // Classification values for pixels in an
+ // image. The values are arbitrary.
+
+int skelft2DSize(int nx, int ny) {
+ int dx = (int) floor(
+ pow(2.0f, int(log(float(nx)) / log(2.0f) +
+ 1))); // Find minimal pow of 2 which fits the input image
+ int dy = (int) floor(pow(2.0f, int(log(float(ny)) / log(2.0f) + 1)));
+ int fboSize = max(dx, dy); // DT/FT/skeleton image size (pow of 2, should be
+ // able to contain the input image)
+
+ return fboSize;
+}
+
+// Classify 'f' into inside, outside, and boundary pixels according to isovalue
+// 'iso'
+// Inside pixels are those with value > iso (if thr_upper==true), else those
+// with value < iso.
+template <class T>
+void classify(vector<Coord> &s, const T *f, int xm, int ym, int xM, int yM,
+ int size, T iso, bool thr_upper, float *siteParam,
+ bool scan_bot_to_top) {
+ for (int i = 0; i < size * size; ++i)
+ siteParam[i] = ALIVE; // First: all points are ALIVE, i.e., sites
+
+ for (int j = ym; j < yM; ++j)
+ for (int i = xm; i < xM; ++i) {
+ T fptr = f[i + j * size]; // Mark subset of points which are
+ // FAR_AWAY, i.e. NOT sites
+ if (((thr_upper && fptr >= iso) || (!thr_upper && fptr <= iso)))
+ siteParam[i + size * j] = FAR_AWAY;
+ }
+
+ int js, je, jd;
+ if (scan_bot_to_top) {
+ js = ym;
+ je = yM;
+ jd = 1;
+ } else {
+ js = yM - 1;
+ je = ym - 1;
+ jd = -1;
+ }
+
+ for (int j = js; j != je; j += jd)
+ for (int i = xm; i < xM; ++i) {
+ float &cv = siteParam[i + size * j];
+ if (cv == FAR_AWAY)
+ if (((i - 1 + size * j) > 0 &&
+ siteParam[i - 1 + size * j] == ALIVE) ||
+ siteParam[i + 1 + size * j] == ALIVE ||
+ ((i + size * (j - 1)) > 0 &&
+ siteParam[i + size * (j - 1)] == ALIVE) ||
+ siteParam[i + size * (j + 1)] == ALIVE) {
+ cv = NARROW_BAND;
+ s.push_back(Coord(i, j));
+ }
+ }
+}
+
+// Encode input image (classified as inside/outside/boundary into a
+// boundary-length parameterization image, needed for skeleton computations.
+//
+float encodeBoundary(vector<Coord> &s, int xm, int ym, int xM, int yM,
+ float *siteParam, int size) {
+ const float LARGE_BOUNDARY_VAL =
+ 1000; // Value that the boundary-param will jump with between disjoint,
+ // self-connected, boundary pixel chains
+
+ float length = -LARGE_BOUNDARY_VAL + 1; // Compute the boundary length
+
+ while (s.size()) // Process all narrowband points, add their
+ // arc-length-parameterization in siteParam
+ {
+ Coord pt = s[s.size() - 1];
+ s.pop_back(); // Pick narrowband-point, test if already numbered
+ if (siteParam[pt.i + size * pt.j] > 0)
+ continue;
+
+ int ci, cj;
+ float cc = 0; // Point pt not numbered
+ for (int dj = -1; dj < 2; ++dj) // Find min-numbered-neighbour of pt
+ for (int di = -1; di < 2; ++di) {
+ int ii = pt.i + di, jj = pt.j + dj;
+ if (ii < xm || ii >= xM || jj < ym || jj >= yM)
+ continue;
+ float val = siteParam[ii + size * jj];
+ if (val < 0)
+ continue;
+ if (cc && val > cc)
+ continue;
+ ci = ii;
+ cj = jj;
+ cc = val;
+ }
+
+ float c = (!cc) ? length + LARGE_BOUNDARY_VAL
+ : cc + sqrt(float((pt.i - ci) * (pt.i - ci) +
+ (pt.j - cj) * (pt.j - cj)));
+
+ siteParam[pt.i + size * pt.j] =
+ c; // Also store the sites' parameterization in siteParam[] for CUDA
+
+ if (c > length)
+ length = c; // Determine length of boundary, used for
+ // wraparound-distance-computations
+
+ for (int dj = -1; dj < 2; ++dj)
+ for (int di = -1; di < 2; ++di) {
+ if (di == 0 && dj == 0)
+ continue;
+ int ii = pt.i + di, jj = pt.j + dj;
+ if (ii >= xm && ii < xM && jj >= ym && jj < yM &&
+ siteParam[ii + size * jj] == NARROW_BAND)
+ s.push_back(Coord(ii, jj));
+ }
+ }
+
+ for (int i = 0, iend = size * size; i < iend;
+ ++i) // All ALIVE points are marked now as non-sites:
+ if (siteParam[i] < 0)
+ siteParam[i] = 0;
+
+ return length;
+}
+
+float skelft2DMakeBoundary(const float *f, int xm, int ym, int xM, int yM,
+ float *siteParam, int size, float iso,
+ bool thr_upper) {
+ bool scan_bot_to_top = false;
+
+ vector<Coord> s;
+ s.reserve((xM - xm) * (yM - ym));
+
+ classify(s, f, xm, ym, xM, yM, size, iso, thr_upper, siteParam,
+ scan_bot_to_top);
+ return encodeBoundary(s, xm, ym, xM, yM, siteParam, size);
+}
+
+float skelft2DMakeBoundary(const unsigned char *f, int xm, int ym, int xM,
+ int yM, float *siteParam, int size, short iso,
+ bool thr_upper) {
+ bool scan_bot_to_top = false;
+
+ vector<Coord> s;
+ s.reserve((xM - xm) * (yM - ym));
+
+ classify(s, f, xm, ym, xM, yM, size, (unsigned char) iso, thr_upper,
+ siteParam, scan_bot_to_top);
+ return encodeBoundary(s, xm, ym, xM, yM, siteParam, size);
+}
+
+void skelft2DSave(short *outputFT, int dx, int dy, const char *f) {
+ FILE *fp = fopen(f, "wb");
+ fprintf(fp, "P5 %d %d 255\n", dx, dy);
+
+ const int SIZE = 3000;
+ unsigned char buf[SIZE];
+
+ int size = dx * dy;
+ int bb = 0;
+ float range = max(dx, dy) / 255.0f;
+ for (short *v = outputFT, *vend = outputFT + size; v < vend; ++v) {
+ short val = *v;
+ buf[bb++] = (unsigned char) (val / range);
+ if (bb == SIZE) {
+ fwrite(buf, sizeof(unsigned char), SIZE, fp);
+ bb = 0;
+ }
+ }
+ if (bb)
+ fwrite(buf, sizeof(unsigned char), bb, fp);
+ fclose(fp);
+}
diff --git a/skelftkernel.h b/skelftkernel.h
new file mode 100644
index 0000000..1182181
--- /dev/null
+++ b/skelftkernel.h
@@ -0,0 +1,468 @@
+// CUDA-based Skeletonization, Distance Transforms, Feature Transforms, Erosion
+// and Dilation, and Flood Filling Toolkit
+//
+//(c) Alexandru Telea, Univ. of Groningen, 2011
+//
+// This code is adapted from "Parallel Banding Algorithm to compute exact
+// distance transforms with the GPU",
+// T. Cao, K. Tang, A. Mohamed, T. Tan, Proc. ACM Symo. on Interactive 3D
+// Graphics and Games (I3D), 2010, 83-90
+//====================================================================================================================
+
+// MARKER is used to mark blank pixels in the texture. Any uncolored pixels will
+// have x = MARKER.
+// Input texture should have x = MARKER for all pixels other than sites
+#define MARKER -32768
+
+#define TOID(x, y, size) (__mul24((y), (size)) + (x))
+
+// Transpose a square matrix
+__global__ void kernelTranspose(short2 *data, int size) {
+ __shared__ short2 block1[TILE_DIM][TILE_DIM + 1];
+ __shared__ short2 block2[TILE_DIM][TILE_DIM + 1];
+
+ int blockIdx_y = blockIdx.x;
+ int blockIdx_x = blockIdx.x + blockIdx.y;
+
+ if (blockIdx_x >= gridDim.x)
+ return;
+
+ int blkX, blkY, x, y, id1, id2;
+ short2 pixel;
+
+ blkX = __mul24(blockIdx_x, TILE_DIM);
+ blkY = __mul24(blockIdx_y, TILE_DIM);
+
+ x = blkX + threadIdx.x;
+ y = blkY + threadIdx.y;
+ id1 = __mul24(y, size) + x;
+
+ x = blkY + threadIdx.x;
+ y = blkX + threadIdx.y;
+ id2 = __mul24(y, size) + x;
+
+ // read the matrix tile into shared memory
+ for (int i = 0; i < TILE_DIM; i += BLOCK_ROWS) {
+ int idx = __mul24(i, size);
+ block1[threadIdx.y + i][threadIdx.x] =
+ tex1Dfetch(pbaTexColor, id1 + idx);
+ block2[threadIdx.y + i][threadIdx.x] =
+ tex1Dfetch(pbaTexColor, id2 + idx);
+ }
+
+ __syncthreads();
+
+ // write the transposed matrix tile to global memory
+ for (int i = 0; i < TILE_DIM; i += BLOCK_ROWS) {
+ int idx = __mul24(i, size);
+ pixel = block1[threadIdx.x][threadIdx.y + i];
+ data[id2 + idx] = make_short2(pixel.y, pixel.x);
+ pixel = block2[threadIdx.x][threadIdx.y + i];
+ data[id1 + idx] = make_short2(pixel.y, pixel.x);
+ }
+}
+
+__global__ void kernelFloodDown(short2 *output, int size, int bandSize) {
+ int tx = blockIdx.x * blockDim.x + threadIdx.x;
+ int ty = blockIdx.y * bandSize;
+ int id = TOID(tx, ty, size);
+
+ short2 pixel1, pixel2;
+
+ pixel1 = make_short2(MARKER, MARKER);
+
+ for (int i = 0; i < bandSize; ++i, id += size) {
+ pixel2 = tex1Dfetch(pbaTexColor, id);
+
+ if (pixel2.x != MARKER)
+ pixel1 = pixel2;
+
+ output[id] = pixel1;
+ }
+}
+
+__global__ void kernelFloodUp(short2 *output, int size, int bandSize) {
+ int tx = blockIdx.x * blockDim.x + threadIdx.x;
+ int ty = (blockIdx.y + 1) * bandSize - 1;
+ int id = TOID(tx, ty, size);
+
+ short2 pixel1, pixel2;
+ int dist1, dist2;
+
+ pixel1 = make_short2(MARKER, MARKER);
+
+ for (int i = 0; i < bandSize; i++, id -= size) {
+ dist1 = abs(pixel1.y - ty + i);
+
+ pixel2 = tex1Dfetch(pbaTexColor, id);
+ dist2 = abs(pixel2.y - ty + i);
+
+ if (dist2 < dist1)
+ pixel1 = pixel2;
+
+ output[id] = pixel1;
+ }
+}
+
+__global__ void kernelPropagateInterband(short2 *output, int size,
+ int bandSize) {
+ int tx = blockIdx.x * blockDim.x + threadIdx.x;
+ int inc = __mul24(bandSize, size);
+ int ny, nid, nDist;
+ short2 pixel;
+
+ // Top row, look backward
+ int ty = __mul24(blockIdx.y, bandSize);
+ int topId = TOID(tx, ty, size);
+ int bottomId = TOID(tx, ty + bandSize - 1, size);
+
+ pixel = tex1Dfetch(pbaTexColor, topId);
+ int myDist = abs(pixel.y - ty);
+
+ for (nid = bottomId - inc; nid >= 0; nid -= inc) {
+ pixel = tex1Dfetch(pbaTexColor, nid);
+ if (pixel.x != MARKER) {
+ nDist = abs(pixel.y - ty);
+ if (nDist < myDist)
+ output[topId] = pixel;
+ break;
+ }
+ }
+
+ // Last row, look downward
+ ty = ty + bandSize - 1;
+ pixel = tex1Dfetch(pbaTexColor, bottomId);
+ myDist = abs(pixel.y - ty);
+
+ for (ny = ty + 1, nid = topId + inc; ny < size;
+ ny += bandSize, nid += inc) {
+ pixel = tex1Dfetch(pbaTexColor, nid);
+
+ if (pixel.x != MARKER) {
+ nDist = abs(pixel.y - ty);
+ if (nDist < myDist)
+ output[bottomId] = pixel;
+ break;
+ }
+ }
+}
+
+__global__ void kernelUpdateVertical(short2 *output, int size, int band,
+ int bandSize) {
+ int tx = blockIdx.x * blockDim.x + threadIdx.x;
+ int ty = blockIdx.y * bandSize;
+
+ short2 top = tex1Dfetch(pbaTexLinks, TOID(tx, ty, size));
+ short2 bottom = tex1Dfetch(pbaTexLinks, TOID(tx, ty + bandSize - 1, size));
+ short2 pixel;
+
+ int dist, myDist;
+
+ int id = TOID(tx, ty, size);
+
+ for (int i = 0; i < bandSize; i++, id += size) {
+ pixel = tex1Dfetch(pbaTexColor, id);
+ myDist = abs(pixel.y - (ty + i));
+
+ dist = abs(top.y - (ty + i));
+ if (dist < myDist) {
+ myDist = dist;
+ pixel = top;
+ }
+
+ dist = abs(bottom.y - (ty + i));
+ if (dist < myDist)
+ pixel = bottom;
+
+ output[id] = pixel;
+ }
+}
+
+// Input: y1 < y2
+__device__ float interpoint(int x1, int y1, int x2, int y2, int x0) {
+ float xM = float(x1 + x2) / 2.0f;
+ float yM = float(y1 + y2) / 2.0f;
+ float nx = x2 - x1;
+ float ny = y2 - y1;
+
+ return yM + nx * (xM - x0) / ny;
+}
+
+__global__ void kernelProximatePoints(short2 *stack, int size, int bandSize) {
+ int tx = __mul24(blockIdx.x, blockDim.x) + threadIdx.x;
+ int ty = __mul24(blockIdx.y, bandSize);
+ int id = TOID(tx, ty, size);
+ int lasty = -1;
+ short2 last1, last2, current;
+ float i1, i2;
+
+ last1.y = -1;
+ last2.y = -1;
+
+ for (int i = 0; i < bandSize; i++, id += size) {
+ current = tex1Dfetch(pbaTexColor, id);
+
+ if (current.x != MARKER) {
+ while (last2.y >= 0) {
+ i1 = interpoint(last1.x, last2.y, last2.x, lasty, tx);
+ i2 = interpoint(last2.x, lasty, current.x, current.y, tx);
+
+ if (i1 < i2)
+ break;
+
+ lasty = last2.y;
+ last2 = last1;
+
+ if (last1.y >= 0)
+ last1 = stack[TOID(tx, last1.y, size)];
+ }
+
+ last1 = last2;
+ last2 = make_short2(current.x, lasty);
+ lasty = current.y;
+
+ stack[id] = last2;
+ }
+ }
+
+ // Store the pointer to the tail at the last pixel of this band
+ if (lasty != ty + bandSize - 1)
+ stack[TOID(tx, ty + bandSize - 1, size)] = make_short2(MARKER, lasty);
+}
+
+__global__ void kernelCreateForwardPointers(short2 *output, int size,
+ int bandSize) {
+ int tx = __mul24(blockIdx.x, blockDim.x) + threadIdx.x;
+ int ty = __mul24(blockIdx.y + 1, bandSize) - 1;
+ int id = TOID(tx, ty, size);
+ int lasty = -1, nexty;
+ short2 current;
+
+ // Get the tail pointer
+ current = tex1Dfetch(pbaTexLinks, id);
+
+ if (current.x == MARKER)
+ nexty = current.y;
+ else
+ nexty = ty;
+
+ for (int i = 0; i < bandSize; i++, id -= size)
+ if (ty - i == nexty) {
+ current = make_short2(lasty, tex1Dfetch(pbaTexLinks, id).y);
+ output[id] = current;
+
+ lasty = nexty;
+ nexty = current.y;
+ }
+
+ // Store the pointer to the head at the first pixel of this band
+ if (lasty != ty - bandSize + 1)
+ output[id + size] = make_short2(lasty, MARKER);
+}
+
+__global__ void kernelMergeBands(short2 *output, int size, int bandSize) {
+ int tx = __mul24(blockIdx.x, blockDim.x) + threadIdx.x;
+ int band1 = blockIdx.y * 2;
+ int band2 = band1 + 1;
+ int firsty, lasty;
+ short2 last1, last2, current;
+ // last1 and last2: x component store the x coordinate of the site,
+ // y component store the backward pointer
+ // current: y component store the x coordinate of the site,
+ // x component store the forward pointer
+
+ // Get the two last items of the first list
+ lasty = __mul24(band2, bandSize) - 1;
+ last2 = make_short2(tex1Dfetch(pbaTexColor, TOID(tx, lasty, size)).x,
+ tex1Dfetch(pbaTexLinks, TOID(tx, lasty, size)).y);
+
+ if (last2.x == MARKER) {
+ lasty = last2.y;
+
+ if (lasty >= 0)
+ last2 =
+ make_short2(tex1Dfetch(pbaTexColor, TOID(tx, lasty, size)).x,
+ tex1Dfetch(pbaTexLinks, TOID(tx, lasty, size)).y);
+ else
+ last2 = make_short2(MARKER, MARKER);
+ }
+
+ if (last2.y >= 0) {
+ // Second item at the top of the stack
+ last1 = make_short2(tex1Dfetch(pbaTexColor, TOID(tx, last2.y, size)).x,
+ tex1Dfetch(pbaTexLinks, TOID(tx, last2.y, size)).y);
+ }
+
+ // Get the first item of the second band
+ firsty = __mul24(band2, bandSize);
+ current = make_short2(tex1Dfetch(pbaTexLinks, TOID(tx, firsty, size)).x,
+ tex1Dfetch(pbaTexColor, TOID(tx, firsty, size)).x);
+
+ if (current.y == MARKER) {
+ firsty = current.x;
+
+ if (firsty >= 0)
+ current =
+ make_short2(tex1Dfetch(pbaTexLinks, TOID(tx, firsty, size)).x,
+ tex1Dfetch(pbaTexColor, TOID(tx, firsty, size)).x);
+ else
+ current = make_short2(MARKER, MARKER);
+ }
+
+ float i1, i2;
+
+ // Count the number of item in the second band that survive so far.
+ // Once it reaches 2, we can stop.
+ int top = 0;
+
+ while (top < 2 && current.y >= 0) {
+ // While there's still something on the left
+ while (last2.y >= 0) {
+ i1 = interpoint(last1.x, last2.y, last2.x, lasty, tx);
+ i2 = interpoint(last2.x, lasty, current.y, firsty, tx);
+
+ if (i1 < i2)
+ break;
+
+ lasty = last2.y;
+ last2 = last1;
+ --top;
+
+ if (last1.y >= 0)
+ last1 = make_short2(
+ tex1Dfetch(pbaTexColor, TOID(tx, last1.y, size)).x,
+ output[TOID(tx, last1.y, size)].y);
+ }
+
+ // Update the current pointer
+ output[TOID(tx, firsty, size)] = make_short2(current.x, lasty);
+
+ if (lasty >= 0)
+ output[TOID(tx, lasty, size)] = make_short2(firsty, last2.y);
+
+ last1 = last2;
+ last2 = make_short2(current.y, lasty);
+ lasty = firsty;
+ firsty = current.x;
+
+ top = max(1, top + 1);
+
+ // Advance the current pointer to the next one
+ if (firsty >= 0)
+ current =
+ make_short2(tex1Dfetch(pbaTexLinks, TOID(tx, firsty, size)).x,
+ tex1Dfetch(pbaTexColor, TOID(tx, firsty, size)).x);
+ else
+ current = make_short2(MARKER, MARKER);
+ }
+
+ // Update the head and tail pointer.
+ firsty = __mul24(band1, bandSize);
+ lasty = __mul24(band2, bandSize);
+ current = tex1Dfetch(pbaTexLinks, TOID(tx, firsty, size));
+
+ if (current.y == MARKER && current.x < 0) { // No head?
+ last1 = tex1Dfetch(pbaTexLinks, TOID(tx, lasty, size));
+
+ if (last1.y == MARKER)
+ current.x = last1.x;
+ else
+ current.x = lasty;
+
+ output[TOID(tx, firsty, size)] = current;
+ }
+
+ firsty = __mul24(band1, bandSize) + bandSize - 1;
+ lasty = __mul24(band2, bandSize) + bandSize - 1;
+ current = tex1Dfetch(pbaTexLinks, TOID(tx, lasty, size));
+
+ if (current.x == MARKER && current.y < 0) { // No tail?
+ last1 = tex1Dfetch(pbaTexLinks, TOID(tx, firsty, size));
+
+ if (last1.x == MARKER)
+ current.y = last1.y;
+ else
+ current.y = firsty;
+
+ output[TOID(tx, lasty, size)] = current;
+ }
+}
+
+__global__ void kernelDoubleToSingleList(short2 *output, int size) {
+ int tx = __mul24(blockIdx.x, blockDim.x) + threadIdx.x;
+ int ty = blockIdx.y;
+ int id = TOID(tx, ty, size);
+
+ output[id] = make_short2(tex1Dfetch(pbaTexColor, id).x,
+ tex1Dfetch(pbaTexLinks, id).y);
+}
+
+__global__ void kernelColor(short2 *output, int size) {
+ __shared__ short2 s_last1[BLOCKSIZE], s_last2[BLOCKSIZE];
+ __shared__ int s_lasty[BLOCKSIZE];
+
+ int col = threadIdx.x;
+ int tid = threadIdx.y;
+ int tx = __mul24(blockIdx.x, blockDim.x) + col;
+ int dx, dy, lasty;
+ unsigned int best, dist;
+ short2 last1, last2;
+
+ if (tid == blockDim.y - 1) {
+ lasty = size - 1;
+
+ last2 = tex1Dfetch(pbaTexColor, __mul24(lasty, size) + tx);
+
+ if (last2.x == MARKER) {
+ lasty = last2.y;
+ last2 = tex1Dfetch(pbaTexColor, __mul24(lasty, size) + tx);
+ }
+
+ if (last2.y >= 0)
+ last1 = tex1Dfetch(pbaTexColor, __mul24(last2.y, size) + tx);
+
+ s_last1[col] = last1;
+ s_last2[col] = last2;
+ s_lasty[col] = lasty;
+ }
+
+ __syncthreads();
+
+ for (int ty = size - 1 - tid; ty >= 0; ty -= blockDim.y) {
+ last1 = s_last1[col];
+ last2 = s_last2[col];
+ lasty = s_lasty[col];
+
+ dx = last2.x - tx;
+ dy = lasty - ty;
+ best = dist = __mul24(dx, dx) + __mul24(dy, dy);
+
+ while (last2.y >= 0) {
+ dx = last1.x - tx;
+ dy = last2.y - ty;
+ dist = __mul24(dx, dx) + __mul24(dy, dy);
+
+ if (dist > best)
+ break;
+
+ best = dist;
+ lasty = last2.y;
+ last2 = last1;
+
+ if (last2.y >= 0)
+ last1 = tex1Dfetch(pbaTexColor, __mul24(last2.y, size) + tx);
+ }
+
+ __syncthreads();
+
+ output[TOID(tx, ty, size)] = make_short2(last2.x, lasty);
+
+ if (tid == blockDim.y - 1) {
+ s_last1[col] = last1;
+ s_last2[col] = last2;
+ s_lasty[col] = lasty;
+ }
+
+ __syncthreads();
+ }
+} \ No newline at end of file
diff --git a/voronoisplat.cpp b/voronoisplat.cpp
new file mode 100644
index 0000000..b8ab426
--- /dev/null
+++ b/voronoisplat.cpp
@@ -0,0 +1,407 @@
+#include "voronoisplat.h"
+
+#include <algorithm>
+
+#include <QQuickWindow>
+#include <QOpenGLFramebufferObject>
+
+#include "scale.h"
+#include "skelft.h"
+
+static const float RAD_BLUR = 5.0f;
+static const int COLORMAP_SAMPLES = 128;
+
+VoronoiSplatRenderer::VoronoiSplatRenderer(const QSize &size)
+ : m_item(0)
+ , gl(QOpenGLContext::currentContext())
+ , m_size(size)
+{
+ setupShaders();
+
+ // sitesVAO: VBOs 0 & 1 are for sites & their values (init'd later)
+ m_sitesVAO.create();
+ gl.glGenBuffers(3, m_VBOs);
+
+ // 2ndPassVAO: VBO 2 is a quad mapping the final texture to the framebuffer
+ m_2ndPassVAO.create();
+ m_2ndPassVAO.bind();
+ GLfloat verts[] = {-1.0f, -1.0f, -1.0f, 1.0f,
+ 1.0f, -1.0f, 1.0f, 1.0f};
+ gl.glBindBuffer(GL_ARRAY_BUFFER, m_VBOs[2]);
+ gl.glBufferData(GL_ARRAY_BUFFER, sizeof(verts), verts, GL_STATIC_DRAW);
+
+ int vertAttrib = m_program2->attributeLocation("vert");
+ gl.glVertexAttribPointer(vertAttrib, 2, GL_FLOAT, GL_FALSE, 0, 0);
+ gl.glEnableVertexAttribArray(vertAttrib);
+ m_2ndPassVAO.release();
+
+ setupTextures();
+}
+
+void VoronoiSplatRenderer::setupShaders()
+{
+ m_program1 = new QOpenGLShaderProgram;
+ m_program1->addShaderFromSourceCode(QOpenGLShader::Vertex,
+R"EOF(#version 440
+
+uniform float rad_blur;
+uniform float rad_max;
+uniform float fbSize;
+
+in vec2 vert;
+in float scalar;
+
+out float value;
+
+void main() {
+ gl_PointSize = (rad_max + rad_blur) * 2.0;
+ gl_Position = vec4(2.0 * (vert / fbSize) - 1.0, 0, 1);
+ value = scalar;
+}
+)EOF");
+ m_program1->addShaderFromSourceCode(QOpenGLShader::Fragment,
+R"EOF(#version 440
+
+uniform sampler2D siteDT;
+uniform float rad_blur;
+uniform float rad_max;
+
+in float value;
+
+layout (location = 0) out vec4 fragColor;
+
+void main() {
+ float dt = texelFetch(siteDT, ivec2(gl_FragCoord.xy), 0).r;
+ if (dt > rad_max)
+ discard;
+ else {
+ float r = distance(gl_PointCoord, vec2(0.5, 0.5)) * (rad_max + rad_blur) * 2.0;
+ float r2 = r * r;
+ float rad = dt + rad_blur;
+ float rad2 = rad * rad;
+ if (r2 > rad2)
+ discard;
+ else {
+ float w = exp(-5.0 * r2 / rad2);
+ fragColor = vec4(w * value, w, 0.0, 0.0);
+ }
+ }
+}
+)EOF");
+ m_program1->link();
+
+ m_program2 = new QOpenGLShaderProgram;
+ m_program2->addShaderFromSourceCode(QOpenGLShader::Vertex,
+R"EOF(#version 440
+
+in vec2 vert;
+
+void main() {
+ gl_Position = vec4(vert, 0, 1);
+}
+)EOF");
+
+ m_program2->addShaderFromSourceCode(QOpenGLShader::Fragment,
+R"EOF(
+#version 440
+
+layout (location = 0) out vec4 fragColor;
+
+uniform sampler2D siteDT;
+uniform sampler2D accumTex;
+uniform sampler2D colormap;
+uniform float rad_max;
+
+vec3 getRGB(float value) {
+ return texture2D(colormap, vec2(0, 1)).rgb;
+}
+
+void main() {
+ float dt = texelFetch(siteDT, ivec2(gl_FragCoord.xy), 0).r;
+ if (dt > rad_max)
+ discard;
+ else {
+ vec4 accum = texelFetch(accumTex, ivec2(gl_FragCoord.xy), 0);
+ // 1.0 is extra-accumulated because of white background
+ float value = 0.0f;
+ if (accum.g > 1.0)
+ value = (accum.r - 1.0) / (accum.g - 1.0);
+ else
+ value = accum.r / accum.g;
+ fragColor.rgb = getRGB(value);
+ fragColor.a = 1.0 - dt / rad_max;
+ }
+}
+)EOF");
+ m_program2->link();
+}
+
+void VoronoiSplatRenderer::setupTextures()
+{
+ gl.glGenTextures(2, m_textures);
+
+ // textures[0] stores the DT values for each pixel
+ gl.glBindTexture(GL_TEXTURE_2D, m_textures[0]);
+ gl.glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA32F, m_size.width(),
+ m_size.height(), 0, GL_RG, GL_FLOAT, 0);
+ gl.glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+ gl.glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+
+ // textures[1] is the result of the first pass
+ gl.glBindTexture(GL_TEXTURE_2D, m_textures[1]);
+ gl.glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA32F, m_size.width(),
+ m_size.height(), 0, GL_RGBA, GL_FLOAT, 0);
+ gl.glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+ gl.glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+
+ // Used for colormap lookup in the frag shader
+ gl.glGenTextures(1, &m_colorMapTex);
+ gl.glBindTexture(GL_TEXTURE_2D, m_colorMapTex);
+ gl.glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, COLORMAP_SAMPLES, 1, 0, GL_RGB,
+ GL_FLOAT, 0);
+ gl.glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
+ gl.glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+}
+
+VoronoiSplatRenderer::~VoronoiSplatRenderer()
+{
+ gl.glDeleteBuffers(2, m_VBOs);
+ delete m_program1;
+}
+
+void VoronoiSplatRenderer::setSites(const arma::mat &points)
+{
+ if (points.n_rows < 1 || points.n_cols != 2
+ || (m_values.size() != 0 && points.n_rows != m_values.size())) {
+ return;
+ }
+
+ // Copy 'points' to internal data structure(s)
+ copyPoints(points);
+
+ // Update VBO with the new data
+ m_sitesVAO.bind();
+ gl.glBindBuffer(GL_ARRAY_BUFFER, m_VBOs[0]);
+ gl.glBufferData(GL_ARRAY_BUFFER, m_sites.size() * sizeof(float),
+ m_sites.data(), GL_STATIC_DRAW);
+
+ int vertAttrib = m_program1->attributeLocation("vert");
+ gl.glVertexAttribPointer(vertAttrib, 2, GL_FLOAT, GL_FALSE, 0, 0);
+ gl.glEnableVertexAttribArray(vertAttrib);
+ m_sitesVAO.release();
+
+ // Compute DT values for the new positions
+ computeDT();
+}
+
+void VoronoiSplatRenderer::copyPoints(const arma::mat &points)
+{
+ double minX = points.col(0).min();
+ double maxX = points.col(0).max();
+ double minY = points.col(1).min();
+ double maxY = points.col(1).max();
+
+ // Coords are tighly packed into 'm_sites' as [ (x1, y1), (x2, y2), ... ]
+ m_sites.resize(points.n_rows * 2);
+ LinearScale<float> sx(minX, maxX, 1, m_size.width() - 1);
+ const double *col = points.colptr(0);
+ for (unsigned i = 0; i < points.n_rows; i++) {
+ m_sites[2*i] = sx(col[i]);
+ }
+
+ col = points.colptr(1);
+ LinearScale<float> sy(minY, maxY, m_size.height() - 1, 1);
+ for (unsigned i = 0; i < points.n_rows; i++) {
+ m_sites[2*i + 1] = sy(col[i]);
+ }
+}
+
+void VoronoiSplatRenderer::computeDT()
+{
+ int w = m_size.width(), h = m_size.height();
+
+ std::vector<float> buf(w * h, 0.0f);
+ for (unsigned i = 0; i < m_sites.size(); i += 2) {
+ buf[int(m_sites[i + 1]) * h + int(m_sites[i])] = (float) i / 2 + 1;
+ }
+
+ // Compute FT of the sites
+ skelft2DFT(0, buf.data(), 0, 0, w, h, w);
+ // Compute DT of the sites (from the resident FT)
+ skelft2DDT(buf.data(), 0, 0, w, h);
+
+ std::vector<GLfloat> dtTexData(w * h * 2, 0.0f);
+ for (unsigned i = 0; i < buf.size(); i++) {
+ dtTexData[2*i] = buf[i];
+ }
+
+ // Upload result to lookup texture
+ gl.glBindTexture(GL_TEXTURE_2D, m_textures[0]);
+ gl.glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, m_size.width(), m_size.height(),
+ GL_RG, GL_FLOAT, dtTexData.data());
+}
+
+void VoronoiSplatRenderer::setValues(const arma::vec &values)
+{
+ if (values.n_elem == 0
+ || (m_sites.size() != 0 && values.n_elem != m_sites.size() / 2)) {
+ return;
+ }
+
+ m_values.resize(values.n_elem);
+ std::copy(values.begin(), values.end(), m_values.begin());
+
+ // Update VBO with the new data
+ m_sitesVAO.bind();
+ gl.glBindBuffer(GL_ARRAY_BUFFER, m_VBOs[1]);
+ gl.glBufferData(GL_ARRAY_BUFFER, m_values.size() * sizeof(float),
+ m_values.data(), GL_DYNAMIC_DRAW);
+
+ int valueAttrib = m_program1->attributeLocation("scalar");
+ gl.glVertexAttribPointer(valueAttrib, 1, GL_FLOAT, GL_FALSE, 0, 0);
+ gl.glEnableVertexAttribArray(valueAttrib);
+ m_sitesVAO.release();
+}
+
+void VoronoiSplatRenderer::setColorMap(const ColorScale *scale)
+{
+ if (!scale) {
+ return;
+ }
+
+ float t = scale->min();
+ float step = (scale->max() - scale->min()) / COLORMAP_SAMPLES;
+ qreal r, g, b;
+ std::vector<float> cmap(COLORMAP_SAMPLES * 3); // R,G,B
+ for (int i = 0; i < COLORMAP_SAMPLES * 3; i += 3) {
+ scale->color(t).getRgbF(&r, &g, &b);
+ cmap[i + 0] = b;
+ cmap[i + 1] = g;
+ cmap[i + 2] = r;
+
+ t += step;
+ }
+
+ // Update texture data
+ gl.glBindTexture(GL_TEXTURE_2D, m_colorMapTex);
+ gl.glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, COLORMAP_SAMPLES, 1, GL_RGB,
+ GL_FLOAT, cmap.data());
+}
+
+QOpenGLFramebufferObject *VoronoiSplatRenderer::createFramebufferObject(const QSize &size)
+{
+ return new QOpenGLFramebufferObject(size);
+}
+
+void VoronoiSplatRenderer::synchronize(QQuickFramebufferObject *item)
+{
+ m_item = static_cast<VoronoiSplat *>(item);
+
+ if (m_item->colorScaleUpdated()) {
+ setColorMap(m_item->colorScale());
+ }
+ if (m_item->pointsUpdated()) {
+ setSites(m_item->points());
+ }
+ if (m_item->valuesUpdated()) {
+ setValues(m_item->values());
+ }
+
+ m_item->setUpdated(false);
+}
+
+void VoronoiSplatRenderer::render()
+{
+ // Store the final texture to render to, as we use a two-pass rendering
+ // which first renders to an intermediate texture
+ GLint targetTexture;
+ gl.glGetFramebufferAttachmentParameteriv(GL_FRAMEBUFFER,
+ GL_COLOR_ATTACHMENT0, GL_FRAMEBUFFER_ATTACHMENT_OBJECT_NAME,
+ &targetTexture);
+
+ // The first pass
+ m_program1->bind();
+ m_program1->setUniformValue("rad_max", 20.0f);
+ m_program1->setUniformValue("rad_blur", RAD_BLUR);
+ m_program1->setUniformValue("fbSize", float(m_size.width()));
+
+ gl.glActiveTexture(GL_TEXTURE0);
+ gl.glBindTexture(GL_TEXTURE_2D, m_textures[0]);
+ m_program1->setUniformValue("siteDT", 0);
+
+ gl.glEnable(GL_POINT_SPRITE);
+ gl.glEnable(GL_PROGRAM_POINT_SIZE);
+ gl.glEnable(GL_BLEND);
+ gl.glBlendFunc(GL_ONE, GL_ONE);
+
+ // In the first pass we draw to an intermediate texture, which is used as
+ // input for the next pass
+ gl.glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0,
+ GL_TEXTURE_2D, m_textures[1], 0);
+
+ gl.glClearColor(1, 1, 1, 1);
+ gl.glClear(GL_COLOR_BUFFER_BIT);
+
+ m_sitesVAO.bind();
+ gl.glDrawArrays(GL_POINTS, 0, m_values.size());
+ m_sitesVAO.release();
+
+ m_program1->release();
+
+ // Second pass
+ m_program2->bind();
+ m_program2->setUniformValue("rad_max", 20.0f);
+
+ gl.glActiveTexture(GL_TEXTURE0);
+ gl.glBindTexture(GL_TEXTURE_2D, m_textures[0]);
+ m_program2->setUniformValue("siteDT", 0);
+ gl.glActiveTexture(GL_TEXTURE1);
+ gl.glBindTexture(GL_TEXTURE_2D, m_textures[1]);
+ m_program2->setUniformValue("accumTex", 1);
+ gl.glActiveTexture(GL_TEXTURE2);
+ gl.glBindTexture(GL_TEXTURE_2D, m_colorMapTex);
+ m_program2->setUniformValue("colormap", 2);
+
+ gl.glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
+
+ // Restore the original framebuffer texture
+ gl.glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0,
+ GL_TEXTURE_2D, targetTexture, 0);
+
+ glClearColor(1, 1, 1, 1);
+ glClear(GL_COLOR_BUFFER_BIT);
+
+ m_2ndPassVAO.bind();
+ gl.glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
+ m_2ndPassVAO.release();
+
+ m_program2->release();
+
+ m_item->window()->resetOpenGLState();
+}
+
+static int nextPow2(int n)
+{
+ // TODO: check for overflows
+ n--;
+ for (int shift = 1; ((n + 1) & n); shift <<= 1) {
+ n |= n >> shift;
+ }
+ return n + 1;
+}
+
+VoronoiSplat::VoronoiSplat(QQuickItem *parent)
+ : QQuickFramebufferObject(parent)
+ , m_colorScaleUpdated(false)
+ , m_pointsUpdated(false)
+ , m_valuesUpdated(false)
+ , m_colorScale(0)
+{
+ setTextureFollowsItemSize(false);
+}
+
+QQuickFramebufferObject::Renderer *VoronoiSplat::createRenderer() const
+{
+ int baseSize = nextPow2(std::min(width(), height()));
+ return new VoronoiSplatRenderer(QSize(baseSize, baseSize));
+}
diff --git a/voronoisplat.h b/voronoisplat.h
new file mode 100644
index 0000000..ce84e74
--- /dev/null
+++ b/voronoisplat.h
@@ -0,0 +1,103 @@
+#ifndef VORONOISPLAT_H
+#define VORONOISPLAT_H
+
+#include <QQuickFramebufferObject>
+#include <QOpenGLFunctions>
+#include <QOpenGLBuffer>
+#include <QOpenGLShaderProgram>
+#include <QOpenGLVertexArrayObject>
+#include <armadillo>
+
+#include "colorscale.h"
+
+class VoronoiSplat; // defined after this class
+
+class VoronoiSplatRenderer
+ : public QQuickFramebufferObject::Renderer
+{
+public:
+ // 'size' must be square (and power of 2); item is the QQuickFBO that
+ // creates this
+ VoronoiSplatRenderer(const QSize &size);
+ ~VoronoiSplatRenderer();
+
+ void synchronize(QQuickFramebufferObject *item);
+ // 'points' should be a 2D points matrix (each point in a row)
+ void setSites(const arma::mat &points);
+ void setValues(const arma::vec &values);
+ // Set colormap data based on the given color scale;
+ void setColorMap(const ColorScale *scale);
+
+ QOpenGLFramebufferObject *createFramebufferObject(const QSize & size);
+ void render();
+
+private:
+ void setupShaders();
+ void setupTextures();
+ void copyPoints(const arma::mat &points);
+ void computeDT();
+
+ VoronoiSplat *m_item;
+ QOpenGLFunctions gl;
+ QOpenGLShaderProgram *m_program1, *m_program2;
+ GLuint m_VBOs[3];
+ GLuint m_textures[2], m_colorMapTex;
+ QOpenGLVertexArrayObject m_sitesVAO, m_2ndPassVAO;
+
+ std::vector<float> m_sites, m_values;
+ QSize m_size;
+};
+
+class VoronoiSplat
+ : public QQuickFramebufferObject
+{
+ Q_OBJECT
+public:
+ VoronoiSplat(QQuickItem *parent = 0);
+ Renderer *createRenderer() const;
+
+ const arma::mat &points() const { return m_points; }
+ const arma::vec &values() const { return m_values; }
+ const ColorScale *colorScale() const { return m_colorScale; }
+
+ bool colorScaleUpdated() const { return m_colorScaleUpdated; }
+ bool pointsUpdated() const { return m_pointsUpdated; }
+ bool valuesUpdated() const { return m_valuesUpdated; }
+
+public slots:
+ void setColorScale(const ColorScale *scale)
+ {
+ m_colorScale = scale;
+ m_colorScaleUpdated = true;
+ update();
+ }
+
+ void setPoints(const arma::mat &points)
+ {
+ m_points = points;
+ m_pointsUpdated = true;
+ update();
+ }
+
+ void setValues(const arma::vec &values)
+ {
+ m_values = values;
+ m_valuesUpdated = true;
+ update();
+ }
+
+ void setUpdated(bool updated)
+ {
+ m_colorScaleUpdated = updated;
+ m_pointsUpdated = updated;
+ m_valuesUpdated = updated;
+ }
+
+private:
+ bool m_colorScaleUpdated, m_pointsUpdated, m_valuesUpdated;
+ const ColorScale *m_colorScale;
+ arma::mat m_points;
+ arma::vec m_values;
+};
+
+#endif // VORONOISPLAT_H