diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000..1eead6f
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,11 @@
+cmake_minimum_required(VERSION 3.15...4.0)
+project(tripclust LANGUAGES CXX)
+
+set(PYBIND11_FINDPYTHON ON)
+find_package(pybind11 CONFIG REQUIRED)
+
+find_package (Eigen3 REQUIRED)
+include_directories(${EIGEN3_INCLUDE_DIR})
+
+pybind11_add_module(tripclust src/spyral_utils/tripclust/pybind.cpp src/spyral_utils/tripclust/src/postprocess.cpp src/spyral_utils/tripclust/src/directedgraph.cpp src/spyral_utils/tripclust/src/orthogonallsq.cpp src/spyral_utils/tripclust/src/pointcloud.cpp src/spyral_utils/tripclust/src/option.cpp src/spyral_utils/tripclust/src/cluster.cpp src/spyral_utils/tripclust/src/dnn.cpp src/spyral_utils/tripclust/src/graph.cpp src/spyral_utils/tripclust/src/triplet.cpp src/spyral_utils/tripclust/src/util.cpp src/spyral_utils/tripclust/src/hclust/fastcluster.cpp src/spyral_utils/tripclust/src/kdtree/kdtree.cpp)
+install(TARGETS tripclust DESTINATION .)
diff --git a/README.md b/README.md
index a5f42ea..4e0846a 100644
--- a/README.md
+++ b/README.md
@@ -10,6 +10,7 @@ spyral-utils is a utility library that contains some of the core functionality
- Some histogramming and gating/cuting tools that are plotting backend agnostic
- Energy loss analysis for gas and solid targets using pycatima
- 4-vector analysis through the vector package
+- Point triplet clustering algorithm (also called tripclust or Dalitz clustering)
See the [documentation](https://attpc.github.io/spyral-utils/) for more details.
@@ -69,9 +70,9 @@ For a gas mixture:
}
```
-Compound specifications are lists of elements where each element is an array of `[Z, A, S]`. `S` is the
-stoichiometry of that particular element in the compound. spyral-utils does not support target layers at
-this time (but layered targets can be built from the building blocks provided by spyral-utils). In the above examples the
+Compound specifications are lists of elements where each element is an array of `[Z, A, S]`. `S` is the
+stoichiometry of that particular element in the compound. spyral-utils does not support target layers at
+this time (but layered targets can be built from the building blocks provided by spyral-utils). In the above examples the
gas target is for 1H2 gas at 300 Torr pressure and the solid target is for
12C1 foil with a thickness of 50 μg/cm2. The gas mixutre example is for
P10 gas (10% methane in argon) at 50 Torr.
@@ -172,3 +173,4 @@ For testing we use
- Gordon McCann
- Nathan Turi
+- Daniel Bazin
diff --git a/docs/api/tripclust/index.md b/docs/api/tripclust/index.md
new file mode 100644
index 0000000..fd33f60
--- /dev/null
+++ b/docs/api/tripclust/index.md
@@ -0,0 +1,3 @@
+# Module Overview
+
+::: spyral_utils.tripclust
diff --git a/pyproject.toml b/pyproject.toml
index b904687..ad720e3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,10 +1,11 @@
[project]
name = "spyral-utils"
-version = "2.0.0"
+version = "2.1.0"
description = "A collection of useful utilities for the Spyral analysis framework"
authors = [
{name = "Gordon McCann", email = "mccann@frib.msu.edu"},
{name = "Nathan Turi", email = "turi@frib.msu.edu"},
+ {name = "Daniel Bazin", email = "bazin@frib.msu.edu"},
]
dependencies = [
"numpy>=2.0, <2.2",
@@ -13,14 +14,17 @@ dependencies = [
"pycatima>=1.90",
"shapely>=2.0.6",
"vector>=1.3.0",
+ "pybind11>=3.0.0",
+ "eigenpip>=0.1.0",
]
requires-python = ">=3.10,<3.14"
readme = "README.md"
license = {text = "GPLv3"}
[build-system]
-requires = ["pdm-backend"]
-build-backend = "pdm.backend"
+requires = ["pdm-backend", "scikit-build-core", "pybind11"]
+# build-backend = "pdm.backend"
+build-backend = "scikit_build_core.build"
[dependency-groups]
dev = [
diff --git a/src/spyral_utils/tripclust/CMakeLists.txt b/src/spyral_utils/tripclust/CMakeLists.txt
new file mode 100644
index 0000000..41aa21f
--- /dev/null
+++ b/src/spyral_utils/tripclust/CMakeLists.txt
@@ -0,0 +1,11 @@
+cmake_minimum_required(VERSION 3.15...4.0)
+project(tripclust LANGUAGES CXX)
+
+set(PYBIND11_FINDPYTHON ON)
+find_package(pybind11 CONFIG REQUIRED)
+
+find_package (Eigen3 REQUIRED)
+include_directories(${EIGEN3_INCLUDE_DIR})
+
+pybind11_add_module(tripclust pybind.cpp src/postprocess.cpp src/directedgraph.cpp src/orthogonallsq.cpp src/pointcloud.cpp src/option.cpp src/cluster.cpp src/dnn.cpp src/graph.cpp src/triplet.cpp src/util.cpp src/hclust/fastcluster.cpp src/kdtree/kdtree.cpp)
+install(TARGETS tripclust DESTINATION .)
diff --git a/src/spyral_utils/tripclust/pybind.cpp b/src/spyral_utils/tripclust/pybind.cpp
new file mode 100644
index 0000000..56d8a5c
--- /dev/null
+++ b/src/spyral_utils/tripclust/pybind.cpp
@@ -0,0 +1,193 @@
+#include <pybind11/pybind11.h>
+#include <pybind11/numpy.h>
+#include <cmath>
+#include <vector>
+#include "src/pointcloud.h"
+#include "src/option.h"
+#include "src/cluster.h"
+#include "src/dnn.h"
+#include "src/graph.h"
+#include "src/postprocess.h"
+
+namespace py = pybind11;
+
+class tripclust {
+private:
+ Opt opt;
+ PointCloud cloud_xyz;
+ cluster_group cl_group;
+ bool postprocess;
+ int min_depth;
+
+public:
+ tripclust();
+ void fill_pointcloud(py::array_t<double>);
+ void perform_clustering();
+ py::array_t<int> get_cluster(int);
+ py::array_t<int> get_labels();
+ int get_number_of_clusters();
+ // neighbour distance for smoothing
+ void set_r(const double value) {opt.r = value;}
+ void set_rdnn(const bool value) {opt.rdnn = value;} // compute r with dnn
+ // tested neighbours of triplet mid point
+ void set_k(const size_t value) {opt.k = value;}
+ // max number of triplets to one mid point
+ void set_n(const size_t value) {opt.n = value;}
+ // 1 - cos alpha, where alpha is the angle between the two triplet branches
+ void set_a(const double value) {opt.a = value;}
+ // distance scale factor in metric
+ void set_s(const double value) {opt.s = value;}
+ void set_sdnn(const bool value) {opt.sdnn = value;} // compute s with dnn
+ // threshold for cdist in clustering
+ void set_t(const double value) {opt.t = value;}
+ void set_tauto(const bool value) {opt.tauto = value;} // auto generate t
+ // maximum gap width
+ void set_dmax(const double value) {opt.dmax = value;}
+ void set_dmax_dnn(const bool value) {opt.dmax_dnn = value;} // use dnn for dmax
+ void set_ordered(const bool value) {opt.ordered = value;} // points are in chronological order
+ // linkage method for clustering
+ void set_link(const Linkage value) {opt.link = value;}
+ // min number of triplets per cluster
+ void set_m(const size_t value) {opt.m = value;}
+ // whether or not post processing should be enabled
+ void set_postprocess(const bool value) {postprocess = value;}
+ // minimum number of points making a branch in curve in post processing
+ void set_min_depth(const int value) {min_depth = value;}
+};
+
+tripclust::tripclust() {
+ Opt opt;
+ PointCloud pc;
+ cluster_group cg;
+ this->opt = opt;
+ this->cloud_xyz = pc;
+ this->cl_group = cg;
+ this->postprocess = false;
+ this->min_depth = 25;
+}
+
+// Load cloud_xyz from numpy array
+void tripclust::fill_pointcloud(py::array_t<double> arr) {
+ py::buffer_info buf_info = arr.request();
+ double *ptr = static_cast<double *>(buf_info.ptr);
+ size_t ndim = buf_info.ndim;
+ std::vector<py::ssize_t> shape = buf_info.shape;
+
+ // The numpy array is assumed to be a 2D cloud array from Spyral
+ for (size_t i = 0; i < shape[0]; i++) {
+ Point point;
+ point.x = ptr[i*shape[1]+0]; // x coordinate
+ point.y = ptr[i*shape[1]+1]; // y coordinate
+ point.z = ptr[i*shape[1]+2]; // z coordinate
+ this->cloud_xyz.push_back(point);
+ }
+ // this->cloud.setOrdered(true);
+ // this->cloud_xyz.set2d(false);
+}
+
+void tripclust::perform_clustering() {
+ if (this->opt.needs_dnn()) {
+ double dnn = std::sqrt(first_quartile(this->cloud_xyz));
+ this->opt.set_dnn(dnn);
+ if (dnn == 0.0) {
+ return; // Need to throw some kind of error
+ }
+ }
+ // Step 1) smoothing by position averaging of neighboring points
+ PointCloud cloud_xyz_smooth;
+ smoothen_cloud(this->cloud_xyz, cloud_xyz_smooth, this->opt.get_r());
+
+ // Step 2) finding triplets of approximately collinear points
+ std::vector<triplet> triplets;
+ generate_triplets(cloud_xyz_smooth, triplets, this->opt.get_k(),
+ this->opt.get_n(), this->opt.get_a());
+
+ // Step 3) single link hierarchical clustering of the triplets
+ // cluster_group cl_group;
+ compute_hc(cloud_xyz_smooth, this->cl_group, triplets, this->opt.get_s(),
+ this->opt.get_t(), this->opt.is_tauto(), this->opt.get_dmax(),
+ this->opt.is_dmax(), this->opt.get_linkage(), 0);
+
+ // Step 4) pruning by removal of small clusters ...
+ cleanup_cluster_group(this->cl_group, this->opt.get_m(), 0);
+ cluster_triplets_to_points(triplets, this->cl_group);
+ // .. and (optionally) by splitting up clusters at gaps > dmax
+ if (this->opt.is_dmax()) {
+ cluster_group cleaned_up_cluster_group;
+ for (cluster_group::iterator cl = this->cl_group.begin(); cl != this->cl_group.end(); ++cl) {
+ max_step(cleaned_up_cluster_group, *cl, this->cloud_xyz, this->opt.get_dmax(),
+ this->opt.get_m() + 2);
+ }
+ this->cl_group = cleaned_up_cluster_group;
+ }
+ // store cluster labels in points
+ add_clusters(this->cloud_xyz, this->cl_group, 0);
+ // Step 5) Optionally perform post processing to find sub clusters
+ if (this->postprocess) {
+ int nchanged;
+ nchanged = process_pointcloud(this->cloud_xyz, this->min_depth, 0);
+ }
+}
+
+// Get an individual cluster array
+py::array_t<int> tripclust::get_cluster(int clu) {
+ std::vector<int> cluster;
+ for (size_t i=0; i<this->cl_group[clu].size(); ++i)
+ cluster.push_back(this->cl_group[clu][i]);
+ return py::array_t<int>(cluster.size(), cluster.data());
+}
+
+// Get an array containing the cluster labels for each point
+py::array_t<int> tripclust::get_labels() {
+ std::vector<int> labels(this->cloud_xyz.size(), -1);
+ for (size_t i=0; i<this->cloud_xyz.size(); ++i) {
+ std::set<size_t>::iterator iterator=this->cloud_xyz[i].cluster_ids.begin();
+ if (this->cloud_xyz[i].cluster_ids.size() > 0) labels[i] = *iterator; // Choose the first cluster id
+ }
+ //
+ // for (size_t clu=0; clu<this->cl_group.size(); ++clu) {
+ // for (size_t i=0; i<this->cl_group[clu].size(); ++i) {
+ // labels[this->cl_group[clu][i]] = clu;
+ // }
+ // }
+ return py::array_t(labels.size(), labels.data());
+}
+
+// Get the number of clusters
+int tripclust::get_number_of_clusters(){
+ int nclu = 0;
+ std::vector<bool> test(this->cloud_xyz.size(), false);
+ for (size_t i=0; i<this->cloud_xyz.size(); ++i) {
+ for (std::set<size_t>::iterator j=this->cloud_xyz[i].cluster_ids.begin(); j!=this->cloud_xyz[i].cluster_ids.end(); ++j)
+ test[*j] = true;
+ }
+ for (size_t i=0; i<test.size(); ++i) if (test[i]) nclu++;
+ return this->cl_group.size();
+}
+
+PYBIND11_MODULE(tripclust, m, py::mod_gil_not_used()) {
+ py::class_<tripclust>(m, "tripclust")
+ .def(py::init<>())
+ .def("fill_pointcloud", &tripclust::fill_pointcloud, "Fills point cloud")
+ .def("perform_clustering", &tripclust::perform_clustering, "Performs the tripclust clustering")
+ .def("get_number_of_clusters", &tripclust::get_number_of_clusters, "Get the number of clusters")
+ .def("get_cluster", &tripclust::get_cluster, "Retrieves individual cluster")
+ .def("get_labels", &tripclust::get_labels, "Returns an array containing the label for each point")
+ .def("set_r", &tripclust::set_r, "neighbour distance for smoothing")
+ .def("set_rdnn", &tripclust::set_rdnn, "compute r with dnn")
+ .def("set_k", &tripclust::set_k, "tested neighbours of triplet mid point")
+ .def("set_n", &tripclust::set_n, "max number of triplets to one mid point")
+ .def("set_a", &tripclust::set_a, "1 - cos alpha, where alpha is the angle between the two triplet branches")
+ .def("set_s", &tripclust::set_s, "distance scale factor in metric")
+ .def("set_sdnn", &tripclust::set_sdnn, "compute s with dnn")
+ .def("set_t", &tripclust::set_t, "threshold for cdist in clustering")
+ .def("set_tauto", &tripclust::set_tauto, "auto generate t")
+ .def("set_dmax", &tripclust::set_dmax, "maximum gap width")
+ .def("set_dmax_dnn", &tripclust::set_dmax_dnn, "use dnn for dmax")
+ .def("set_ordered", &tripclust::set_ordered, "points are in chronological order")
+ .def("set_link", &tripclust::set_link, "linkage method for clustering")
+ .def("set_m", &tripclust::set_m, "min number of triplets per cluster")
+ .def("set_postprocess", &tripclust::set_postprocess, "whether or not post processing should be enabled")
+ .def("set_min_depth", &tripclust::set_min_depth, "minimum number of points making a branch in curve in post processing");
+}
diff --git a/src/spyral_utils/tripclust/src/cluster.cpp b/src/spyral_utils/tripclust/src/cluster.cpp
new file mode 100644
index 0000000..16e44e8
--- /dev/null
+++ b/src/spyral_utils/tripclust/src/cluster.cpp
@@ -0,0 +1,259 @@
+//
+// cluster.cpp
+// Functions for triplet clustering and for propagating
+// the triplet cluster labels to points
+//
+// Author: Jens Wilberg, Lukas Aymans, Christoph Dalitz
+// Date: 2019-04-02
+// License: see ../LICENSE
+//
+
+#include <cmath>
+#include <fstream>
+#include <iostream>
+
+#include "cluster.h"
+#include "hclust/fastcluster.h"
+
+// compute mean of *a* with size *m*
+double mean(const double *a, size_t m) {
+ double sum = 0;
+ for (size_t i = 0; i < m; ++i) {
+ sum += a[i];
+ }
+ return sum / m;
+}
+
+// compute standard deviation of *a* with size *m*
+double sd(const double *a, size_t m) {
+ double sum = 0, result;
+ double mean_val = mean(a, m);
+ for (size_t k = 0; k < m; ++k) {
+ double tmp = mean_val - a[k];
+ sum += (tmp * tmp);
+ }
+ result = (1.0 / (m - 1.0)) * sum;
+ return std::sqrt(result);
+}
+
+//-------------------------------------------------------------------
+// computation of condensed distance matrix.
+// The distance matrix is computed from the triplets in *triplets*
+// and saved in *result*. *triplet_metric* is used as distance metric.
+//-------------------------------------------------------------------
+void calculate_distance_matrix(const std::vector<triplet> &triplets,
+ const PointCloud &cloud, double *result,
+ ScaleTripletMetric &triplet_metric) {
+ size_t const triplet_size = triplets.size();
+ size_t k = 0;
+
+ for (size_t i = 0; i < triplet_size; ++i) {
+ for (size_t j = i + 1; j < triplet_size; j++) {
+ result[k++] = triplet_metric(triplets[i], triplets[j]);
+ }
+ }
+}
+
+//-------------------------------------------------------------------
+// Computation of the clustering.
+// The triplets in *triplets* are clustered by the fastcluster algorithm
+// and the result is returned as cluster_group. *t* is the cut distance
+// and *triplet_metric* is the distance metric for the triplets.
+// *opt_verbose* is the verbosity level for debug outputs. the clustering
+// is returned in *result*.
+//-------------------------------------------------------------------
+void compute_hc(const PointCloud &cloud, cluster_group &result,
+ const std::vector<triplet> &triplets, double s, double t,
+ bool tauto, double dmax, bool is_dmax, Linkage method,
+ int opt_verbose) {
+ const size_t triplet_size = triplets.size();
+ size_t k, cluster_size;
+ hclust_fast_methods link;
+
+ if (!triplet_size) {
+ // if no triplets are generated
+ return;
+ }
+ // choose linkage method
+ switch (method) {
+ case SINGLE:
+ link = HCLUST_METHOD_SINGLE;
+ break;
+ case COMPLETE:
+ link = HCLUST_METHOD_COMPLETE;
+ break;
+ case AVERAGE:
+ link = HCLUST_METHOD_AVERAGE;
+ break;
+ }
+
+ double *distance_matrix = new double[(triplet_size * (triplet_size - 1)) / 2];
+ double *cdists = new double[triplet_size - 1];
+ int *merge = new int[2 * (triplet_size - 1)], *labels = new int[triplet_size];
+ ScaleTripletMetric metric(s);
+ calculate_distance_matrix(triplets, cloud, distance_matrix, metric);
+
+ hclust_fast(triplet_size, distance_matrix, link, merge, cdists);
+
+ // splitting the dendrogram into clusters
+ if (tauto) {
+ // automatic stopping criterion where cdist is unexpected large
+ for (k = (triplet_size - 1) / 2; k < (triplet_size - 1); ++k) {
+ if ((cdists[k - 1] > 0.0 || cdists[k] > 1.0e-8) &&
+ (cdists[k] > cdists[k - 1] + 2 * sd(cdists, k + 1))) {
+ break;
+ }
+ }
+ if (opt_verbose) {
+ double automatic_t;
+ double prev_cdist = (k > 0) ? cdists[k - 1] : 0.0;
+ if (k < (triplet_size - 1)) {
+ automatic_t = (prev_cdist + cdists[k]) / 2.0;
+ } else {
+ automatic_t = cdists[k - 1];
+ }
+ std::cout << "[Info] optimal cdist threshold: " << automatic_t
+ << std::endl;
+ }
+ } else {
+ // fixed threshold t
+ for (k = 0; k < (triplet_size - 1); ++k) {
+ if (cdists[k] >= t) {
+ break;
+ }
+ }
+ }
+ cluster_size = triplet_size - k;
+ cutree_k(triplet_size, merge, cluster_size, labels);
+
+ // generate clusters
+ for (size_t i = 0; i < cluster_size; ++i) {
+ cluster_t new_cluster;
+ result.push_back(new_cluster);
+ }
+
+ for (size_t i = 0; i < triplet_size; ++i) {
+ result[labels[i]].push_back(i);
+ }
+
+ if (opt_verbose > 1) {
+ // write debug file
+ const char *fname = "debug_cdist.csv";
+ std::ofstream of(fname);
+ of << std::fixed; // set float style
+ if (of.is_open()) {
+ for (size_t i = 0; i < (triplet_size - 1); ++i) {
+ of << cdists[i] << std::endl;
+ }
+ } else {
+ std::cerr << "[Error] could not write file '" << fname << "'\n";
+ }
+ of.close();
+ }
+
+ // cleanup
+ delete[] distance_matrix;
+ delete[] cdists;
+ delete[] merge;
+ delete[] labels;
+}
+
+//-------------------------------------------------------------------
+// Remove all clusters in *cl_group* which contains less then *m*
+// triplets. *cl_group* will be modified.
+//-------------------------------------------------------------------
+void cleanup_cluster_group(cluster_group &cl_group, size_t m, int opt_verbose) {
+ size_t old_size = cl_group.size();
+ cluster_group::iterator it = cl_group.begin();
+ while (it != cl_group.end()) {
+ if (it->size() < m) {
+ it = cl_group.erase(it);
+ } else {
+ ++it;
+ }
+ }
+ if (opt_verbose > 0) {
+ std::cout << "[Info] in pruning removed clusters: "
+ << old_size - cl_group.size() << std::endl;
+ }
+}
+
+//-------------------------------------------------------------------
+// Convert the triplet indices ind *cl_group* to point indices.
+// *triplets* contains all triplets and *cl_group* will be modified.
+//-------------------------------------------------------------------
+void cluster_triplets_to_points(const std::vector<triplet> &triplets,
+ cluster_group &cl_group) {
+ for (size_t i = 0; i < cl_group.size(); ++i) {
+ cluster_t point_indices, &current_cluster = cl_group[i];
+ for (std::vector<size_t>::const_iterator triplet_index =
+ current_cluster.begin();
+ triplet_index < current_cluster.end(); ++triplet_index) {
+ const triplet &current_triplet = triplets[*triplet_index];
+ point_indices.push_back(current_triplet.point_index_a);
+ point_indices.push_back(current_triplet.point_index_b);
+ point_indices.push_back(current_triplet.point_index_c);
+ }
+ // sort point-indices and remove duplicates
+ std::sort(point_indices.begin(), point_indices.end());
+ std::vector<size_t>::iterator new_end =
+ std::unique(point_indices.begin(), point_indices.end());
+ point_indices.resize(std::distance(point_indices.begin(), new_end));
+ // replace triplet cluster with point cluster
+ current_cluster = point_indices;
+ }
+}
+
+//-------------------------------------------------------------------
+// Adds the cluster ids to the points in *cloud*
+// *cl_group* contains the clusters with the point indices. For every
+// point the corresponding cluster id is saved. For gnuplot the points
+// which overlap between multiple clusteres are saved in seperate
+// clusters in *cl_group*
+//-------------------------------------------------------------------
+void add_clusters(PointCloud &cloud, cluster_group &cl_group, bool gnuplot) {
+ for (size_t i = 0; i < cl_group.size(); ++i) {
+ const std::vector<size_t> &current_cluster = cl_group[i];
+ for (std::vector<size_t>::const_iterator point_index =
+ current_cluster.begin();
+ point_index != current_cluster.end(); ++point_index) {
+ cloud[*point_index].cluster_ids.insert(i);
+ }
+ }
+
+ // add point indices to the corresponding cluster/vertex vector. this is for
+ // the gnuplot output
+ if (gnuplot) {
+ std::vector<cluster_t> verticies;
+ for (size_t i = 0; i < cloud.size(); ++i) {
+ const Point &p = cloud[i];
+ if (p.cluster_ids.size() > 1) {
+ // if the point is in multiple clusters add it to the corresponding
+ // vertex or create one if none exists
+ bool found = false;
+ for (std::vector<cluster_t>::iterator v = verticies.begin();
+ v != verticies.end(); ++v) {
+ if (cloud[v->at(0)].cluster_ids == p.cluster_ids) {
+ v->push_back(i);
+ found = true;
+ }
+ }
+ if (!found) {
+ cluster_t v;
+ v.push_back(i);
+ verticies.push_back(v);
+ }
+ // remove the point from all other clusters
+ for (std::set<size_t>::iterator it = p.cluster_ids.begin();
+ it != p.cluster_ids.end(); ++it) {
+ cluster_t &cluster = cl_group[*it];
+ cluster.erase(std::remove(cluster.begin(), cluster.end(), i),
+ cluster.end());
+ }
+ }
+ }
+ // extend clusters with verticies
+ cl_group.reserve(cl_group.size() + verticies.size());
+ cl_group.insert(cl_group.end(), verticies.begin(), verticies.end());
+ }
+}
diff --git a/src/spyral_utils/tripclust/src/cluster.h b/src/spyral_utils/tripclust/src/cluster.h
new file mode 100644
index 0000000..00a6b44
--- /dev/null
+++ b/src/spyral_utils/tripclust/src/cluster.h
@@ -0,0 +1,37 @@
+//
+// cluster.h
+// Functions for triplet clustering and for propagating
+// the triplet cluster labels to points
+//
+// Author: Jens Wilberg, Lukas Aymans, Christoph Dalitz
+// Date: 2018-08-30
+// License: see ../LICENSE
+//
+
+#ifndef CLUSTER_H
+#define CLUSTER_H
+
+#include <cstddef>
+#include <vector>
+
+#include "triplet.h"
+#include "util.h"
+
+typedef std::vector<size_t> cluster_t;
+
+typedef std::vector<cluster_t> cluster_group;
+
+// compute hierarchical clustering
+void compute_hc(const PointCloud &cloud, cluster_group &result,
+ const std::vector<triplet> &triplets, double s, double t,
+ bool tauto = false, double dmax = 0, bool is_dmax = false,
+ Linkage method = SINGLE, int opt_verbose = 0);
+// remove all small clusters
+void cleanup_cluster_group(cluster_group &cg, size_t m, int opt_verbose = 0);
+// convert the triplet indices ind *cl_group* to point indices.
+void cluster_triplets_to_points(const std::vector<triplet> &triplets,
+ cluster_group &cl_group);
+// adds the cluster ids to the points in *cloud*
+void add_clusters(PointCloud &cloud, cluster_group &cl_group,
+ bool gnuplot = false);
+#endif
diff --git a/src/spyral_utils/tripclust/src/directedgraph.cpp b/src/spyral_utils/tripclust/src/directedgraph.cpp
new file mode 100644
index 0000000..60005e6
--- /dev/null
+++ b/src/spyral_utils/tripclust/src/directedgraph.cpp
@@ -0,0 +1,433 @@
+//
+// directedgraph.cpp
+// Class for directed graph and building directed MST.
+//
+// Author: Christoph Dalitz
+// Date: 2022-11-07
+// License: see ../LICENSE
+//
+/** @file */
+
+#include "directedgraph.h"
+#include "util.h"
+
+#include <cmath>     /* sqrt */
+#include <algorithm>
+#include <cassert>   /* assert */
+
+/**
+ * @brief Return the euklidian distance of two Kdtree::CoordPoints
+ *
+ * @param a
+ * @param b
+ * @return double
+ */
+double euklidian_distance(Kdtree::CoordPoint a, Kdtree::CoordPoint b) {
+ return sqrt(((a[0]-b[0])*(a[0]-b[0]))+((a[1]-b[1])*(a[1]-b[1]))+((a[2]-b[2])*(a[2]-b[2])));
+}
+
+/**
+ * @brief Construct a new Graph:: Graph object. This includes building an MST.
+ * @param cloud
+ * @param indices
+ */
+Graph::Graph(PointCloud & cloud, std::vector<size_t> indices) {
+ // check input
+ assert(cloud.size() > 0);
+ assert(indices.size() > 0);
+ assert(std::set<size_t>(indices.begin(), indices.end()).size() == indices.size()); // indices are unique
+ assert((indices.back() < cloud.size())); // checks range of indices
+
+ this->indices = indices;
+
+ // insert Nodes without Edges. Nodes have x,y,z Coordinates and the corresponding index
+ for(unsigned int i = 0; i < indices.size(); i++) {
+ graph.insert(std::pair<size_t, Node>(indices[i], Node(indices[i], cloud[indices[i]].x, cloud[indices[i]].y, cloud[indices[i]].z)));
+ }
+
+ // Add first subtree: the first Point by time and the last Point by Index
+ this->subtree_roots.push_back(&graph.at(indices[indices.size()-1]));
+
+ // construct MST by adding Edges
+ constructMST();
+}
+
+/**
+ * @brief Constructs a KDTree. Requires the graph and indices attribute to be set
+ *
+ * @return Kdtree::KdTree
+ */
+Kdtree::KdTree Graph::constructKDTree() {
+ // build kdtree
+ Kdtree::KdNodeVector nodes;
+ size_t index;
+ for (std::vector<size_t>::iterator iterator = indices.begin(); iterator != indices.end(); ++iterator) {
+ index = *iterator;
+ Kdtree::CoordPoint p={graph.at(index).x, graph.at(index).y, graph.at(index).z};
+ nodes.push_back(Kdtree::KdNode(p, NULL, index));
+ }
+ Kdtree::KdTree kdtree(&nodes);
+ return kdtree;
+}
+
+/**
+ * @brief Constructs MST with Chu-Lui/Edmond's algorithm. Simplified, because we assume no cycles are possible due to strict ordering.
+ *
+ */
+void Graph::constructMST() {
+ // build kdtree
+ Kdtree::KdTree kdtree = constructKDTree();
+
+ double cost;
+ size_t nn_idx;
+ size_t last = indices.back();
+ size_t index;
+ // for vertex in graph do
+ for (std::vector<size_t>::iterator iterator = indices.begin(); iterator != indices.end(); ++iterator) {
+ index=*iterator;
+ // no outgoing edges to be added at last Node
+ if(index!=last) {
+ Kdtree::CoordPoint p = {graph.at(index).x, graph.at(index).y, graph.at(index).z};
+ Kdtree::KdNodeVector neighbors;
+ Predicate predicate = Predicate(index);
+ std::vector<double> distances;
+ kdtree.k_nearest_neighbors(p, 1, &neighbors, &distances, &predicate);
+ // nearest neighbor
+ if(neighbors.size()== 0) {
+ std::cout<<"neighbors is empty!!!"<::iterator iterator = indices.begin(); iterator != indices.end(); ++iterator) {
+ idx = *iterator;
+ removed += graph.at(idx).removeBranchEdges(depth, &removed_branch_edges,&subtree_roots);
+ }
+ if(removed>0) {
+ return true;
+ }
+ return false;
+}
+
+/**
+ * @brief Calls removeLongEdges for every Node in the Graph.
+ *
+ */
+void Graph::removeLongEdges() {
+ size_t removed = 0;
+ size_t idx;
+ for (std::vector<size_t>::iterator iterator = indices.begin(); iterator != indices.end(); ++iterator) {
+ idx=*iterator;
+ removed += graph.at(idx).removeLongEdges(&removed_long_edges,&subtree_roots);
+ }
+}
+
+/**
+ * @brief Does a breath-first search starting at each of the Segment Heads to assign new Curve IDs.
+ *
+ * @return std::vector< std::vector >
+ */
+std::vector< std::vector<size_t> > Graph::assignNewCurveIds() {
+ int current_id = 0;
+ std::vector< std::vector<size_t> > result(subtree_roots.size());
+ std::vector<size_t> open_list;
+ std::vector<size_t> closed_list;
+ std::vector<size_t> temp;
+ size_t v;
+ Node * node_ptr;
+ size_t i;
+ // for segment Head
+ for (std::vector<Node*>::iterator iterator = subtree_roots.begin(); iterator != subtree_roots.end(); ++iterator) {
+ node_ptr = *iterator;
+ open_list.clear();
+ closed_list.clear();
+ closed_list.push_back(node_ptr->index);
+ open_list.push_back(node_ptr->index);
+ result[current_id].push_back(node_ptr->index);
+ while(!open_list.empty()) {
+ v = open_list.front();
+ open_list.erase(open_list.begin());
+ temp.clear();
+ graph.at(v).get_next(&temp, true);
+ for (std::vector<size_t>::iterator iterator2 = temp.begin(); iterator2 != temp.end(); ++iterator2) {
+ i = *iterator2;
+ if(std::find(closed_list.begin(), closed_list.end(), i) == closed_list.end()) { // if not in closed list
+ closed_list.push_back(i);
+ result[current_id].push_back(i);
+ open_list.push_back(i);
+ }
+ }
+ }
+ current_id++;
+ }
+ return result;
+}
+
+/**
+ * @brief Generates two Gnuplot files in the calling directory: One with the filename [filename_prefix]_edgeRemoval.gnuplot and another with the filename [filename_prefix]_segmentation.gnuplot
+ * [filename_prefix]_edgeRemoval.gnuplot:
+ * * Demonstrates the Edges that are removed either because they are long or because they are head of a branch.
+ * * The color of the points corresponds to the index to show the order of the points. The new roots of the resulting subtrees are also shown.
+ * @param f
+ * @param filename_prefix
+ */
+void Graph::generateGnuplot(bool f, std::string filename_prefix) {
+
+ // option for graph output:
+ // 1: Remove Edges Plot
+ // 2: Cluster-Ids Plot
+
+
+ if(f) {
+ // demo edge Removal in MST
+ std::ofstream outfile1 (filename_prefix+"_edgeRemoval.gnuplot");
+ std::vector idx;
+ size_t index;
+ size_t edge_to;
+ Edge e;
+ Edge edge;
+ Node * node_ptr;
+ if ( outfile1 ) {
+ outfile1<<"set palette rgb 34,35,36;\n";
+ //outfile<<"splot '-' using 1:2:3 with points palette; pause -1;\n";
+ outfile1<<"splot '-' using 1:2:3 with points palette pointtype 7 ps 1, '-' with lines lc 'black' title 'Kept', '-' with lines lc 'green' title 'Removed (Too long)', '-' with lines lc 'magenta' title 'Removed (Is Branch)', '-' with points lc 'web-blue' pointtype 1 ps 2 title 'Head of Segment'; pause -1;\n"; //, '-' using 1:2:3 with points lc 'blue' ps 1, '-' with lines lc 'blue' title 'Removal Candidate'; pause -1;\n";
+ for (std::vector::iterator iterator = indices.begin(); iterator != indices.end(); ++iterator) {
+ index = *iterator;
+ outfile1<::iterator iterator = indices.begin(); iterator != indices.end(); ++iterator) {
+ index = *iterator;
+ graph[index].get_next(&idx);
+ for (std::vector::iterator iterator2 = idx.begin(); iterator2 != idx.end(); ++iterator2) {
+ edge_to = *iterator2;
+ outfile1<::iterator iterator = this->removed_long_edges.begin(); iterator != this->removed_long_edges.end(); ++iterator) {
+ edge = *iterator;
+ outfile1<x<<" "<y<<" "<z<<"\n";
+ outfile1<x<<" "<y<<" "<z<<"\n";
+ outfile1<<"\n\n";
+ }
+ outfile1<<"e\n";
+ for (std::vector::iterator iterator = this->removed_branch_edges.begin(); iterator != this->removed_branch_edges.end(); ++iterator) {
+ edge = *iterator;
+ outfile1<x<<" "<y<<" "<z<<"\n";
+ outfile1<x<<" "<y<<" "<z<<"\n";
+ outfile1<<"\n\n";
+ }
+ outfile1<<"e\n";
+ for (std::vector::iterator iterator = this->subtree_roots.begin(); iterator != this->subtree_roots.end(); ++iterator) {
+ node_ptr = *iterator;
+ outfile1<x<<" "<y<<" "<z<<"\n";
+ }
+ outfile1.close();
+ }
+ // demo segmentation
+ std::ofstream outfile(filename_prefix+"_segmentation.gnuplot");
+ std::vector< std::vector > result;
+ result = assignNewCurveIds();
+ if ( outfile ) {
+ outfile<<"splot '-' with points lc 'black' pt 1, ";
+ for(unsigned int cluster = 0; cluster::iterator iterator = indices.begin(); iterator != indices.end(); ++iterator) {
+ index = *iterator;
+ outfile<::iterator iterator = result[cluster].begin(); iterator != result[cluster].end(); ++iterator) {
+ index = *iterator;
+ outfile<::iterator iterator = subtree_roots.begin(); iterator != subtree_roots.end(); ++iterator) {
+ node_ptr = *iterator;
+ outfile<x<<" "<y<<" "<z<<"\n";
+ }
+ outfile<<"e\n";
+ outfile.close();
+ }
+ }
+}
+
+/**
+ * @brief Does a breadth-first search until the max_depth (4). Returns the count of the Edges and the sum of the costs of earch encountered edge.
+ * pair : N edges, Sum of the Cost of the Edges
+ * @return std::pair
+ */
+std::pair<double, double> Node::get_neighbourhood() {
+ int max_depth = 4; // 4-er Nachbarschaft
+ double sum = 0;
+ double N = 0;
+ // open_list is queue
+ std::vector<Node*> open_list;
+ std::vector<Node*> closed_list;
+ Node * v = this;
+ int level=0;
+ Edge e;
+
+ // null Idee von: https://stackoverflow.com/questions/31247634/how-to-keep-track-of-depth-in-breadth-first-search
+
+ open_list.push_back(this);
+ open_list.push_back(NULL);
+ while( (! open_list.empty() ) && (level < max_depth) ) {
+ v = open_list.front(); // queueify
+ open_list.erase(open_list.begin());
+ if(v == NULL){
+ level++;
+ //if(open_list.back() == NULL) break; // changed
+ open_list.push_back(NULL);
+ } else {
+ closed_list.push_back(v);
+ for (std::vector<Edge>::iterator iterator = v->_out.begin(); iterator != v->_out.end(); ++iterator) {
+ e = *iterator;
+ if(std::find(closed_list.begin(), closed_list.end(), e.end) == closed_list.end()) {
+ sum += e.cost;
+ N ++;
+ open_list.push_back(e.end);
+ }
+ }
+ for (std::vector<Edge>::iterator iterator = v->_in.begin(); iterator != v->_in.end(); ++iterator) {
+ e = *iterator;
+ if(std::find(closed_list.begin(), closed_list.end(), e.begin) == closed_list.end()) {
+ sum += e.cost;
+ N ++;
+ open_list.push_back(e.begin);
+ }
+ }
+ }
+
+ }
+ return std::pair<double, double>(N,sum);
+}
+
+/**
+ * @brief Removes the longest Edge(s) leading into a branch of a depth of at least min_depth. Adds the next Node as a new Segment Head and the Edge to a debug vector.
+ * Returns count of removed Edges.
+ * @param min_depth
+ * @param debug_removed_edges
+ * @param segment_heads
+ * @return size_t
+ */
+size_t Node::removeBranchEdges(size_t min_depth, std::vector<Edge> * debug_removed_edges, std::vector<Node*> * segment_heads) {
+
+ size_t counter=0;
+ std::vector<Edge> removal_candidates;
+ Edge i;
+ Edge i2;
+
+ if(this->isBranchRoot()) { // check if this node is Root of a branch (has > 1 Incoming Edge)
+ for(std::vector<Edge>::iterator it = _in.begin(); it != _in.end();it++) { // for every edge:
+ if(it->begin->depthSearch(min_depth)) { // check depth of branch
+ // if depth > min_depth
+ counter++;
+ removal_candidates.push_back(*it);
+ }
+ }
+ if(counter >= 2) { // is this Node Root at least 2 long Branches?
+ std::vector<Edge>::iterator iteratorMinElement = std::min_element(removal_candidates.begin(), removal_candidates.end());
+ removal_candidates.erase(iteratorMinElement);
+ for (std::vector<Edge>::iterator iterator = removal_candidates.begin(); iterator != removal_candidates.end(); ++iterator) {
+ i = *iterator;
+ debug_removed_edges->push_back(i);
+ }
+ std::vector<Edge> temp_in = _in;
+ _in.clear();
+ for (std::vector<Edge>::iterator iterator = temp_in.begin(); iterator != temp_in.end(); ++iterator) {
+ i2 = *iterator;
+ if (! (std::find(removal_candidates.begin(), removal_candidates.end(), i2) != removal_candidates.end())) {
+ _in.push_back(i2);
+ } else {
+ i.begin->remove_out_edge(this);
+ segment_heads->push_back(i2.begin);
+ }
+ }
+ temp_in.clear();
+ removal_candidates.clear();
+ return counter-1;
+ }
+ }
+ return 0;
+}
+
+
+/**
+ * @brief Calls get_neighbourhood and then checks for every incoming edge e with weight w:
+ * * get the mean weight length of the neighbourhood edges (without e): n_mean
+ * * if w > (n_mean*4.5):
+ * * * edge should be removed due to being long
+ * * * add the edge to the debug vector
+ * * * add the begin of the edge to the segment head vector
+ * * * remove edge e
+ * @param debug_removed_edges: is potentially added to
+ * @param segment_heads: is potentially added to
+ * @return size_t: how many incoming edges to this Node were removed
+ */
+size_t Node::removeLongEdges(std::vector<Edge> * debug_removed_edges, std::vector<Node*> * segment_heads) {
+ std::pair<double, double> n_sum = get_neighbourhood();
+ double cost_sum = n_sum.second;
+ int n_edges = n_sum.first;
+ size_t counter = 0;
+ double n_mean = 0;
+ for(std::vector<Edge>::iterator it = _in.begin(); it != _in.end();) { // for every edge, do
+ n_mean = (cost_sum-it->cost)/(n_edges-1);
+ if(it->cost > (n_mean*4.5)) { // if cost of edge > "Vielfaches" (4.5) of mean Neighbourhood Costs.
+ debug_removed_edges->push_back(*it); // add edge to removed_edges in Graph object, for gnuplot debug file
+ segment_heads->push_back(it->begin);
+ it->begin->remove_out_edge(this);
+ it = remove_in_edge(it); // remove the edge.
+ counter++;
+ } else {
+ it++;
+ }
+ }
+ return counter;
+}
+
+/**
+ * @brief gets the indexes of the connected Nodes, either just outgoing edges or also incoming edges if full=true
+ *
+ * @return result
+ * @param full: true if you want indexes of both incoming and outgoing edges
+ */
+void Node::get_next(std::vector<size_t> * result, bool full) {
+ (*result).clear();
+ Edge edge;
+ for (std::vector<Edge>::iterator iterator = _out.begin(); iterator != _out.end(); ++iterator) {
+ edge = *iterator;
+ (*result).push_back(edge.end->index);
+ }
+ if(full) {
+ for (std::vector<Edge>::iterator iterator = _in.begin(); iterator != _in.end(); ++iterator) {
+ edge = *iterator;
+ (*result).push_back(edge.begin->index);
+ }
+ }
+
+}
diff --git a/src/spyral_utils/tripclust/src/directedgraph.h b/src/spyral_utils/tripclust/src/directedgraph.h
new file mode 100644
index 0000000..2ef6569
--- /dev/null
+++ b/src/spyral_utils/tripclust/src/directedgraph.h
@@ -0,0 +1,209 @@
+//
+// directedgraph.h
+// Class for directed graph and building directed MST.
+//
+// Author: Christoph Dalitz
+// Date: 2022-11-07
+// License: see ../LICENSE
+//
+/** @file */
+
+#ifndef DIRECTEDGRAPH_H
+#define DIRECTEDGRAPH_H
+
+
+#include <map>
+#include <string>
+#include <vector>