Branch data Line data Source code
1 : : #pragma once 2 : : 3 : : #include <Eigen/Dense> 4 : : #include <mutable/mutable.hpp> 5 : : #include <vector> 6 : : 7 : : 8 : : namespace m { 9 : : 10 : : using MatrixRXf = Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>; 11 : : 12 : : /** Compute initial cluster centroids using *k*-means++ algorithm. See https://en.wikipedia.org/wiki/K-means%2B%2B. 13 : : * 14 : : * @param data the data to cluster, as `Eigen::Matrix`; rows are data points, columns are attributes of data points 15 : : * @param k the number of initial centroids to compute 16 : : * @return a `MatrixRXf` of `k` data points (rows) chosen as the `k` initial centroids for *k*-means 17 : : */ 18 : : MatrixRXf M_EXPORT kmeans_plus_plus(const Eigen::MatrixXf &data, unsigned k); 19 : : 20 : : /** Clusters the given data according to the *k*-means algorithm. 21 : : * 22 : : * The `k` clusters are represented by `k` unique integers (usually from the range *[0, k)*). The *k*-means algorithm 23 : : * assigns an integer label to each data point, identifying to which cluster the data point is assigned. See 24 : : * https://en.wikipedia.org/wiki/K-means_clustering. 25 : : * 26 : : * @param data the data to cluster, as `Eigen::Matrix`; rows are data points, columns are attributes of data points 27 : : * @param k the number of clusters to form (more like an upper bound, as clusters may be empty) 28 : : * @return a `std::pair` of a `std::vector<unsigned>` assigning a label to each data point and an `Eigen::Matrix` of 29 : : * `k` rows with the centroids of the formed clusters 30 : : */ 31 : : std::pair<std::vector<unsigned>, MatrixRXf> M_EXPORT kmeans_with_centroids(const Eigen::MatrixXf &data, unsigned k); 32 : : 33 : : /** Clusters the given data according to the *k*-means algorithm. 34 : : * 35 : : * The `k` clusters are represented by `k` unique integers (usually from the range *[0, k)*). The *k*-means algorithm 36 : : * assigns an integer label to each data point, identifying to which cluster the data point is assigned. See 37 : : * https://en.wikipedia.org/wiki/K-means_clustering. 38 : : * 39 : : * @param data the data to cluster, as `Eigen::Matrix`; rows are data points, columns are attributes of data points 40 : : * @param k the number of clusters to form (more like an upper bound, as clusters may be empty) 41 : : * @return a `std::vector<unsigned>` assigning a label to each data point 42 : : */ 43 : 5 : inline std::vector<unsigned> M_EXPORT kmeans(const Eigen::MatrixXf &data, unsigned k) { 44 : 5 : return kmeans_with_centroids(data, k).first; 45 : : } 46 : : 47 : : }