(systemds) branch main updated: [SYSTEMDS-3858] New HDBSCAN builtin function

mboehm7 Sun, 29 Mar 2026 02:13:39 -0700

This is an automated email from the ASF dual-hosted git repository.

mboehm7 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git



The following commit(s) were added to refs/heads/main by this push:
     new 00e02f32bf [SYSTEMDS-3858] New HDBSCAN builtin function
00e02f32bf is described below

commit 00e02f32bf7858abfbac4eb2ceb42b4bb5984f4e
Author: anuunchin <[email protected]>
AuthorDate: Sun Mar 29 11:06:29 2026 +0200

    [SYSTEMDS-3858] New HDBSCAN builtin function
    
    Closes #2381.
---
 scripts/staging/hdbscan/hdbscan.dml                | 456 +++++++++++++++++++++
 scripts/staging/hdbscan/test_build_hierarchy.dml   | 142 +++++++
 scripts/staging/hdbscan/test_build_mst.dml         |  68 +++
 scripts/staging/hdbscan/test_cluster_model.dml     | 151 +++++++
 .../hdbscan/test_extract_stabe_clusters.dml        | 158 +++++++
 scripts/staging/hdbscan/test_hdbscan.dml           |  73 ++++
 scripts/staging/hdbscan/test_kth_smallest.dml      |  36 ++
 .../staging/hdbscan/test_mutual_reachability.dml   |  48 +++
 scripts/staging/hdbscan/test_union_find.dml        | 161 ++++++++
 9 files changed, 1293 insertions(+)

diff --git a/scripts/staging/hdbscan/hdbscan.dml 
b/scripts/staging/hdbscan/hdbscan.dml
new file mode 100644
index 0000000000..5d989574a9
--- /dev/null
+++ b/scripts/staging/hdbscan/hdbscan.dml
@@ -0,0 +1,456 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# The hdbscan function is used to perform the hdbscan clustering
+# algorithm using knn-based mutual reachability distance and minimum spanning 
tree.
+#
+# INPUT:
+# ------------------------------------------------------------
+# X             The input Matrix to do hdbscan on.
+# minPts        Minimum number of points for core distance computation. 
(Defaults to 5)
+# minClSize     Minimum cluster size (Defaults to minPts)              
+# ------------------------------------------------------------
+#
+# OUTPUT:
+# ------------------------------------------------------------
+# clusterMems   Cluster labels for each point
+# clusterModel  The cluster centroids for prediction
+# ------------------------------------------------------------
+
+# TODO: m,s , f?
+m_hdbscan = function(Matrix[Double] X, Integer minPts = 5, Integer minClSize = 
-1)
+    return (Matrix[Double] clusterMems, Matrix[Double] clusterModel)
+{
+    if(minPts < 2) {
+        stop("HDBSCAN: minPts should be at least 2")
+    }
+
+    if(minClSize < 0) {
+        minClSize = minPts
+    }
+
+    n = nrow(X)
+    d = ncol(X)
+
+    if(n < minPts) {
+        stop("HDBSCAN: Number of data points should be at least minPts")
+    }
+
+    distances = dist(X)
+
+    coreDistances = matrix(0, rows=n, cols=1)
+    for(i in 1:n) {
+        kthDist = computeKthSmallest(t(distances[i,]), minPts)  # Add t() here!
+        coreDistances[i] = kthDist
+    }
+   
+    mutualReachDist = computeMutualReachability(distances, coreDistances)
+
+    [mstEdges, mstWeights] = buildMST(mutualReachDist, n)
+
+    [hierarchy, clusterSizes] = buildHierarchy(mstEdges, mstWeights, n)
+
+    [clusterMems, stabilities, clusterToNode] = 
extractStableClusters(hierarchy, mstWeights, n, minClSize)
+
+    [centroids, clusterInfo] = buildClusterModel(X, clusterMems, stabilities, 
clusterToNode)
+
+    clusterModel = centroids
+}
+
+
+computeKthSmallest = function(Matrix[Double] array, Integer k)
+    return (Double res)
+{
+  sorted = order(target=array, by=1, decreasing=FALSE)
+  res = as.scalar(sorted[k+1, 1])
+}
+
+
+computeMutualReachability = function(Matrix[Double] distances, Matrix[Double] 
coreDistances)
+    return (Matrix[Double] mutualReach)
+{
+    # mutualReach(i,j) = max(dist(i,j), coreDist(i), coreDist(j))
+    # Diagonal is set to zero.
+
+    n = nrow(distances)
+  
+    coreDistRow = t(coreDistances)
+    coreDistCol = coreDistances
+  
+    maxCoreRow = (distances > coreDistRow) * distances + (distances <= 
coreDistRow) * coreDistRow
+    mutualReach = (maxCoreRow > coreDistCol) * maxCoreRow + (maxCoreRow <= 
coreDistCol) * coreDistCol
+  
+    mutualReach = mutualReach * (1 - diag(matrix(1, rows=n, cols=1)))
+}
+
+
+buildMST = function(Matrix[Double] distances, Integer n)
+    return (Matrix[Double] edges, Matrix[Double] weights)
+{
+  edges = matrix(0, rows=n-1, cols=2)
+  weights = matrix(0, rows=n-1, cols=1)
+  
+  inMST = matrix(0, rows=n, cols=1)
+  inMST[1] = 1
+  
+  minDist = distances[1,]
+  minDist = t(minDist)
+  
+  for(i in 1:(n-1)) {
+    candidates = minDist + inMST * 1e15 
+    minIdx = as.scalar(rowIndexMin(t(candidates)))
+    minWeight = as.scalar(minDist[minIdx])
+    
+    # Find which node in MST connects to minIdx
+    connectIdx = as.scalar(rowIndexMin(distances[minIdx,] + t(1-inMST) * 1e15))
+    edges[i,1] = minIdx
+    edges[i,2] = connectIdx
+    weights[i] = minWeight
+    
+    inMST[minIdx] = 1
+    newDists = distances[minIdx,]
+    minDist = (minDist < t(newDists)) * minDist + (minDist >= t(newDists)) * 
t(newDists)
+  }
+}
+
+
+# Union-find utils
+find = function(Matrix[Double] parent, Integer x)
+    return (Integer root)
+{
+    root = x
+    while(as.scalar(parent[root]) != root) {
+        root = as.integer(as.scalar(parent[root]))
+    }
+}
+
+
+union = function(Matrix[Double] parent, Matrix[Double] rank,
+                Integer x, Integer y, Matrix[Double] size,
+                Matrix[Double] compToNode, Integer newId)
+    return (Matrix[Double] newParent, Matrix[Double] newRank,
+            Matrix[Double] newSize, Matrix[Double] newCompToNode, Integer 
newRoot)
+{
+    newParent = parent
+    newRank = rank
+    newSize = size
+    newCompToNode = compToNode
+
+    rankX = as.scalar(rank[x,1])
+    rankY = as.scalar(rank[y,1])
+
+    combinedSize = as.scalar(size[x,1]) + as.scalar(size[y,1])
+
+    if(rankX < rankY) {
+        newParent[x] = y
+        newSize[y,1] = combinedSize
+        newCompToNode[y,1] = newId
+        newRoot = y
+    }
+    else if(rankX > rankY) {
+        newParent[y] = x
+        newSize[x,1] = combinedSize
+        newCompToNode[x,1] = newId
+        newRoot = x
+    }
+    else {
+        newParent[y] = x
+        newRank[x,1] = rankX + 1
+        newSize[x,1] = combinedSize
+        newCompToNode[x,1] = newId
+        newRoot = x
+    }
+}
+
+
+buildHierarchy = function(Matrix[Double] edges, Matrix[Double] weights, 
Integer n)
+    return (Matrix[Double] hierarchy, Matrix[Double] sizes)
+{
+    # create indexed weights to preserve original positions after sorting
+    indexedWeights = cbind(seq(1, nrow(weights)), weights)
+    sorted = order(target=indexedWeights, by=2, decreasing=FALSE)
+
+    # parent[i] = i, meaning each point is its own parent in the beginning
+    parent = seq(1, n)
+    # tree depth when unioning (icnreases only when merged trees are of same 
height)
+    rank = matrix(0, rows=n, cols=1)
+
+    # initially, each component is a single point, so root i maps to leaf node 
i.
+    # Later, when two components merge, the new component root will map to a 
new
+    # internal linkage node id (n+1, n+2, ..., 2n-1)
+    compToNode = seq(1, n)
+
+    # size (number of original points) for each linkage-tree node id,
+    # leaves 1..n have size 1.
+    nodeSize = matrix(0, rows=2*n-1, cols=1)
+    for(j in 1:n) {
+        nodeSize[j,1] = 1
+    }
+
+    # hierarcy[i, :] = [cluster1_id, cluster2_id, merge_distance]
+    hierarchy = matrix(0, rows=n-1, cols=3)
+
+    # sizes[i] = size of the cluster created by merge i
+    sizes = matrix(0, rows=n-1, cols=1)
+
+    row = 1
+    for(i in 1:(n-1)) {
+        idx = as.integer(as.scalar(sorted[i,1]))
+        u = as.integer(as.scalar(edges[idx,1]))
+        v = as.integer(as.scalar(edges[idx,2]))
+        w = as.scalar(weights[idx,1])
+
+        root_u = find(parent, u)
+        root_v = find(parent, v)
+
+        if(root_u != root_v) {
+            left  = as.integer(as.scalar(compToNode[root_u]))
+            right = as.integer(as.scalar(compToNode[root_v]))
+
+            newId = n + row
+
+            hierarchy[row,1] = left
+            hierarchy[row,2] = right
+            hierarchy[row,3] = w
+
+            nodeSize[newId,1] = as.scalar(nodeSize[left,1]) + 
as.scalar(nodeSize[right,1])
+            sizes[row,1] = as.scalar(nodeSize[newId,1])
+
+            [parent, rank, ufSize, compToNode, newRoot] =
+                union(parent, rank, root_u, root_v, nodeSize, compToNode, 
newId)
+
+            row = row + 1
+        }
+    }
+}
+
+
+# Leaf descendants util
+getLeafDescendants = function(Matrix[Double] hierarchy, Integer n, Integer 
nodeId)
+    return (Matrix[Double] leaves)
+{
+    if(nodeId <= n) {
+        leaves = matrix(nodeId, rows=1, cols=1)
+    } else {
+        mergeIdx = nodeId - n
+        left = as.integer(as.scalar(hierarchy[mergeIdx,1]))
+        right = as.integer(as.scalar(hierarchy[mergeIdx,2]))
+        
+        leftLeaves = getLeafDescendants(hierarchy, n, left)
+        rightLeaves = getLeafDescendants(hierarchy, n, right)
+        
+        leaves = rbind(leftLeaves, rightLeaves)
+    }
+}
+
+
+extractStableClusters = function(Matrix[Double] hierarchy, Matrix[Double] 
weights, 
+                                  Integer n, Integer minClSize)
+    return (Matrix[Double] labels, Matrix[Double] stabilities, Matrix[Double] 
clusterToNode)
+{
+    numMerges = n - 1 # hierarchical tree over n points has exactly n-1 merge 
events
+    numNodes = 2*n - 1 # total nodes in the dendogram
+    
+    # convert distances to lambda (density)
+    lambda = matrix(0, rows=numMerges, cols=1)
+    for(i in 1:numMerges) {
+        dist = as.scalar(hierarchy[i,3])
+        if(dist > 0) {
+            lambda[i,1] = 1.0 / dist
+        } else {
+            lambda[i,1] = 1e15
+        }
+    }
+    
+    lambda_birth = matrix(1e15, rows=numNodes, cols=1)
+    lambda_death = matrix(0, rows=numNodes, cols=1)
+    cluster_size = matrix(0, rows=numNodes, cols=1)
+
+    # initialize the leaf nodes to have cluster size 1
+    for(i in 1:n) {
+        cluster_size[i,1] = 1
+    }
+    
+    for(i in 1:numMerges) {
+        left = as.integer(as.scalar(hierarchy[i,1]))
+        right = as.integer(as.scalar(hierarchy[i,2]))
+        newId = n + i
+        merge_lambda = as.scalar(lambda[i,1])
+        
+        # cluster newId starts existing as its own cluster at this density 
level
+        # and that's why the children get their det set at the same density
+        lambda_birth[newId,1] = merge_lambda
+        lambda_death[left,1] = merge_lambda
+        lambda_death[right,1] = merge_lambda
+        cluster_size[newId,1] = as.scalar(cluster_size[left,1]) + 
as.scalar(cluster_size[right,1])
+    }
+    
+    # root cluster exists all the way
+    rootId = 2*n - 1
+    lambda_death[rootId,1] = 0
+    
+    # compute own stability for each internal node
+    # NOTE: If the cluster is big enough, we assign stability.
+    # The more long-lived it is (birth - death) and 
+    # the larger it is, the more stable it is.
+    stability = matrix(0, rows=numNodes, cols=1)
+    for(nodeId in (n+1):numNodes) {
+        size = as.scalar(cluster_size[nodeId,1])
+        birth = as.scalar(lambda_birth[nodeId,1])
+        death = as.scalar(lambda_death[nodeId,1])
+        if(size >= minClSize) {
+            stability[nodeId,1] = size * (birth - death)
+        }
+    }
+    
+    # compute subtree stability (best achievable from each subtree)
+    subtree_stability = matrix(0, rows=numNodes, cols=1)
+    
+    # leaf nodes have 0 subtree stability
+    for(i in 1:n) {
+        subtree_stability[i,1] = 0
+    }
+    
+    # process merges in order (bottom-up)
+    for(i in 1:numMerges) {
+        nodeId = n + i
+        left = as.integer(as.scalar(hierarchy[i,1]))
+        right = as.integer(as.scalar(hierarchy[i,2]))
+        
+        children_subtree = as.scalar(subtree_stability[left,1]) + 
as.scalar(subtree_stability[right,1])
+        own_stab = as.scalar(stability[nodeId,1])
+        
+        # Subtree stability is the best we can achieve from this subtree
+        if(children_subtree > own_stab) {
+            subtree_stability[nodeId,1] = children_subtree
+        } else {
+            subtree_stability[nodeId,1] = own_stab
+        }
+    }
+    
+    # select clusters
+    selected = matrix(0, rows=numNodes, cols=1)
+    selected[rootId,1] = 1
+    
+    i = numMerges
+    while(i >= 1) {
+        nodeId = n + i
+        
+        if(as.scalar(selected[nodeId,1]) == 1) {
+            left = as.integer(as.scalar(hierarchy[i,1]))
+            right = as.integer(as.scalar(hierarchy[i,2]))
+            
+            children_subtree = as.scalar(subtree_stability[left,1]) + 
as.scalar(subtree_stability[right,1])
+            own_stab = as.scalar(stability[nodeId,1])
+            parent_size = as.scalar(cluster_size[nodeId,1])
+            
+            # select children if they have higher subtree stability
+            if(parent_size < minClSize | children_subtree > own_stab) {
+                selected[nodeId,1] = 0
+                selected[left,1] = 1
+                selected[right,1] = 1
+            }
+        }
+        
+        i = i - 1
+    }
+    
+    # assign labels
+    labels = matrix(-1, rows=n, cols=1)
+    cluster_id = 1
+
+    # while tracking which node each cluster comes from
+    maxClusters = sum(selected)  # upper bound on number of clusters
+    clusterToNode = matrix(0, rows=maxClusters, cols=1)
+   
+    for(nodeId in 1:numNodes) {
+        if(as.scalar(selected[nodeId,1]) == 1) {
+            size = as.scalar(cluster_size[nodeId,1])
+            
+            if(size >= minClSize) {
+                clusterToNode[cluster_id,1] = nodeId  # ADD THIS LINE
+                
+                leaves = getLeafDescendants(hierarchy, n, nodeId)
+                
+                for(j in 1:nrow(leaves)) {
+                    leafId = as.integer(as.scalar(leaves[j,1]))
+                    if(leafId >= 1 & leafId <= n) {
+                        labels[leafId,1] = cluster_id
+                    }
+                }
+                
+                cluster_id = cluster_id + 1
+            }
+        }
+    }
+    
+    # trim clusterToNode to actual number of clusters
+    numActualClusters = cluster_id - 1
+    if(numActualClusters > 0) {
+        clusterToNode = clusterToNode[1:numActualClusters,]
+    } else {
+        clusterToNode = matrix(0, rows=0, cols=1)
+    }
+
+    stabilities = stability
+
+}
+
+
+buildClusterModel = function(Matrix[Double] X, Matrix[Double] labels, 
+                             Matrix[Double] stabilities, Matrix[Double] 
clusterToNode)
+    return (Matrix[Double] centroids, Matrix[Double] clusterInfo)
+{
+    n = nrow(X)
+    d = ncol(X)
+
+    numClusters = as.integer(max(labels))
+
+    if(numClusters <= 0) {
+        # No clusters found, return empty model
+        centroids = matrix(0, rows=0, cols=d)
+        clusterInfo = matrix(0, rows=0, cols=2)
+    } else {
+        centroids = matrix(0, rows=numClusters, cols=d)
+        clusterInfo = matrix(0, rows=numClusters, cols=2)
+
+        for(c in 1:numClusters) {
+            # find all points in cluster c
+            mask = (labels == c)
+            clusterSize = sum(mask)
+
+            if(clusterSize > 0) {
+                # get centroid as mean of all points in cluster
+                clusterSum = t(mask) %*% X
+                centroids[c,] = clusterSum / clusterSize
+
+                # store cluster metadata: [size, stability]
+                clusterInfo[c,1] = clusterSize
+
+                # get stability for this cluster from the mapping
+                if(nrow(clusterToNode) >= c & as.scalar(clusterToNode[c,1]) > 
0) {
+                    nodeId = as.integer(as.scalar(clusterToNode[c,1]))
+                    clusterInfo[c,2] = as.scalar(stabilities[nodeId,1])
+                }
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/scripts/staging/hdbscan/test_build_hierarchy.dml 
b/scripts/staging/hdbscan/test_build_hierarchy.dml
new file mode 100644
index 0000000000..fc323c23d2
--- /dev/null
+++ b/scripts/staging/hdbscan/test_build_hierarchy.dml
@@ -0,0 +1,142 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+source("scripts/builtin/hdbscan.dml") as hdb
+
+#        4
+#      / | \
+#     /  |  \
+#    /  (2)  (5)
+#    |   |    \
+#    |   |     \
+#   (2)  1-(3)--3
+#    |   |    /
+#    \   |   (4)
+#     \  (1) /
+#      \ | /
+#       \|/
+#        2
+
+distances = matrix(0, rows=4, cols=4)
+distances[1,2] = 1 
+distances[2,1] = 1
+
+distances[1,3] = 3 
+distances[3,1] = 3
+
+distances[1,4] = 2  
+distances[4,1] = 2
+
+distances[2,3] = 4 
+distances[3,2] = 4
+
+distances[2,4] = 2
+distances[4,2] = 2
+
+distances[3,4] = 5 
+distances[4,3] = 5
+
+[edges, weights] = hdb::buildMST(distances, 4)
+
+[hierarchy, sizes] = hdb::buildHierarchy(edges, weights, 4)
+
+print("Hierarchy (format: [cluster1, cluster2, merge_distance]):")
+print(toString(hierarchy))
+
+# Should have n-1 merge operations for n nodes
+num_merges = nrow(hierarchy)
+print("Number of merges: " + num_merges + " (should be 3)")
+test1 = (num_merges == 3)
+
+# Merge distances should be in ascending order (or equal)
+# Because we process edges from low weight to high weight
+dist1 = as.scalar(hierarchy[1,3])
+dist2 = as.scalar(hierarchy[2,3])
+dist3 = as.scalar(hierarchy[3,3])
+print("\nMerge distances: [" + dist1 + ", " + dist2 + ", " + dist3 + "]" + " 
(Should be in ascending order)")
+test2 = (dist1 <= dist2) & (dist2 <= dist3)
+
+# Cluster sizes should increase
+size1 = as.scalar(sizes[1])
+size2 = as.scalar(sizes[2])
+size3 = as.scalar(sizes[3])
+print("\nCluster sizes: [" + size1 + ", " + size2 + ", " + size3 + "]" + " 
(Should be increasing)")
+test3 = (size1 <= size2) & (size2 <= size3)
+
+# Final size should equal total number of nodes
+print("Final cluster size: " + size3 + " (should be 4)")
+test4 = (size3 == 4)
+
+# First merge should be size 2
+print("First merge size: " + size1 + " (should be 2)")
+test5 = (size1 == 2)
+
+# New classic-linkage checks
+n = 4
+hasInternal = (sum(hierarchy[,1] > n) + sum(hierarchy[,2] > n)) > 0
+print("Has internal node ids (>n): " + hasInternal + " (should be true)")
+test6 = hasInternal
+
+# Check that child ids are within valid range
+maxChild1 = max(hierarchy[,1])
+maxChild2 = max(hierarchy[,2])
+maxChild = maxChild1
+if(maxChild2 > maxChild) { maxChild = maxChild2 }
+
+print("Max child id: " + maxChild + " (should be <= " + (2*n-2) + ")")
+test7 = (maxChild <= (2*n-2))
+
+# Check that internal ids are “created in order”
+test8 = TRUE
+for(r in 1:(n-1)) {
+    child1 = as.scalar(hierarchy[r,1])
+    child2 = as.scalar(hierarchy[r,2])
+    newId = n + r
+    ok = (child1 < newId) & (child2 < newId)
+    test8 = test8 & ok
+}
+print("Children reference only existing nodes: " + test8 + " (should be true)")
+
+# Recompute node sizes from hierarchy and verify
+nodeSize = matrix(0, rows=2*n-1, cols=1)
+for(j in 1:n) { nodeSize[j,1] = 1 }
+
+test9 = TRUE
+for(r in 1:(n-1)) {
+    left = as.integer(as.scalar(hierarchy[r,1]))
+    right = as.integer(as.scalar(hierarchy[r,2]))
+    newId = n + r
+
+    expected = as.scalar(nodeSize[left,1]) + as.scalar(nodeSize[right,1])
+    nodeSize[newId,1] = expected
+
+    ok = (as.scalar(sizes[r,1]) == expected)
+    test9 = test9 & ok
+}
+print("sizes[r] equals sum of child sizes: " + test9 + " (should be true)")
+
+test_pass = test1 & test2 & test3 & test4 & test5 & test6 & test7 & test8 & 
test9
+
+if(test_pass) {
+    print("Passed")
+} else {
+    print("Failed")
+}
\ No newline at end of file
diff --git a/scripts/staging/hdbscan/test_build_mst.dml 
b/scripts/staging/hdbscan/test_build_mst.dml
new file mode 100644
index 0000000000..4ec05e18fb
--- /dev/null
+++ b/scripts/staging/hdbscan/test_build_mst.dml
@@ -0,0 +1,68 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+source("scripts/builtin/hdbscan.dml") as hdb
+
+#        4
+#      / | \
+#     /  |  \
+#    /  (2)  (5)
+#    |   |    \
+#    |   |     \
+#   (2)  1-(3)--3
+#    |   |    /
+#    \   |   (4)
+#     \  (1) /
+#      \ | /
+#       \|/
+#        2
+
+distances = matrix(0, rows=4, cols=4)
+distances[1,2] = 1 
+distances[2,1] = 1
+
+distances[1,3] = 3 
+distances[3,1] = 3
+
+distances[1,4] = 2  
+distances[4,1] = 2
+
+distances[2,3] = 4 
+distances[3,2] = 4
+
+distances[2,4] = 2
+distances[4,2] = 2
+
+distances[3,4] = 5 
+distances[4,3] = 5
+
+[edges, weights] = hdb::buildMST(distances, 4)
+
+totalWeight = sum(weights)
+
+test_pass = (nrow(edges) == 3) & (totalWeight == 6)
+
+if(test_pass) {
+    print("Passed")
+} else {
+    print("Failed")
+}
\ No newline at end of file
diff --git a/scripts/staging/hdbscan/test_cluster_model.dml 
b/scripts/staging/hdbscan/test_cluster_model.dml
new file mode 100644
index 0000000000..363b356ab5
--- /dev/null
+++ b/scripts/staging/hdbscan/test_cluster_model.dml
@@ -0,0 +1,151 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+source("scripts/builtin/hdbscan.dml") as hdb
+
+# 3 clear clusters
+# A: points around (0, 0)
+# B: points around (10, 10)
+# C: points around (20, 0)
+
+n = 12
+d = 2
+X = matrix(0, rows=n, cols=d)
+
+# A
+X[1,] = matrix("0.0 0.0", rows=1, cols=2)
+X[2,] = matrix("0.5 0.5", rows=1, cols=2)
+X[3,] = matrix("-0.5 0.5", rows=1, cols=2)
+X[4,] = matrix("0.0 -0.5", rows=1, cols=2)
+
+# B
+X[5,] = matrix("10.0 10.0", rows=1, cols=2)
+X[6,] = matrix("10.5 10.5", rows=1, cols=2)
+X[7,] = matrix("9.5 10.5", rows=1, cols=2)
+X[8,] = matrix("10.0 9.5", rows=1, cols=2)
+
+# C
+X[9,] = matrix("20.0 0.0", rows=1, cols=2)
+X[10,] = matrix("20.5 0.5", rows=1, cols=2)
+X[11,] = matrix("19.5 0.5", rows=1, cols=2)
+X[12,] = matrix("20.0 -0.5", rows=1, cols=2)
+
+print(toString(X))
+
+
+# get distances
+distances = hdb::dist(X)
+
+
+# get core distances
+minPts = 3
+coreDistances = matrix(0, rows=n, cols=1)
+for(i in 1:n) {
+    kthDist = hdb::computeKthSmallest(t(distances[i,]), minPts)
+    coreDistances[i] = kthDist
+}
+
+
+# get mutual reachability
+mutualReach = hdb::computeMutualReachability(distances, coreDistances)
+
+
+# get MST
+[edges, weights] = hdb::buildMST(mutualReach, n)
+
+
+# get hierarchy
+[hierarchy, sizes] = hdb::buildHierarchy(edges, weights, n)
+
+
+# get stable clusters
+minClSize = 3
+[labels, stabilities, clusterToNode] = hdb::extractStableClusters(hierarchy, 
weights, n, minClSize)
+expected_labels = matrix("1 1 1 1 2 2 2 2 3 3 3 3", rows=12, cols=1)
+labels_match = (min(labels == expected_labels) == 1)
+if (labels_match) {
+    print("Pass: labels match.")
+} else {
+    print("Fail: labels don't match.")
+}
+print("Cluster labels:")
+print(toString(labels))
+
+
+# get cluster model
+[centroids, clusterInfo] = hdb::buildClusterModel(X, labels, stabilities, 
clusterToNode)
+
+print("\nCentroids:")
+print(toString(centroids))
+
+print("\nInfo [size, stability]:")
+print(toString(clusterInfo))
+
+
+# check results
+numClusters = nrow(centroids)
+print("\nNumber of clusters found: " + numClusters)
+
+
+# should find 3 clusters
+test1 = (numClusters == 3)
+print("Found 3 clusters: " + test1)
+
+
+# each cluster should have 4 points
+allSizesFour = TRUE
+for(c in 1:numClusters) {
+    size = as.scalar(clusterInfo[c,1])
+    allSizesFour = allSizesFour & (size == 4)
+}
+print("All clusters have size 4: " + allSizesFour)
+
+
+test_A = min(sqrt(rowSums((centroids - matrix("0 0.125", 1, 2))^2))) < 0.001
+test_B = min(sqrt(rowSums((centroids - matrix("10 10.125", 1, 2))^2))) < 0.001
+test_C = min(sqrt(rowSums((centroids - matrix("20 0.125", 1, 2))^2))) < 0.001
+all_found = test_A & test_B & test_C
+print("Expected centroids near expected positions: " + all_found)
+
+
+# no noise (all assigned to clusters)
+numNoise = sum(labels == -1)
+test4 = (numNoise == 0)
+print("No noise points: " + test4)
+
+
+# stabilities are populated (not 0)
+test_stability = TRUE
+for(c in 1:numClusters) {
+    stab = as.scalar(clusterInfo[c,2])
+    test_stability = test_stability & (stab > 0)
+}
+print("Stabilities populated: " + test_stability)
+
+
+test_pass = test1 & allSizesFour & all_found & test4 & test_stability
+
+if(test_pass) {
+    print("\nAll tests passed")
+} else {
+    print("\nTests failed")
+}
\ No newline at end of file
diff --git a/scripts/staging/hdbscan/test_extract_stabe_clusters.dml 
b/scripts/staging/hdbscan/test_extract_stabe_clusters.dml
new file mode 100644
index 0000000000..8d19acf7cd
--- /dev/null
+++ b/scripts/staging/hdbscan/test_extract_stabe_clusters.dml
@@ -0,0 +1,158 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+source("scripts/builtin/hdbscan.dml") as hdb
+
+# 6 point example with clear cluster structure
+# 1,2,3 form tight cluster A, 4,5,6 form tight cluster B, A and B are far 
apart (10)
+
+n = 6
+distances = matrix(10, rows=n, cols=n)
+
+# points 1,2,3 (cluster A)
+distances[1,2] = 1
+distances[2,1] = 1
+
+distances[1,3] = 2
+distances[3,1] = 2
+
+distances[2,3] = 1
+distances[3,2] = 1
+
+# points 4,5,6 (cluster B)
+distances[4,5] = 1
+distances[5,4] = 1
+
+distances[4,6] = 2
+distances[6,4] = 2
+
+distances[5,6] = 1
+distances[6,5] = 1
+
+# zero diagonal (to self)
+for(i in 1:n) {
+    distances[i,i] = 0
+}
+
+
+print("\nBuilding MST")
+expected_edges = matrix("2 1  3 2  6 3  5 6  4 5", rows=5, cols=2)
+expected_weights = matrix("1 1 10 1 1", rows=5, cols=1)
+[edges, weights] = hdb::buildMST(distances, n)
+edges_match = (min(edges == expected_edges) == 1)
+weights_match = (min(weights == expected_weights) == 1)
+if (edges_match) {
+    print("Pass: edges match.")
+} else {
+    print("Fail: edges don't match.")
+}
+if (weights_match) {
+    print("Pass: weights match.")
+} else {
+    print("Fail: weights don't match.")
+}
+print("MST edges:\n" + toString(edges))
+print("MST weights:\n " + toString(weights))
+
+
+print("\nBuilding hierarchy")
+[hierarchy, sizes] = hdb::buildHierarchy(edges, weights, n)
+expected_hierarchy = matrix("2 1 1  3 7 1  5 6 1  4 9 1  10 8 10", rows=5, 
cols=3)
+expected_sizes = matrix("2 3 2 3 6", rows=5, cols=1)
+hierachy_match = (min(hierarchy == expected_hierarchy) == 1)
+sizes_match = (min(sizes == expected_sizes) == 1)
+if (hierachy_match) {
+    print("Pass: hierachy mathes.")
+} else {
+    print("Fail: hierarchy doesn't match.")
+}
+if (sizes_match) {
+    print("Pass: sizes match.")
+} else {
+    print("Fail: sizes don't match.")
+}
+print("Hierarchy:\n" + toString(hierarchy))
+print("Sizes:\n" + toString(sizes))
+
+
+print("\nExtracting stable clusters with minClSize=2")
+[labels, stabilities] = hdb::extractStableClusters(hierarchy, weights, n, 2)
+expected_labels = matrix("1 1 1 2 2 2", rows=6, cols=1)
+expected_stabilites = matrix("0 0 0 0 0 0 0 2.7 0 2.7 0.6", rows=n*2-1, cols=1)
+labels_match = (min(labels == expected_labels) == 1)
+tolerance = 1e-10
+stabilities_match = max(abs(stabilities - expected_stabilites)) < tolerance
+if (labels_match) {
+    print("Pass: labels match.")
+} else {
+    print("Fail: labels don't match.")
+}
+if (stabilities_match) {
+    print("Pass: stabilities match.")
+} else {
+    print("Fail: stabilities don't match.")
+}
+print("Cluster labels:\n" + toString(labels))
+print("Top stabilities:\n" + toString(stabilities))
+
+
+
+# check results (we have some duplicate logic here, but anyway)
+num_clusters = max(labels)
+num_noise = sum(labels == -1)
+
+print("\nNumber of clusters found: " + num_clusters)
+print("Number of noise points: " + num_noise)
+
+# should find 2 clusters
+test1 = (num_clusters == 2)
+print("Found 2 clusters: " + test1)
+
+# no points should be noise
+test2 = (num_noise == 0)
+print("No noise points: " + test2)
+
+# points 1,2,3 should be in same cluster
+label1 = as.scalar(labels[1])
+label2 = as.scalar(labels[2])
+label3 = as.scalar(labels[3])
+test3 = (label1 == label2) & (label2 == label3) & (label1 > 0)
+print("points 1,2,3 in same cluster: " + test3)
+
+# points 4,5,6 should be in same
+label4 = as.scalar(labels[4])
+label5 = as.scalar(labels[5])
+label6 = as.scalar(labels[6])
+test4 = (label4 == label5) & (label5 == label6) & (label4 > 0)
+print("Points 4,5,6 in same cluster: " + test4)
+
+# clusters A and B should be different
+test5 = (label1 != label4)
+print("Two clusters are different: " + test5)
+
+test_pass = edges_match & weights_match & hierachy_match & sizes_match & 
labels_match & stabilities_match & test1 & test2 & test3 & test4 & test5
+
+if(test_pass) {
+    print("\nAll tests passed\n")
+} else {
+    print("\nTests failed\n")
+}
\ No newline at end of file
diff --git a/scripts/staging/hdbscan/test_hdbscan.dml 
b/scripts/staging/hdbscan/test_hdbscan.dml
new file mode 100644
index 0000000000..411ef0d047
--- /dev/null
+++ b/scripts/staging/hdbscan/test_hdbscan.dml
@@ -0,0 +1,73 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+source("scripts/builtin/hdbscan.dml") as hdb
+
+# A: 5 points tightly grouped around (0, 0)
+# B: 5 points tightly grouped around (10, 0)
+# Outliers: 2 points far from any cluster
+
+n = 12
+d = 2
+X = matrix(0, rows=n, cols=d)
+
+# A
+X[1,] = matrix("0.0 0.0", rows=1, cols=2)
+X[2,] = matrix("0.3 0.2", rows=1, cols=2)
+X[3,] = matrix("-0.2 0.3", rows=1, cols=2)
+X[4,] = matrix("0.1 -0.2", rows=1, cols=2)
+X[5,] = matrix("-0.1 -0.1", rows=1, cols=2)
+
+# B
+X[6,] = matrix("10.0 0.0", rows=1, cols=2)
+X[7,] = matrix("10.3 0.2", rows=1, cols=2)
+X[8,] = matrix("9.8 0.3", rows=1, cols=2)
+X[9,] = matrix("10.1 -0.2", rows=1, cols=2)
+X[10,] = matrix("9.9 -0.1", rows=1, cols=2)
+
+# Outliers
+X[11,] = matrix("5.0 5.0", rows=1, cols=2)
+X[12,] = matrix("-5.0 -5.0", rows=1, cols=2)
+
+print(toString(X))
+
+minPts = 3
+minClSize = 4  
+
+
+# run hdbscan
+[labels, centroids] = hdb::m_hdbscan(X, minPts, minClSize)
+expected_labels = matrix("1 1 1 1 1 2 2 2 2 2 -1 -1", rows=12, cols=1)
+labels_match = (min(labels == expected_labels) == 1)
+if (labels_match) {
+    print("Pass: labels match.")
+} else {
+    print("Fail: labels don't match.")
+}
+test_A = min(sqrt(rowSums((centroids - matrix("0.020 0.040", 1, 2))^2))) < 
0.001
+test_B = min(sqrt(rowSums((centroids - matrix("10.020 0.040", 1, 2))^2))) < 
0.001
+all_found = test_A & test_B
+print("Expected centroids near expected positions: " + all_found)
+print("Cluster labels:")
+print(toString(labels))
+print("\nCluster centroids:")
+print(toString(centroids))
diff --git a/scripts/staging/hdbscan/test_kth_smallest.dml 
b/scripts/staging/hdbscan/test_kth_smallest.dml
new file mode 100644
index 0000000000..0ddf5d6a03
--- /dev/null
+++ b/scripts/staging/hdbscan/test_kth_smallest.dml
@@ -0,0 +1,36 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+source("scripts/builtin/hdbscan.dml") as hdb
+
+array = matrix("0 1 4 2", rows=4, cols=1)
+
+res1 = hdb::computeKthSmallest(array, 1)
+res2 = hdb::computeKthSmallest(array, 2)
+
+test_pass = (res1 == 1) & (res2 == 2)
+
+if(test_pass) {
+    print("Passed")
+} else {
+    print("Failed")
+}
\ No newline at end of file
diff --git a/scripts/staging/hdbscan/test_mutual_reachability.dml 
b/scripts/staging/hdbscan/test_mutual_reachability.dml
new file mode 100644
index 0000000000..757b5f98ee
--- /dev/null
+++ b/scripts/staging/hdbscan/test_mutual_reachability.dml
@@ -0,0 +1,48 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+source("scripts/builtin/hdbscan.dml") as hdb
+
+distances = matrix("0 1 5 1 0 4 5 4 0", rows=3, cols=3)
+coreDistances = matrix("1 1 4", rows=3, cols=1)
+
+mutualReach = hdb::computeMutualReachability(distances, coreDistances)
+
+diagSum = sum(diag(mutualReach))
+
+val_AB = as.scalar(mutualReach[1,2])
+val_AC = as.scalar(mutualReach[1,3])
+val_BC = as.scalar(mutualReach[2,3])
+
+sym_AB = as.scalar(mutualReach[2,1])
+sym_AC = as.scalar(mutualReach[3,1])
+sym_BC = as.scalar(mutualReach[3,2])
+
+test1_pass = (val_AB == 1) & (val_AC == 5) & (val_BC == 4) & 
+             (diagSum == 0) & 
+             (val_AB == sym_AB) & (val_AC == sym_AC) & (val_BC == sym_BC)
+
+if(test1_pass) {
+    print("Passed")
+} else {
+    print("Failed")
+}
diff --git a/scripts/staging/hdbscan/test_union_find.dml 
b/scripts/staging/hdbscan/test_union_find.dml
new file mode 100644
index 0000000000..05b0d70604
--- /dev/null
+++ b/scripts/staging/hdbscan/test_union_find.dml
@@ -0,0 +1,161 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+source("scripts/builtin/hdbscan.dml") as hdb
+
+#   1 -> 1
+#   2 -> 1
+#   3 -> 2 -> 1
+#   4 -> 4
+
+n = 4
+parent = matrix(0, rows=n, cols=1)
+parent[1] = 1
+parent[2] = 1
+parent[3] = 2
+parent[4] = 4
+
+root1 = hdb::find(parent, 1)
+root2 = hdb::find(parent, 2)
+root3 = hdb::find(parent, 3)
+root4 = hdb::find(parent, 4)
+
+test_find = (root1 == 1) & (root2 == 1) & (root3 == 1) & (root4 == 4)
+
+if(test_find) {
+    print("Passed")
+} else {
+    print("Failed")
+}
+
+# ensure union() attaches lower-rank root under higher-rank root
+# rank[1] < rank[2]  => parent[1] becomes 2
+
+newId = 5  # arbitrary "new linkage node id" for this unit test
+
+parentA = matrix(0, rows=n, cols=1)
+parentA[1] = 1
+parentA[2] = 2
+parentA[3] = 3
+parentA[4] = 4
+
+rankA = matrix(0, rows=n, cols=1)
+rankA[1] = 0
+rankA[2] = 1
+rankA[3] = 0
+rankA[4] = 0
+
+sizeA = matrix(1, rows=n, cols=1)
+
+compToNodeA = matrix(0, rows=n, cols=1)
+compToNodeA[1] = 1
+compToNodeA[2] = 2
+compToNodeA[3] = 3
+compToNodeA[4] = 4
+
+[newParentA, newRankA, newSizeA, newCompToNodeA, newRootA] = 
hdb::union(parentA, rankA, 1, 2, sizeA, compToNodeA, newId)
+test_unionA = (as.scalar(newParentA[1]) == 2) & (as.scalar(newParentA[2]) == 2)
+
+if(test_unionA) {
+    print("Passed")
+} else {
+    print("Failed")
+}
+
+# ensure union() attaches lower-rank root under higher-rank root
+# rank[1] > rank[2]  => parent[2] becomes 1
+
+parentB = matrix(0, rows=n, cols=1)
+parentB[1] = 1
+parentB[2] = 2
+parentB[3] = 3
+parentB[4] = 4
+
+rankB = matrix(0, rows=n, cols=1)
+rankB[1] = 2
+rankB[2] = 1
+rankB[3] = 0
+rankB[4] = 0
+
+sizeB = matrix(1, rows=n, cols=1)
+
+compToNodeB = matrix(0, rows=n, cols=1)
+compToNodeB[1] = 1
+compToNodeB[2] = 2
+compToNodeB[3] = 3
+compToNodeB[4] = 4
+
+[newParentB, newRankB, newSizeB, newCompToNodeB, newRootB] = 
hdb::union(parentB, rankB, 1, 2, sizeB, compToNodeB, newId)
+test_unionB = (as.scalar(newParentB[2]) == 1) & (as.scalar(newParentB[1]) == 1)
+
+if(test_unionB) {
+    print("Passed")
+} else {
+    print("Failed")
+}
+
+# check linkage
+test_unionB = test_unionB & (newRootB == 1) & (as.scalar(newCompToNodeB[1]) == 
newId)
+
+if(test_unionB) {
+    print("Passed")
+} else {
+    print("Failed")
+}
+
+
+# ensure union() increments if height is same
+
+parentC = matrix(0, rows=n, cols=1)
+parentC[1] = 1
+parentC[2] = 2
+parentC[3] = 3
+parentC[4] = 4
+
+rankC = matrix(0, rows=n, cols=1)
+rankC[1] = 0
+rankC[2] = 0
+rankC[3] = 0
+rankC[4] = 0
+
+sizeC = matrix(1, rows=n, cols=1)
+
+compToNodeC = matrix(0, rows=n, cols=1)
+compToNodeC[1] = 1
+compToNodeC[2] = 2
+compToNodeC[3] = 3
+compToNodeC[4] = 4
+
+[newParentC, newRankC, newSizeC, newCompToNodeC, newRootC] = 
hdb::union(parentC, rankC, 1, 2, sizeC, compToNodeC, newId)
+
+test_unionC_parent = (as.scalar(newParentC[2]) == 1)
+test_unionC_rank = (as.scalar(newRankC[1]) == 1)
+
+test_unionC = test_unionC_parent & test_unionC_rank
+
+test_unionC = test_unionC & (newRootC == 1) & (as.scalar(newCompToNodeC[1]) == 
newId)
+
+if(test_unionC) {
+    print("Passed")
+} else {
+    print("Failed")
+}

(systemds) branch main updated: [SYSTEMDS-3858] New HDBSCAN builtin function

Reply via email to