From 5f827659b427afb1467cc1617dead988c0616724 Mon Sep 17 00:00:00 2001 From: ywcb00 Date: Thu, 30 Apr 2026 09:43:29 +0200 Subject: [PATCH 01/10] feat(EstimatorRowWise.java): implement the first version of the row wise sparsity estimator works for the matrix multiplication and bind test cases for now --- .../sysds/hops/estim/EstimatorRowWise.java | 203 ++++++++++++++++++ 1 file changed, 203 insertions(+) create mode 100644 src/main/java/org/apache/sysds/hops/estim/EstimatorRowWise.java diff --git a/src/main/java/org/apache/sysds/hops/estim/EstimatorRowWise.java b/src/main/java/org/apache/sysds/hops/estim/EstimatorRowWise.java new file mode 100644 index 00000000000..4dc2ef416af --- /dev/null +++ b/src/main/java/org/apache/sysds/hops/estim/EstimatorRowWise.java @@ -0,0 +1,203 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysds.hops.estim; + +import org.apache.commons.lang3.ArrayUtils; +import org.apache.commons.lang3.NotImplementedException; +import org.apache.sysds.runtime.DMLRuntimeException; +import org.apache.sysds.runtime.data.SparseRow; +import org.apache.sysds.runtime.matrix.data.MatrixBlock; +import org.apache.sysds.runtime.meta.DataCharacteristics; +import org.apache.sysds.runtime.meta.MatrixCharacteristics; + +import java.util.stream.DoubleStream; +import java.util.stream.IntStream; + +/** + * This estimator implements an approach based on row-wise sparsity estimation, + * introduced in + * Lin, Chunxu, Wensheng Luo, Yixiang Fang, Chenhao Ma, Xilin Liu and Yuchi Ma: + * On Efficient Large Sparse Matrix Chain Multiplication. + * Proceedings of the ACM on Management of Data 2 (2024): 1 - 27. + */ +public class EstimatorRowWise extends SparsityEstimator { + @Override + public DataCharacteristics estim(MMNode root) { + double[] rsOut = estimInternMMChain(root); + double sparsity = DoubleStream.of(rsOut).average().orElse(0); + + MatrixCharacteristics matrixCharacteristics = getMatrixCharacteristics(root, sparsity); + + return root.setDataCharacteristics(matrixCharacteristics); + } + + @Override + public double estim(MatrixBlock m1, MatrixBlock m2) { + return estim(m1, m2, OpCode.MM); + } + + @Override + public double estim(MatrixBlock m1, MatrixBlock m2, OpCode op) { + if( isExactMetadataOp(op) ) + return estimExactMetaData(m1.getDataCharacteristics(), + m2.getDataCharacteristics(), op).getSparsity(); + + double[] rsOut = estimIntern(m1, m2, op); + return DoubleStream.of(rsOut).average().orElse(0); + } + + @Override + public double estim(MatrixBlock m1, OpCode op) { + if( isExactMetadataOp(op) ) + return estimExactMetaData(m1.getDataCharacteristics(), null, op).getSparsity(); + throw new NotImplementedException(); + } + + private double[] estimInternMMChain(MMNode node) { + return estimInternMMChain(node, null, null); + } + + private double[] estimInternMMChain(MMNode node, double[] rsRightNeighbor, OpCode opRightNeighbor) { + if(node.isLeaf()) { + MatrixBlock mb = node.getData(); + if(rsRightNeighbor == null) + return getRowWiseSparsityVector(mb); + else + return estimIntern(mb, rsRightNeighbor, opRightNeighbor); + } + switch(node.getOp()) { + case MM: + double[] rsRightNode = estimInternMMChain(node.getRight(), rsRightNeighbor, opRightNeighbor); + return estimInternMMChain(node.getLeft(), rsRightNode, node.getOp()); + case CBIND: + case RBIND: + // consider the current node as new DAG for estimation (cut) + double[] rsOut = estimInternBind(estimInternMMChain(node.getLeft()), + estimInternMMChain(node.getRight()), node.getOp()); + if(rsRightNeighbor != null) { + rsOut = estimInternMM(rsOut, rsRightNeighbor); + } + return rsOut; + default: + throw new NotImplementedException(); + } + } + + private double[] estimIntern(MatrixBlock m1, MatrixBlock m2, OpCode op) { + double[] rsM2 = getRowWiseSparsityVector(m2); + return estimIntern(m1, rsM2, op); + } + + private double[] estimIntern(MatrixBlock m1, double[] rsM2, OpCode op) { + switch(op) { + case MM: + return estimInternMM(m1, rsM2); + case CBIND: + case RBIND: + return estimInternBind(getRowWiseSparsityVector(m1), rsM2, op); + default: + throw new NotImplementedException("Sparsity estimation for operation " + op.toString() + " not supported yet."); + } + } + + // Corresponds to Algorithm 1 in the publication + private double[] estimInternMM(MatrixBlock m1, double[] rsM2) { + double[] rsOut = new double[m1.getNumRows()]; + for(int r = 0; r < m1.getNumRows(); r++) { + int nonZeroCols[] = getNonZeroColumnIndices(m1, r); + double temp = 1; + for(int c : nonZeroCols) { + temp *= (double) 1 - rsM2[c]; + } + rsOut[r] = (double) 1 - temp; + } + return rsOut; + } + + private double[] estimInternMM(double[] rsM1, double[] rsM2) { + double[] rsOut = DoubleStream.of(rsM1).map( + rsM1I -> (double) 1 - DoubleStream.of(rsM2).reduce((double) 1, + (currentVal, rsM2J) -> currentVal * ((double) 1 - (rsM1I * rsM2J)))).toArray(); + return rsOut; + } + + private double[] estimInternBind(double[] rsM1, double[] rsM2, OpCode op) { + switch(op) { + case CBIND: + return IntStream.range(0, rsM1.length) + .mapToDouble(idx -> (double) rsM1[idx] + rsM2[idx]).toArray(); + case RBIND: + return ArrayUtils.addAll(rsM1, rsM2); + default: + throw new DMLRuntimeException("We should never reach this point."); + } + } + + private MatrixCharacteristics getMatrixCharacteristics(MMNode root, double sparsity) { + switch(root.getOp()) { + case MM: + MMNode tmpNode = root; + while(!tmpNode.isLeaf()) { + tmpNode = tmpNode.getLeft(); + } + int numRows = tmpNode.getData().getNumRows(); + tmpNode = root; + while(!tmpNode.isLeaf()) { + tmpNode = tmpNode.getRight(); + } + int numColumns = tmpNode.getData().getNumColumns(); + + return new MatrixCharacteristics( + numRows, numColumns, (long)(numRows * numColumns * sparsity)); + default: + throw new NotImplementedException(); + } + } + + private double[] getRowWiseSparsityVector(MatrixBlock mb) { + int numRows = mb.getNumRows(); + double[] rs = new double[numRows]; + if(mb.isInSparseFormat()) { + for(int counter = 0; counter < numRows; counter++) { + SparseRow sparseRow = mb.getSparseBlock().get(counter); + rs[counter] = (sparseRow == null) ? 0 : (double) sparseRow.size() / mb.getNumColumns(); + } + } + else { + for(int counter = 0; counter < numRows; counter++) { + rs[counter] = (double) mb.getDenseBlock().countNonZeros(counter) / mb.getNumColumns(); + } + } + return rs; + } + + private int[] getNonZeroColumnIndices(MatrixBlock mb, final int rIdx) { + int[] nonZeroCols; + if(mb.isInSparseFormat()) { + SparseRow sparseRow = mb.getSparseBlock().get(rIdx); + nonZeroCols = (sparseRow == null) ? new int[0] : sparseRow.indexes(); + } + else { + nonZeroCols = IntStream.range(0, mb.getNumColumns()) + .filter(cIdx -> mb.get(rIdx, cIdx) != 0).toArray(); + } + return nonZeroCols; + } +}; From e92e8aa889f7f3738f6f6f2f8ad64e03defef6fd Mon Sep 17 00:00:00 2001 From: ywcb00 Date: Thu, 30 Apr 2026 09:46:43 +0200 Subject: [PATCH 02/10] feat(test/estim): add some tests for the row wise sparsity estimator to the unity tests for sparsity estimation --- .../test/component/estim/OpBindChainTest.java | 16 ++++++++++++++-- .../sysds/test/component/estim/OpBindTest.java | 14 +++++++++++++- .../test/component/estim/OuterProductTest.java | 11 +++++++++++ .../test/component/estim/SelfProductTest.java | 11 ++++++++++- .../component/estim/SquaredProductChainTest.java | 13 ++++++++++++- .../test/component/estim/SquaredProductTest.java | 13 ++++++++++++- 6 files changed, 72 insertions(+), 6 deletions(-) diff --git a/src/test/java/org/apache/sysds/test/component/estim/OpBindChainTest.java b/src/test/java/org/apache/sysds/test/component/estim/OpBindChainTest.java index 35efedaf625..4726cf36daa 100644 --- a/src/test/java/org/apache/sysds/test/component/estim/OpBindChainTest.java +++ b/src/test/java/org/apache/sysds/test/component/estim/OpBindChainTest.java @@ -24,6 +24,7 @@ import org.apache.sysds.hops.estim.EstimatorBasicWorst; import org.apache.sysds.hops.estim.EstimatorBitsetMM; import org.apache.sysds.hops.estim.EstimatorMatrixHistogram; +import org.apache.sysds.hops.estim.EstimatorRowWise; import org.apache.sysds.hops.estim.EstimatorLayeredGraph; import org.apache.sysds.hops.estim.MMNode; import org.apache.sysds.hops.estim.SparsityEstimator; @@ -127,8 +128,19 @@ public void testLGCasecbind() { new EstimatorLayeredGraph(EstimatorLayeredGraph.ROUNDS, 3), m, k, n, sparsity, cbind); } - - + + // Row Wise Sparsity Estimator + @Test + public void testRowWiseRbind() { + runSparsityEstimateTest(new EstimatorRowWise(), m, k, n, sparsity, rbind); + } + + @Test + public void testRowWiseCbind() { + runSparsityEstimateTest(new EstimatorRowWise(), m, k, n, sparsity, cbind); + } + + private static void runSparsityEstimateTest(SparsityEstimator estim, int m, int k, int n, double[] sp, OpCode op) { MatrixBlock m1; MatrixBlock m2; diff --git a/src/test/java/org/apache/sysds/test/component/estim/OpBindTest.java b/src/test/java/org/apache/sysds/test/component/estim/OpBindTest.java index 3e7ad24fe86..31a9be713bc 100644 --- a/src/test/java/org/apache/sysds/test/component/estim/OpBindTest.java +++ b/src/test/java/org/apache/sysds/test/component/estim/OpBindTest.java @@ -24,6 +24,7 @@ import org.apache.sysds.hops.estim.EstimatorBasicWorst; import org.apache.sysds.hops.estim.EstimatorBitsetMM; import org.apache.sysds.hops.estim.EstimatorMatrixHistogram; +import org.apache.sysds.hops.estim.EstimatorRowWise; import org.apache.sysds.hops.estim.EstimatorLayeredGraph; import org.apache.sysds.hops.estim.SparsityEstimator; import org.apache.sysds.hops.estim.SparsityEstimator.OpCode; @@ -132,7 +133,18 @@ public void testSampleCaserbind() { public void testSampleCasecbind() { runSparsityEstimateTest(new EstimatorSample(), m, k, n, sparsity, cbind); }*/ - + + // Row Wise Sparsity Estimator + @Test + public void testRowWiseRbind() { + runSparsityEstimateTest(new EstimatorRowWise(), m, k, n, sparsity, rbind); + } + + @Test + public void testRowWiseCbind() { + runSparsityEstimateTest(new EstimatorRowWise(), m, k, n, sparsity, cbind); + } + private static void runSparsityEstimateTest(SparsityEstimator estim, int m, int k, int n, double[] sp, OpCode op) { MatrixBlock m1; diff --git a/src/test/java/org/apache/sysds/test/component/estim/OuterProductTest.java b/src/test/java/org/apache/sysds/test/component/estim/OuterProductTest.java index fdc33d878db..f71d9989ccd 100644 --- a/src/test/java/org/apache/sysds/test/component/estim/OuterProductTest.java +++ b/src/test/java/org/apache/sysds/test/component/estim/OuterProductTest.java @@ -26,6 +26,7 @@ import org.apache.sysds.hops.estim.EstimatorDensityMap; import org.apache.sysds.hops.estim.EstimatorMatrixHistogram; import org.apache.sysds.hops.estim.EstimatorLayeredGraph; +import org.apache.sysds.hops.estim.EstimatorRowWise; import org.apache.sysds.hops.estim.EstimatorSample; import org.apache.sysds.hops.estim.SparsityEstimator; import org.apache.sysds.runtime.instructions.InstructionUtils; @@ -150,6 +151,16 @@ public void testLayeredGraphCase2() { runSparsityEstimateTest(new EstimatorLayeredGraph(), m, k, n, case2); } + @Test + public void testRowWiseCase1() { + runSparsityEstimateTest(new EstimatorRowWise(), m, k, n, case1); + } + + @Test + public void testRowWiseCase2() { + runSparsityEstimateTest(new EstimatorRowWise(), m, k, n, case2); + } + private static void runSparsityEstimateTest(SparsityEstimator estim, int m, int k, int n, double[] sp) { MatrixBlock m1 = MatrixBlock.randOperations(m, k, sp[0], 1, 1, "uniform", 3); MatrixBlock m2 = MatrixBlock.randOperations(k, n, sp[1], 1, 1, "uniform", 3); diff --git a/src/test/java/org/apache/sysds/test/component/estim/SelfProductTest.java b/src/test/java/org/apache/sysds/test/component/estim/SelfProductTest.java index d99f38d939b..2feeae6fc37 100644 --- a/src/test/java/org/apache/sysds/test/component/estim/SelfProductTest.java +++ b/src/test/java/org/apache/sysds/test/component/estim/SelfProductTest.java @@ -28,6 +28,7 @@ import org.apache.sysds.hops.estim.EstimatorDensityMap; import org.apache.sysds.hops.estim.EstimatorLayeredGraph; import org.apache.sysds.hops.estim.EstimatorMatrixHistogram; +import org.apache.sysds.hops.estim.EstimatorRowWise; import org.apache.sysds.hops.estim.EstimatorSample; import org.apache.sysds.hops.estim.EstimatorSampleRa; import org.apache.sysds.hops.estim.SparsityEstimator; @@ -156,7 +157,15 @@ public void testLayeredGraphCase1() { public void testLayeredGraphCase2() { runSparsityEstimateTest(new EstimatorLayeredGraph(), m, sparsity2); } - + + @Test + public void testRowWiseCase() { + runSparsityEstimateTest(new EstimatorRowWise(), m/4, sparsity0); + runSparsityEstimateTest(new EstimatorRowWise(), m/2, sparsity1); + runSparsityEstimateTest(new EstimatorRowWise(), m, sparsity2); + runSparsityEstimateTest(new EstimatorRowWise(), m, sparsity3); + } + private static void runSparsityEstimateTest(SparsityEstimator estim, int n, double sp) { MatrixBlock m1 = MatrixBlock.randOperations(n, n, sp, 1, 1, "uniform", 3); MatrixBlock m3 = m1.aggregateBinaryOperations(m1, m1, diff --git a/src/test/java/org/apache/sysds/test/component/estim/SquaredProductChainTest.java b/src/test/java/org/apache/sysds/test/component/estim/SquaredProductChainTest.java index f799b02c96d..502ed62de29 100644 --- a/src/test/java/org/apache/sysds/test/component/estim/SquaredProductChainTest.java +++ b/src/test/java/org/apache/sysds/test/component/estim/SquaredProductChainTest.java @@ -26,6 +26,7 @@ import org.apache.sysds.hops.estim.EstimatorDensityMap; import org.apache.sysds.hops.estim.EstimatorLayeredGraph; import org.apache.sysds.hops.estim.EstimatorMatrixHistogram; +import org.apache.sysds.hops.estim.EstimatorRowWise; import org.apache.sysds.hops.estim.MMNode; import org.apache.sysds.hops.estim.SparsityEstimator; import org.apache.sysds.hops.estim.SparsityEstimator.OpCode; @@ -146,7 +147,17 @@ public void testLayeredGraph32Case1() { public void testLayeredGraph32Case2() { runSparsityEstimateTest(new EstimatorLayeredGraph(32), m, k, n, n2, case2); } - + + @Test + public void testRowWiseCase1() { + runSparsityEstimateTest(new EstimatorRowWise(), m, k, n, n2, case1); + } + + @Test + public void testRowWiseCase2() { + runSparsityEstimateTest(new EstimatorRowWise(), m, k, n, n2, case2); + } + private static void runSparsityEstimateTest(SparsityEstimator estim, int m, int k, int n, int n2, double[] sp) { MatrixBlock m1 = MatrixBlock.randOperations(m, k, sp[0], 1, 1, "uniform", 1); MatrixBlock m2 = MatrixBlock.randOperations(k, n, sp[1], 1, 1, "uniform", 2); diff --git a/src/test/java/org/apache/sysds/test/component/estim/SquaredProductTest.java b/src/test/java/org/apache/sysds/test/component/estim/SquaredProductTest.java index 2a898f9c39f..678c5daa31a 100644 --- a/src/test/java/org/apache/sysds/test/component/estim/SquaredProductTest.java +++ b/src/test/java/org/apache/sysds/test/component/estim/SquaredProductTest.java @@ -25,6 +25,7 @@ import org.apache.sysds.hops.estim.EstimatorBitsetMM; import org.apache.sysds.hops.estim.EstimatorDensityMap; import org.apache.sysds.hops.estim.EstimatorMatrixHistogram; +import org.apache.sysds.hops.estim.EstimatorRowWise; import org.apache.sysds.hops.estim.EstimatorLayeredGraph; import org.apache.sysds.hops.estim.EstimatorSample; import org.apache.sysds.hops.estim.SparsityEstimator; @@ -154,7 +155,17 @@ public void testLayeredGraphCase1() { public void testLayeredGraphCase2() { runSparsityEstimateTest(new EstimatorLayeredGraph(), m, k, n, case2); } - + + @Test + public void testRowWiseCase1() { + runSparsityEstimateTest(new EstimatorRowWise(), m, k, n, case1); + } + + @Test + public void testRowWiseCase2() { + runSparsityEstimateTest(new EstimatorRowWise(), m, k, n, case2); + } + private static void runSparsityEstimateTest(SparsityEstimator estim, int m, int k, int n, double[] sp) { MatrixBlock m1 = MatrixBlock.randOperations(m, k, sp[0], 1, 1, "uniform", 3); MatrixBlock m2 = MatrixBlock.randOperations(k, n, sp[1], 1, 1, "uniform", 7); From 6d518160df238d4a2309fdd82ea338c316f4d2dd Mon Sep 17 00:00:00 2001 From: ywcb00 Date: Mon, 4 May 2026 17:14:25 +0200 Subject: [PATCH 03/10] feat(hops/estim/EstimatorRowWise.java): introduce a separate object container for row wise sparsity vectors to simplify access and allow storing it with chain nodes --- .../sysds/hops/estim/EstimatorRowWise.java | 180 ++++++++++++------ 1 file changed, 124 insertions(+), 56 deletions(-) diff --git a/src/main/java/org/apache/sysds/hops/estim/EstimatorRowWise.java b/src/main/java/org/apache/sysds/hops/estim/EstimatorRowWise.java index 4dc2ef416af..bdd9f85e0e3 100644 --- a/src/main/java/org/apache/sysds/hops/estim/EstimatorRowWise.java +++ b/src/main/java/org/apache/sysds/hops/estim/EstimatorRowWise.java @@ -21,12 +21,13 @@ import org.apache.commons.lang3.ArrayUtils; import org.apache.commons.lang3.NotImplementedException; -import org.apache.sysds.runtime.DMLRuntimeException; import org.apache.sysds.runtime.data.SparseRow; import org.apache.sysds.runtime.matrix.data.MatrixBlock; import org.apache.sysds.runtime.meta.DataCharacteristics; import org.apache.sysds.runtime.meta.MatrixCharacteristics; +import java.util.function.DoubleBinaryOperator; +import java.util.function.DoubleUnaryOperator; import java.util.stream.DoubleStream; import java.util.stream.IntStream; @@ -40,8 +41,8 @@ public class EstimatorRowWise extends SparsityEstimator { @Override public DataCharacteristics estim(MMNode root) { - double[] rsOut = estimInternMMChain(root); - double sparsity = DoubleStream.of(rsOut).average().orElse(0); + estimInternChain(root); + double sparsity = ((RSVector)root.getSynopsis()).avg(); MatrixCharacteristics matrixCharacteristics = getMatrixCharacteristics(root, sparsity); @@ -55,12 +56,13 @@ public double estim(MatrixBlock m1, MatrixBlock m2) { @Override public double estim(MatrixBlock m1, MatrixBlock m2, OpCode op) { - if( isExactMetadataOp(op) ) + if( isExactMetadataOp(op) ) { return estimExactMetaData(m1.getDataCharacteristics(), m2.getDataCharacteristics(), op).getSparsity(); + } - double[] rsOut = estimIntern(m1, m2, op); - return DoubleStream.of(rsOut).average().orElse(0); + RSVector rsOut = estimIntern(m1, m2, op); + return rsOut.avg(); } @Override @@ -70,84 +72,115 @@ public double estim(MatrixBlock m1, OpCode op) { throw new NotImplementedException(); } - private double[] estimInternMMChain(MMNode node) { - return estimInternMMChain(node, null, null); + private void estimInternChain(MMNode node) { + estimInternChain(node, null, null); } - private double[] estimInternMMChain(MMNode node, double[] rsRightNeighbor, OpCode opRightNeighbor) { + private void estimInternChain(MMNode node, RSVector rsRightNeighbor, OpCode opRightNeighbor) { if(node.isLeaf()) { MatrixBlock mb = node.getData(); - if(rsRightNeighbor == null) - return getRowWiseSparsityVector(mb); + RSVector rsOut; + if(rsRightNeighbor != null) + rsOut = estimIntern(mb, rsRightNeighbor, opRightNeighbor); else - return estimIntern(mb, rsRightNeighbor, opRightNeighbor); + rsOut = getRowWiseSparsityVector(mb); + node.setSynopsis(rsOut); + return; } switch(node.getOp()) { case MM: - double[] rsRightNode = estimInternMMChain(node.getRight(), rsRightNeighbor, opRightNeighbor); - return estimInternMMChain(node.getLeft(), rsRightNode, node.getOp()); + estimInternChain(node.getRight(), rsRightNeighbor, opRightNeighbor); + estimInternChain(node.getLeft(), (RSVector)(node.getRight().getSynopsis()), node.getOp()); + node.setSynopsis(node.getLeft().getSynopsis()); + return; case CBIND: + /** NOTE: considering the current node as new DAG for estimation (cut), since the row sparsity of + * the right neighbor cannot be aggregated into a cbind operation when having only row sparsity vectors + */ + estimInternChain(node.getLeft()); + estimInternChain(node.getRight()); + RSVector rsCBind = estimInternCBind((RSVector)(node.getLeft().getSynopsis()), (RSVector)(node.getRight().getSynopsis())); + if(rsRightNeighbor != null) + node.setSynopsis(estimIntern(rsCBind, rsRightNeighbor, opRightNeighbor)); + else + node.setSynopsis(rsCBind); + return; case RBIND: - // consider the current node as new DAG for estimation (cut) - double[] rsOut = estimInternBind(estimInternMMChain(node.getLeft()), - estimInternMMChain(node.getRight()), node.getOp()); - if(rsRightNeighbor != null) { - rsOut = estimInternMM(rsOut, rsRightNeighbor); - } - return rsOut; + /** NOTE: considering the current node as new DAG for estimation (cut), since the row sparsity of + * the right neighbor cannot be aggregated into an rbind operation when having only row sparsity vectors + */ + estimInternChain(node.getLeft()); + estimInternChain(node.getRight()); + RSVector rsRBind = estimInternRBind((RSVector)(node.getLeft().getSynopsis()), (RSVector)(node.getRight().getSynopsis())); + if(rsRightNeighbor != null) + node.setSynopsis(estimIntern(rsRBind, rsRightNeighbor, opRightNeighbor)); + else + node.setSynopsis(rsRBind); + return; default: throw new NotImplementedException(); } } - private double[] estimIntern(MatrixBlock m1, MatrixBlock m2, OpCode op) { - double[] rsM2 = getRowWiseSparsityVector(m2); + private RSVector estimIntern(MatrixBlock m1, MatrixBlock m2, OpCode op) { + RSVector rsM2 = getRowWiseSparsityVector(m2); return estimIntern(m1, rsM2, op); } - private double[] estimIntern(MatrixBlock m1, double[] rsM2, OpCode op) { + private RSVector estimIntern(MatrixBlock m1, RSVector rsM2, OpCode op) { switch(op) { case MM: return estimInternMM(m1, rsM2); case CBIND: + return estimInternCBind(getRowWiseSparsityVector(m1), rsM2); case RBIND: - return estimInternBind(getRowWiseSparsityVector(m1), rsM2, op); + return estimInternRBind(getRowWiseSparsityVector(m1), rsM2); default: throw new NotImplementedException("Sparsity estimation for operation " + op.toString() + " not supported yet."); } } - // Corresponds to Algorithm 1 in the publication - private double[] estimInternMM(MatrixBlock m1, double[] rsM2) { - double[] rsOut = new double[m1.getNumRows()]; - for(int r = 0; r < m1.getNumRows(); r++) { - int nonZeroCols[] = getNonZeroColumnIndices(m1, r); - double temp = 1; - for(int c : nonZeroCols) { - temp *= (double) 1 - rsM2[c]; - } - rsOut[r] = (double) 1 - temp; + private RSVector estimIntern(RSVector rsM1, RSVector rsM2, OpCode op) { + switch(op) { + case MM: + return estimInternMM(rsM1, rsM2); + // case CBIND: + // return estimInternCBind(rsM1, rsM2); + // case RBIND: + // return estimInternRBind(rsM1, rsM2); + default: + throw new NotImplementedException("Sparsity estimation for operation " + op.toString() + " not supported yet."); } + } + + // Corresponds to Algorithm 1 in the publication + private RSVector estimInternMM(MatrixBlock m1, RSVector rsM2) { + RSVector rsOut = new RSVector(IntStream.range(0, m1.getNumRows()).mapToDouble( + r -> (double) 1 - IntStream.of(getNonZeroColumnIndices(m1, r)).mapToDouble( + c -> (double) 1 - rsM2.get(c) + ).reduce((double) 1, (currentVal, val) -> currentVal * val)) + .toArray()); return rsOut; } - private double[] estimInternMM(double[] rsM1, double[] rsM2) { - double[] rsOut = DoubleStream.of(rsM1).map( - rsM1I -> (double) 1 - DoubleStream.of(rsM2).reduce((double) 1, - (currentVal, rsM2J) -> currentVal * ((double) 1 - (rsM1I * rsM2J)))).toArray(); + // NOTE: this is the best estimation possible when we only have the two row sparsity vectors + private RSVector estimInternMM(RSVector rsM1, RSVector rsM2) { + // double avgRsM2 = DoubleStream.of(rsM2).average().orElse(0); + // RSVector rsOut = DoubleStream.of(rsM1).map( + // rsM1I -> (double) 1 - Math.pow((double) 1 - (rsM1I * avgRsM2), rsM2.length)).toArray(); + RSVector rsOut = rsM1.map( + rsM1I -> (double) 1 - rsM2.reduce((double) 1, + (currentVal, rsM2J) -> currentVal * ((double) 1 - (rsM1I * rsM2J)))); return rsOut; } - private double[] estimInternBind(double[] rsM1, double[] rsM2, OpCode op) { - switch(op) { - case CBIND: - return IntStream.range(0, rsM1.length) - .mapToDouble(idx -> (double) rsM1[idx] + rsM2[idx]).toArray(); - case RBIND: - return ArrayUtils.addAll(rsM1, rsM2); - default: - throw new DMLRuntimeException("We should never reach this point."); - } + private RSVector estimInternCBind(RSVector rsM1, RSVector rsM2) { + return new RSVector(IntStream.range(0, rsM1.size()).mapToDouble( + idx -> (rsM1.get(idx) + rsM2.get(idx)) / (double) 2).toArray()); + } + + private RSVector estimInternRBind(RSVector rsM1, RSVector rsM2) { + return rsM1.append(rsM2); } private MatrixCharacteristics getMatrixCharacteristics(MMNode root, double sparsity) { @@ -171,21 +204,20 @@ private MatrixCharacteristics getMatrixCharacteristics(MMNode root, double spars } } - private double[] getRowWiseSparsityVector(MatrixBlock mb) { + private RSVector getRowWiseSparsityVector(MatrixBlock mb) { int numRows = mb.getNumRows(); - double[] rs = new double[numRows]; if(mb.isInSparseFormat()) { + double[] rsArray = new double[numRows]; for(int counter = 0; counter < numRows; counter++) { SparseRow sparseRow = mb.getSparseBlock().get(counter); - rs[counter] = (sparseRow == null) ? 0 : (double) sparseRow.size() / mb.getNumColumns(); + rsArray[counter] = (sparseRow == null) ? 0 : (double) sparseRow.size() / mb.getNumColumns(); } + return new RSVector(rsArray); } else { - for(int counter = 0; counter < numRows; counter++) { - rs[counter] = (double) mb.getDenseBlock().countNonZeros(counter) / mb.getNumColumns(); - } + return new RSVector(IntStream.range(0, numRows).mapToDouble( + rIdx -> (double) mb.getDenseBlock().countNonZeros(rIdx) / mb.getNumColumns()).toArray()); } - return rs; } private int[] getNonZeroColumnIndices(MatrixBlock mb, final int rIdx) { @@ -200,4 +232,40 @@ private int[] getNonZeroColumnIndices(MatrixBlock mb, final int rIdx) { } return nonZeroCols; } + + public static class RSVector { + private final double[] rs; + + public RSVector(double[] rs) { + this.rs = rs; + } + + public double[] get() { + return this.rs; + } + + public double get(int idx) { + return this.rs[idx]; + } + + public int size() { + return this.rs.length; + } + + public double avg() { + return DoubleStream.of(this.rs).average().orElse(0); + } + + public RSVector append(RSVector that) { + return new RSVector(ArrayUtils.addAll(this.rs, that.get())); + } + + public RSVector map(DoubleUnaryOperator mapper) { + return new RSVector(DoubleStream.of(this.rs).map(mapper).toArray()); + } + + public double reduce(double identity, DoubleBinaryOperator op) { + return DoubleStream.of(this.rs).reduce(identity, op); + } + }; }; From af19dd0939706956caf5f02e16394800ac7a6cc7 Mon Sep 17 00:00:00 2001 From: ywcb00 Date: Tue, 5 May 2026 16:00:10 +0200 Subject: [PATCH 04/10] fix(hops/estim/EstimatorRowWise.java): fix derivation of output data characteristics --- .../sysds/hops/estim/EstimatorRowWise.java | 138 ++++++++++-------- 1 file changed, 79 insertions(+), 59 deletions(-) diff --git a/src/main/java/org/apache/sysds/hops/estim/EstimatorRowWise.java b/src/main/java/org/apache/sysds/hops/estim/EstimatorRowWise.java index bdd9f85e0e3..ef951385948 100644 --- a/src/main/java/org/apache/sysds/hops/estim/EstimatorRowWise.java +++ b/src/main/java/org/apache/sysds/hops/estim/EstimatorRowWise.java @@ -21,6 +21,7 @@ import org.apache.commons.lang3.ArrayUtils; import org.apache.commons.lang3.NotImplementedException; +import org.apache.sysds.hops.OptimizerUtils; import org.apache.sysds.runtime.data.SparseRow; import org.apache.sysds.runtime.matrix.data.MatrixBlock; import org.apache.sysds.runtime.meta.DataCharacteristics; @@ -44,9 +45,8 @@ public DataCharacteristics estim(MMNode root) { estimInternChain(root); double sparsity = ((RSVector)root.getSynopsis()).avg(); - MatrixCharacteristics matrixCharacteristics = getMatrixCharacteristics(root, sparsity); - - return root.setDataCharacteristics(matrixCharacteristics); + DataCharacteristics outputCharacteristics = deriveOutputCharacteristics(root, sparsity); + return root.setDataCharacteristics(outputCharacteristics); } @Override @@ -77,49 +77,53 @@ private void estimInternChain(MMNode node) { } private void estimInternChain(MMNode node, RSVector rsRightNeighbor, OpCode opRightNeighbor) { + RSVector rsOut; if(node.isLeaf()) { MatrixBlock mb = node.getData(); - RSVector rsOut; if(rsRightNeighbor != null) rsOut = estimIntern(mb, rsRightNeighbor, opRightNeighbor); else rsOut = getRowWiseSparsityVector(mb); - node.setSynopsis(rsOut); - return; } - switch(node.getOp()) { - case MM: - estimInternChain(node.getRight(), rsRightNeighbor, opRightNeighbor); - estimInternChain(node.getLeft(), (RSVector)(node.getRight().getSynopsis()), node.getOp()); - node.setSynopsis(node.getLeft().getSynopsis()); - return; - case CBIND: - /** NOTE: considering the current node as new DAG for estimation (cut), since the row sparsity of - * the right neighbor cannot be aggregated into a cbind operation when having only row sparsity vectors - */ - estimInternChain(node.getLeft()); - estimInternChain(node.getRight()); - RSVector rsCBind = estimInternCBind((RSVector)(node.getLeft().getSynopsis()), (RSVector)(node.getRight().getSynopsis())); - if(rsRightNeighbor != null) - node.setSynopsis(estimIntern(rsCBind, rsRightNeighbor, opRightNeighbor)); - else - node.setSynopsis(rsCBind); - return; - case RBIND: - /** NOTE: considering the current node as new DAG for estimation (cut), since the row sparsity of - * the right neighbor cannot be aggregated into an rbind operation when having only row sparsity vectors - */ - estimInternChain(node.getLeft()); - estimInternChain(node.getRight()); - RSVector rsRBind = estimInternRBind((RSVector)(node.getLeft().getSynopsis()), (RSVector)(node.getRight().getSynopsis())); - if(rsRightNeighbor != null) - node.setSynopsis(estimIntern(rsRBind, rsRightNeighbor, opRightNeighbor)); - else - node.setSynopsis(rsRBind); - return; - default: - throw new NotImplementedException(); + else { + switch(node.getOp()) { + case MM: + estimInternChain(node.getRight(), rsRightNeighbor, opRightNeighbor); + estimInternChain(node.getLeft(), (RSVector)(node.getRight().getSynopsis()), node.getOp()); + rsOut = (RSVector)node.getLeft().getSynopsis(); + break; + case CBIND: + /** NOTE: considering the current node as new DAG for estimation (cut), since the row sparsity of + * the right neighbor cannot be aggregated into a cbind operation when having only row sparsity vectors + */ + estimInternChain(node.getLeft()); + estimInternChain(node.getRight()); + RSVector rsCBind = estimInternCBind((RSVector)(node.getLeft().getSynopsis()), (RSVector)(node.getRight().getSynopsis())); + if(rsRightNeighbor != null) + rsOut = (RSVector)estimIntern(rsCBind, rsRightNeighbor, opRightNeighbor); + else + rsOut = (RSVector)rsCBind; + break; + case RBIND: + /** NOTE: considering the current node as new DAG for estimation (cut), since the row sparsity of + * the right neighbor cannot be aggregated into an rbind operation when having only row sparsity vectors + */ + estimInternChain(node.getLeft()); + estimInternChain(node.getRight()); + RSVector rsRBind = estimInternRBind((RSVector)(node.getLeft().getSynopsis()), (RSVector)(node.getRight().getSynopsis())); + if(rsRightNeighbor != null) + rsOut = (RSVector)estimIntern(rsRBind, rsRightNeighbor, opRightNeighbor); + else + rsOut = (RSVector)rsRBind; + break; + default: + throw new NotImplementedException("Chain estimation for operator " + node.getOp().toString() + + " is not supported yet."); + } } + node.setSynopsis(rsOut); + node.setDataCharacteristics(deriveOutputCharacteristics(node, rsOut.avg())); + return; } private RSVector estimIntern(MatrixBlock m1, MatrixBlock m2, OpCode op) { @@ -183,27 +187,6 @@ private RSVector estimInternRBind(RSVector rsM1, RSVector rsM2) { return rsM1.append(rsM2); } - private MatrixCharacteristics getMatrixCharacteristics(MMNode root, double sparsity) { - switch(root.getOp()) { - case MM: - MMNode tmpNode = root; - while(!tmpNode.isLeaf()) { - tmpNode = tmpNode.getLeft(); - } - int numRows = tmpNode.getData().getNumRows(); - tmpNode = root; - while(!tmpNode.isLeaf()) { - tmpNode = tmpNode.getRight(); - } - int numColumns = tmpNode.getData().getNumColumns(); - - return new MatrixCharacteristics( - numRows, numColumns, (long)(numRows * numColumns * sparsity)); - default: - throw new NotImplementedException(); - } - } - private RSVector getRowWiseSparsityVector(MatrixBlock mb) { int numRows = mb.getNumRows(); if(mb.isInSparseFormat()) { @@ -233,6 +216,43 @@ private int[] getNonZeroColumnIndices(MatrixBlock mb, final int rIdx) { return nonZeroCols; } + public static DataCharacteristics deriveOutputCharacteristics(MMNode node, double spOut) { + if(node.isLeaf() || + (node.getDataCharacteristics() != null && node.getDataCharacteristics().getNonZeros() != -1)) { + return node.getDataCharacteristics(); + } + + MMNode nodeLeft = node.getLeft(); + MMNode nodeRight = node.getRight(); + switch(node.getOp()) { + case MM: + return new MatrixCharacteristics(nodeLeft.getRows(), nodeRight.getCols(), + OptimizerUtils.getNnz(nodeLeft.getRows(), nodeRight.getCols(), spOut)); + case MULT: + case PLUS: + case NEQZERO: + case EQZERO: + return new MatrixCharacteristics(nodeLeft.getRows(), nodeLeft.getCols(), + OptimizerUtils.getNnz(nodeLeft.getRows(), nodeLeft.getCols(), spOut)); + case RBIND: + return new MatrixCharacteristics(nodeLeft.getRows()+nodeLeft.getRows(), nodeLeft.getCols(), + OptimizerUtils.getNnz(nodeLeft.getRows()+nodeRight.getRows(), nodeLeft.getCols(), spOut)); + case CBIND: + return new MatrixCharacteristics(nodeLeft.getRows(), nodeLeft.getCols()+nodeRight.getCols(), + OptimizerUtils.getNnz(nodeLeft.getRows(), nodeLeft.getCols()+nodeRight.getCols(), spOut)); + case DIAG: + int ncol = nodeLeft.getCols()==1 ? nodeLeft.getRows() : 1; + return new MatrixCharacteristics(nodeLeft.getRows(), ncol, + OptimizerUtils.getNnz(nodeLeft.getRows(), ncol, spOut)); + case TRANS: + case RESHAPE: + throw new NotImplementedException("Characteristics derivation for trans and reshape has not been " + + "implemented yet, but could be implemented similar to EstimatorMatrixHistogram.java"); + default: + throw new NotImplementedException(); + } + } + public static class RSVector { private final double[] rs; From 985d049b47001a86f55158df72aafd13da9c1ad6 Mon Sep 17 00:00:00 2001 From: ywcb00 Date: Wed, 6 May 2026 16:45:36 +0200 Subject: [PATCH 05/10] feat(test/component/estim): add unit tests for row wise sparsity estimator with element-wise and single operations --- .../component/estim/OpElemWChainTest.java | 15 +++++++- .../test/component/estim/OpElemWTest.java | 14 ++++++- .../test/component/estim/OpSingleTest.java | 37 +++++++++++++++---- 3 files changed, 55 insertions(+), 11 deletions(-) diff --git a/src/test/java/org/apache/sysds/test/component/estim/OpElemWChainTest.java b/src/test/java/org/apache/sysds/test/component/estim/OpElemWChainTest.java index a1b6594a927..2388f50d50e 100644 --- a/src/test/java/org/apache/sysds/test/component/estim/OpElemWChainTest.java +++ b/src/test/java/org/apache/sysds/test/component/estim/OpElemWChainTest.java @@ -25,6 +25,7 @@ import org.apache.sysds.hops.estim.EstimatorBitsetMM; import org.apache.sysds.hops.estim.EstimatorLayeredGraph; import org.apache.sysds.hops.estim.EstimatorMatrixHistogram; +import org.apache.sysds.hops.estim.EstimatorRowWise; import org.apache.sysds.hops.estim.EstimatorDensityMap; import org.apache.sysds.hops.estim.MMNode; import org.apache.sysds.hops.estim.SparsityEstimator; @@ -118,8 +119,18 @@ public void testLGCasemult() { public void testLGCaseplus() { runSparsityEstimateTest(new EstimatorLayeredGraph(), m, n, sparsity, plus); } - - + + // Row Wise Sparsity Estimator + @Test + public void testRowWiseCaseMult() { + runSparsityEstimateTest(new EstimatorRowWise(), m, n, sparsity, mult); + } + + @Test + public void testRowWiseCasePlus() { + runSparsityEstimateTest(new EstimatorRowWise(), m, n, sparsity, plus); + } + private static void runSparsityEstimateTest(SparsityEstimator estim, int m, int n, double[] sp, OpCode op) { MatrixBlock m1 = MatrixBlock.randOperations(m, n, sp[0], 1, 1, "uniform", 3); MatrixBlock m2 = MatrixBlock.randOperations(m, n, sp[1], 1, 1, "uniform", 5); diff --git a/src/test/java/org/apache/sysds/test/component/estim/OpElemWTest.java b/src/test/java/org/apache/sysds/test/component/estim/OpElemWTest.java index f8ddb91bcef..8d9710dafb1 100644 --- a/src/test/java/org/apache/sysds/test/component/estim/OpElemWTest.java +++ b/src/test/java/org/apache/sysds/test/component/estim/OpElemWTest.java @@ -25,6 +25,7 @@ import org.apache.sysds.hops.estim.EstimatorBitsetMM; import org.apache.sysds.hops.estim.EstimatorDensityMap; import org.apache.sysds.hops.estim.EstimatorMatrixHistogram; +import org.apache.sysds.hops.estim.EstimatorRowWise; import org.apache.sysds.hops.estim.EstimatorLayeredGraph; import org.apache.sysds.hops.estim.EstimatorSample; import org.apache.sysds.hops.estim.SparsityEstimator; @@ -128,7 +129,18 @@ public void testSampleMult() { public void testSamplePlus() { runSparsityEstimateTest(new EstimatorSample(), m, n, sparsity, plus); } - + + // Row Wise Sparsity Estimator + @Test + public void testRowWiseMult() { + runSparsityEstimateTest(new EstimatorRowWise(), m, n, sparsity, mult); + } + + @Test + public void testRowWisePlus() { + runSparsityEstimateTest(new EstimatorRowWise(), m, n, sparsity, plus); + } + private static void runSparsityEstimateTest(SparsityEstimator estim, int m, int n, double[] sp, OpCode op) { MatrixBlock m1 = MatrixBlock.randOperations(m, n, sp[0], 1, 1, "uniform", 3); MatrixBlock m2 = MatrixBlock.randOperations(m, n, sp[1], 1, 1, "uniform", 7); diff --git a/src/test/java/org/apache/sysds/test/component/estim/OpSingleTest.java b/src/test/java/org/apache/sysds/test/component/estim/OpSingleTest.java index d40f84c4fb3..1e39847ab37 100644 --- a/src/test/java/org/apache/sysds/test/component/estim/OpSingleTest.java +++ b/src/test/java/org/apache/sysds/test/component/estim/OpSingleTest.java @@ -26,6 +26,7 @@ import org.apache.sysds.hops.estim.EstimatorBasicWorst; import org.apache.sysds.hops.estim.EstimatorBitsetMM; import org.apache.sysds.hops.estim.EstimatorLayeredGraph; +import org.apache.sysds.hops.estim.EstimatorRowWise; import org.apache.sysds.hops.estim.SparsityEstimator; import org.apache.sysds.hops.estim.SparsityEstimator.OpCode; import org.apache.sysds.runtime.matrix.data.MatrixBlock; @@ -40,7 +41,7 @@ public class OpSingleTest extends AutomatedTestBase private final static int m = 600; private final static int k = 300; private final static double sparsity = 0.2; -// private final static OpCode eqzero = OpCode.EQZERO; + // private final static OpCode eqzero = OpCode.EQZERO; private final static OpCode diag = OpCode.DIAG; private final static OpCode neqzero = OpCode.NEQZERO; private final static OpCode trans = OpCode.TRANS; @@ -237,7 +238,33 @@ public void testLGCasetrans() { // public void testSampleCasereshape() { // runSparsityEstimateTest(new EstimatorSample(), m, k, sparsity, reshape); // } - + + // Row Wise Sparsity Estimator + // @Test + // public void testRowWiseEqzero() { + // runSparsityEstimateTest(new EstimatorRowWise(), m, k, sparsity, eqzero); + // } + + // @Test + // public void testRowWiseDiag() { + // runSparsityEstimateTest(new EstimatorRowWise(), m, m, sparsity, diag); + // } + + @Test + public void testRowWiseNeqzero() { + runSparsityEstimateTest(new EstimatorRowWise(), m, k, sparsity, neqzero); + } + + @Test + public void testRowWiseTrans() { + runSparsityEstimateTest(new EstimatorRowWise(), m, k, sparsity, trans); + } + + @Test + public void testRowWiseReshape() { + runSparsityEstimateTest(new EstimatorRowWise(), m, k, sparsity, reshape); + } + private static void runSparsityEstimateTest(SparsityEstimator estim, int m, int k, double sp, OpCode op) { MatrixBlock m1 = MatrixBlock.randOperations(m, k, sp, 1, 1, "uniform", 3); MatrixBlock m2 = new MatrixBlock(); @@ -252,13 +279,7 @@ private static void runSparsityEstimateTest(SparsityEstimator estim, int m, int est = estim.estim(m1, op); break; case NEQZERO: - m2 = m1; - est = estim.estim(m1, op); - break; case TRANS: - m2 = m1; - est = estim.estim(m1, op); - break; case RESHAPE: m2 = m1; est = estim.estim(m1, op); From 98274f3faff2840dd0333140b5ea3bca4e994dee Mon Sep 17 00:00:00 2001 From: ywcb00 Date: Wed, 6 May 2026 16:47:21 +0200 Subject: [PATCH 06/10] feat(main/hops/estim/EstimatorRowWise.java): add support for element-wise and single operations NOTE: using average case estimation per row --- .../sysds/hops/estim/EstimatorRowWise.java | 97 +++++++++++++++---- 1 file changed, 78 insertions(+), 19 deletions(-) diff --git a/src/main/java/org/apache/sysds/hops/estim/EstimatorRowWise.java b/src/main/java/org/apache/sysds/hops/estim/EstimatorRowWise.java index ef951385948..eaffc520fc6 100644 --- a/src/main/java/org/apache/sysds/hops/estim/EstimatorRowWise.java +++ b/src/main/java/org/apache/sysds/hops/estim/EstimatorRowWise.java @@ -49,7 +49,7 @@ public DataCharacteristics estim(MMNode root) { return root.setDataCharacteristics(outputCharacteristics); } - @Override + @Override public double estim(MatrixBlock m1, MatrixBlock m2) { return estim(m1, m2, OpCode.MM); } @@ -99,8 +99,12 @@ private void estimInternChain(MMNode node, RSVector rsRightNeighbor, OpCode opRi estimInternChain(node.getLeft()); estimInternChain(node.getRight()); RSVector rsCBind = estimInternCBind((RSVector)(node.getLeft().getSynopsis()), (RSVector)(node.getRight().getSynopsis())); - if(rsRightNeighbor != null) - rsOut = (RSVector)estimIntern(rsCBind, rsRightNeighbor, opRightNeighbor); + if(rsRightNeighbor != null) { + rsOut = (RSVector)estimInternMMFallback(rsCBind, rsRightNeighbor); + if(opRightNeighbor != OpCode.MM) + throw new NotImplementedException("Fallback sparsity estimation has only been " + + "considered for MM operation w/ right neighbor, yet"); + } else rsOut = (RSVector)rsCBind; break; @@ -111,11 +115,47 @@ private void estimInternChain(MMNode node, RSVector rsRightNeighbor, OpCode opRi estimInternChain(node.getLeft()); estimInternChain(node.getRight()); RSVector rsRBind = estimInternRBind((RSVector)(node.getLeft().getSynopsis()), (RSVector)(node.getRight().getSynopsis())); - if(rsRightNeighbor != null) - rsOut = (RSVector)estimIntern(rsRBind, rsRightNeighbor, opRightNeighbor); + if(rsRightNeighbor != null) { + rsOut = (RSVector)estimInternMMFallback(rsRBind, rsRightNeighbor); + if(opRightNeighbor != OpCode.MM) + throw new NotImplementedException("Fallback sparsity estimation has only been " + + "considered for MM operation w/ right neighbor, yet"); + } else rsOut = (RSVector)rsRBind; break; + case PLUS: + /** NOTE: considering the current node as new DAG for estimation (cut), since the row sparsity of + * the right neighbor cannot be aggregated into an element-wise operation when having only row sparsity vectors + */ + estimInternChain(node.getLeft()); + estimInternChain(node.getRight()); + RSVector rsPlus = estimInternPlus((RSVector)(node.getLeft().getSynopsis()), (RSVector)(node.getRight().getSynopsis())); + if(rsRightNeighbor != null) { + rsOut = (RSVector)estimInternMMFallback(rsPlus, rsRightNeighbor); + if(opRightNeighbor != OpCode.MM) + throw new NotImplementedException("Fallback sparsity estimation has only been " + + "considered for MM operation w/ right neighbor, yet"); + } + else + rsOut = (RSVector)rsPlus; + break; + case MULT: + /** NOTE: considering the current node as new DAG for estimation (cut), since the row sparsity of + * the right neighbor cannot be aggregated into an element-wise operation when having only row sparsity vectors + */ + estimInternChain(node.getLeft()); + estimInternChain(node.getRight()); + RSVector rsMult = estimInternMult((RSVector)(node.getLeft().getSynopsis()), (RSVector)(node.getRight().getSynopsis())); + if(rsRightNeighbor != null) { + rsOut = (RSVector)estimInternMMFallback(rsMult, rsRightNeighbor); + if(opRightNeighbor != OpCode.MM) + throw new NotImplementedException("Fallback sparsity estimation has only been " + + "considered for MM operation w/ right neighbor, yet"); + } + else + rsOut = (RSVector)rsMult; + break; default: throw new NotImplementedException("Chain estimation for operator " + node.getOp().toString() + " is not supported yet."); @@ -139,19 +179,10 @@ private RSVector estimIntern(MatrixBlock m1, RSVector rsM2, OpCode op) { return estimInternCBind(getRowWiseSparsityVector(m1), rsM2); case RBIND: return estimInternRBind(getRowWiseSparsityVector(m1), rsM2); - default: - throw new NotImplementedException("Sparsity estimation for operation " + op.toString() + " not supported yet."); - } - } - - private RSVector estimIntern(RSVector rsM1, RSVector rsM2, OpCode op) { - switch(op) { - case MM: - return estimInternMM(rsM1, rsM2); - // case CBIND: - // return estimInternCBind(rsM1, rsM2); - // case RBIND: - // return estimInternRBind(rsM1, rsM2); + case PLUS: + return estimInternPlus(getRowWiseSparsityVector(m1), rsM2); + case MULT: + return estimInternMult(getRowWiseSparsityVector(m1), rsM2); default: throw new NotImplementedException("Sparsity estimation for operation " + op.toString() + " not supported yet."); } @@ -168,7 +199,8 @@ private RSVector estimInternMM(MatrixBlock m1, RSVector rsM2) { } // NOTE: this is the best estimation possible when we only have the two row sparsity vectors - private RSVector estimInternMM(RSVector rsM1, RSVector rsM2) { + private RSVector estimInternMMFallback(RSVector rsM1, RSVector rsM2) { + // NOTE: Considering the average would probably not be far off while saving computing time // double avgRsM2 = DoubleStream.of(rsM2).average().orElse(0); // RSVector rsOut = DoubleStream.of(rsM1).map( // rsM1I -> (double) 1 - Math.pow((double) 1 - (rsM1I * avgRsM2), rsM2.length)).toArray(); @@ -187,6 +219,18 @@ private RSVector estimInternRBind(RSVector rsM1, RSVector rsM2) { return rsM1.append(rsM2); } + private RSVector estimInternPlus(RSVector rsM1, RSVector rsM2) { + // row-wise average case estimates + // rsM1 + rsM2 - (rsM1 * rsM2) + return rsM1.add(rsM2).subtract(rsM1.multiply(rsM2)); + } + + private RSVector estimInternMult(RSVector rsM1, RSVector rsM2) { + // row-wise average case estimates + // rsM1 * rsM2 + return rsM1.multiply(rsM2); + } + private RSVector getRowWiseSparsityVector(MatrixBlock mb) { int numRows = mb.getNumRows(); if(mb.isInSparseFormat()) { @@ -287,5 +331,20 @@ public RSVector map(DoubleUnaryOperator mapper) { public double reduce(double identity, DoubleBinaryOperator op) { return DoubleStream.of(this.rs).reduce(identity, op); } + + public RSVector add(RSVector that) { + return new RSVector(IntStream.range(0, this.size()).mapToDouble( + idx -> this.get(idx) + that.get(idx)).toArray()); + } + + public RSVector subtract(RSVector that) { + return new RSVector(IntStream.range(0, this.size()).mapToDouble( + idx -> this.get(idx) - that.get(idx)).toArray()); + } + + public RSVector multiply(RSVector that) { + return new RSVector(IntStream.range(0, this.size()).mapToDouble( + idx -> this.get(idx) * that.get(idx)).toArray()); + } }; }; From fb03681a2a6cdf5df5d7fed4ed7d2d51a9aecf79 Mon Sep 17 00:00:00 2001 From: ywcb00 Date: Mon, 11 May 2026 09:10:59 +0200 Subject: [PATCH 07/10] feat(main/hops/estim/EstimatorRowWise.java): support sparsity estimation for diagonal operation --- .../sysds/hops/estim/EstimatorRowWise.java | 32 +++++++++++++++---- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/src/main/java/org/apache/sysds/hops/estim/EstimatorRowWise.java b/src/main/java/org/apache/sysds/hops/estim/EstimatorRowWise.java index eaffc520fc6..500e912bd70 100644 --- a/src/main/java/org/apache/sysds/hops/estim/EstimatorRowWise.java +++ b/src/main/java/org/apache/sysds/hops/estim/EstimatorRowWise.java @@ -56,7 +56,7 @@ public double estim(MatrixBlock m1, MatrixBlock m2) { @Override public double estim(MatrixBlock m1, MatrixBlock m2, OpCode op) { - if( isExactMetadataOp(op) ) { + if( isExactMetadataOp(op, m1.getNumColumns()) ) { return estimExactMetaData(m1.getDataCharacteristics(), m2.getDataCharacteristics(), op).getSparsity(); } @@ -67,9 +67,11 @@ public double estim(MatrixBlock m1, MatrixBlock m2, OpCode op) { @Override public double estim(MatrixBlock m1, OpCode op) { - if( isExactMetadataOp(op) ) + if( isExactMetadataOp(op, m1.getNumColumns()) ) return estimExactMetaData(m1.getDataCharacteristics(), null, op).getSparsity(); - throw new NotImplementedException(); + + RSVector rsOut = estimIntern(m1, op); + return rsOut.avg(); } private void estimInternChain(MMNode node) { @@ -103,7 +105,7 @@ private void estimInternChain(MMNode node, RSVector rsRightNeighbor, OpCode opRi rsOut = (RSVector)estimInternMMFallback(rsCBind, rsRightNeighbor); if(opRightNeighbor != OpCode.MM) throw new NotImplementedException("Fallback sparsity estimation has only been " + - "considered for MM operation w/ right neighbor, yet"); + "considered for MM operation w/ right neighbor yet."); } else rsOut = (RSVector)rsCBind; @@ -119,7 +121,7 @@ private void estimInternChain(MMNode node, RSVector rsRightNeighbor, OpCode opRi rsOut = (RSVector)estimInternMMFallback(rsRBind, rsRightNeighbor); if(opRightNeighbor != OpCode.MM) throw new NotImplementedException("Fallback sparsity estimation has only been " + - "considered for MM operation w/ right neighbor, yet"); + "considered for MM operation w/ right neighbor yet."); } else rsOut = (RSVector)rsRBind; @@ -135,7 +137,7 @@ private void estimInternChain(MMNode node, RSVector rsRightNeighbor, OpCode opRi rsOut = (RSVector)estimInternMMFallback(rsPlus, rsRightNeighbor); if(opRightNeighbor != OpCode.MM) throw new NotImplementedException("Fallback sparsity estimation has only been " + - "considered for MM operation w/ right neighbor, yet"); + "considered for MM operation w/ right neighbor yet."); } else rsOut = (RSVector)rsPlus; @@ -151,7 +153,7 @@ private void estimInternChain(MMNode node, RSVector rsRightNeighbor, OpCode opRi rsOut = (RSVector)estimInternMMFallback(rsMult, rsRightNeighbor); if(opRightNeighbor != OpCode.MM) throw new NotImplementedException("Fallback sparsity estimation has only been " + - "considered for MM operation w/ right neighbor, yet"); + "considered for MM operation w/ right neighbor yet."); } else rsOut = (RSVector)rsMult; @@ -188,6 +190,15 @@ private RSVector estimIntern(MatrixBlock m1, RSVector rsM2, OpCode op) { } } + private RSVector estimIntern(MatrixBlock mb, OpCode op) { + switch(op) { + case DIAG: + return estimInternDiag(mb); + default: + throw new NotImplementedException("Sparsity estimation for operation " + op.toString() + " not supported yet."); + } + } + // Corresponds to Algorithm 1 in the publication private RSVector estimInternMM(MatrixBlock m1, RSVector rsM2) { RSVector rsOut = new RSVector(IntStream.range(0, m1.getNumRows()).mapToDouble( @@ -231,6 +242,13 @@ private RSVector estimInternMult(RSVector rsM1, RSVector rsM2) { return rsM1.multiply(rsM2); } + private RSVector estimInternDiag(MatrixBlock mb) { + RSVector rsOut = new RSVector(IntStream.range(0, mb.getNumRows()).mapToDouble( + rIdx -> (mb.get(rIdx, rIdx) == 0) ? 0d : 1d) + .toArray()); + return rsOut; + } + private RSVector getRowWiseSparsityVector(MatrixBlock mb) { int numRows = mb.getNumRows(); if(mb.isInSparseFormat()) { From 403a28e243f36d5d6461281f94d833989a6c2ce3 Mon Sep 17 00:00:00 2001 From: ywcb00 Date: Mon, 11 May 2026 09:16:04 +0200 Subject: [PATCH 08/10] feat(test/component/estim/OpSingleTest.java): add test cases for eqzero and diag (mv and vm) operations with the row-wise sparsity estimator --- .../test/component/estim/OpSingleTest.java | 32 ++++++++++++------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/src/test/java/org/apache/sysds/test/component/estim/OpSingleTest.java b/src/test/java/org/apache/sysds/test/component/estim/OpSingleTest.java index 1e39847ab37..f0805a1765b 100644 --- a/src/test/java/org/apache/sysds/test/component/estim/OpSingleTest.java +++ b/src/test/java/org/apache/sysds/test/component/estim/OpSingleTest.java @@ -41,7 +41,7 @@ public class OpSingleTest extends AutomatedTestBase private final static int m = 600; private final static int k = 300; private final static double sparsity = 0.2; - // private final static OpCode eqzero = OpCode.EQZERO; + private final static OpCode eqzero = OpCode.EQZERO; private final static OpCode diag = OpCode.DIAG; private final static OpCode neqzero = OpCode.NEQZERO; private final static OpCode trans = OpCode.TRANS; @@ -240,15 +240,20 @@ public void testLGCasetrans() { // } // Row Wise Sparsity Estimator - // @Test - // public void testRowWiseEqzero() { - // runSparsityEstimateTest(new EstimatorRowWise(), m, k, sparsity, eqzero); - // } + @Test + public void testRowWiseEqzero() { + runSparsityEstimateTest(new EstimatorRowWise(), m, k, sparsity, eqzero); + } - // @Test - // public void testRowWiseDiag() { - // runSparsityEstimateTest(new EstimatorRowWise(), m, m, sparsity, diag); - // } + @Test + public void testRowWiseDiagMV() { + runSparsityEstimateTest(new EstimatorRowWise(), m, m, sparsity, diag); + } + + @Test + public void testRowWiseDiagVM() { + runSparsityEstimateTest(new EstimatorRowWise(), m, 1, sparsity, diag); + } @Test public void testRowWiseNeqzero() { @@ -268,27 +273,32 @@ public void testRowWiseReshape() { private static void runSparsityEstimateTest(SparsityEstimator estim, int m, int k, double sp, OpCode op) { MatrixBlock m1 = MatrixBlock.randOperations(m, k, sp, 1, 1, "uniform", 3); MatrixBlock m2 = new MatrixBlock(); + double ref = 1; double est = 0; switch(op) { case EQZERO: - //TODO find out how to do eqzero + ref = 1 - m1.getSparsity(); + est = estim.estim(m1, op); + break; case DIAG: m2 = m1.getNumColumns() == 1 ? LibMatrixReorg.diag(m1, new MatrixBlock(m1.getNumRows(), m1.getNumRows(), false)) : LibMatrixReorg.diag(m1, new MatrixBlock(m1.getNumRows(), 1, false)); + ref = m2.getSparsity(); est = estim.estim(m1, op); break; case NEQZERO: case TRANS: case RESHAPE: m2 = m1; + ref = m2.getSparsity(); est = estim.estim(m1, op); break; default: throw new NotImplementedException(); } //compare estimated and real sparsity - TestUtils.compareScalars(est, m2.getSparsity(), + TestUtils.compareScalars(est, ref, (estim instanceof EstimatorBasicWorst) ? 5e-1 : (estim instanceof EstimatorLayeredGraph) ? 3e-2 : 2e-2); } From 252317a5d3b93243fa01a4dd23a774aa95a5085e Mon Sep 17 00:00:00 2001 From: ywcb00 Date: Mon, 11 May 2026 09:40:28 +0200 Subject: [PATCH 09/10] refactor(main/hops/estim/EstimatorRowWise.java): refactor switch case to consolidate all calls to getters before the switch --- .../sysds/hops/estim/EstimatorRowWise.java | 30 +++++++++++-------- 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/src/main/java/org/apache/sysds/hops/estim/EstimatorRowWise.java b/src/main/java/org/apache/sysds/hops/estim/EstimatorRowWise.java index 500e912bd70..e5d6a0ccfae 100644 --- a/src/main/java/org/apache/sysds/hops/estim/EstimatorRowWise.java +++ b/src/main/java/org/apache/sysds/hops/estim/EstimatorRowWise.java @@ -286,29 +286,35 @@ public static DataCharacteristics deriveOutputCharacteristics(MMNode node, doubl MMNode nodeLeft = node.getLeft(); MMNode nodeRight = node.getRight(); + int leftNRow = nodeLeft.getRows(); + int leftNCol = nodeLeft.getCols(); + int rightNRow = nodeRight.getRows(); + int rightNCol = nodeRight.getCols(); switch(node.getOp()) { case MM: - return new MatrixCharacteristics(nodeLeft.getRows(), nodeRight.getCols(), - OptimizerUtils.getNnz(nodeLeft.getRows(), nodeRight.getCols(), spOut)); + return new MatrixCharacteristics(leftNRow, rightNCol, + OptimizerUtils.getNnz(leftNRow, rightNCol, spOut)); case MULT: case PLUS: case NEQZERO: case EQZERO: - return new MatrixCharacteristics(nodeLeft.getRows(), nodeLeft.getCols(), - OptimizerUtils.getNnz(nodeLeft.getRows(), nodeLeft.getCols(), spOut)); + return new MatrixCharacteristics(leftNRow, leftNCol, + OptimizerUtils.getNnz(leftNRow, leftNCol, spOut)); case RBIND: - return new MatrixCharacteristics(nodeLeft.getRows()+nodeLeft.getRows(), nodeLeft.getCols(), - OptimizerUtils.getNnz(nodeLeft.getRows()+nodeRight.getRows(), nodeLeft.getCols(), spOut)); + return new MatrixCharacteristics(leftNRow+rightNRow, leftNCol, + OptimizerUtils.getNnz(leftNRow+rightNRow, leftNCol, spOut)); case CBIND: - return new MatrixCharacteristics(nodeLeft.getRows(), nodeLeft.getCols()+nodeRight.getCols(), - OptimizerUtils.getNnz(nodeLeft.getRows(), nodeLeft.getCols()+nodeRight.getCols(), spOut)); + return new MatrixCharacteristics(leftNRow, leftNCol+rightNCol, + OptimizerUtils.getNnz(leftNRow, leftNCol+rightNCol, spOut)); case DIAG: - int ncol = nodeLeft.getCols()==1 ? nodeLeft.getRows() : 1; - return new MatrixCharacteristics(nodeLeft.getRows(), ncol, - OptimizerUtils.getNnz(nodeLeft.getRows(), ncol, spOut)); + int ncol = (leftNCol == 1) ? leftNRow : 1; + return new MatrixCharacteristics(leftNRow, ncol, + OptimizerUtils.getNnz(leftNRow, ncol, spOut)); case TRANS: + return new MatrixCharacteristics(leftNCol, leftNRow, + OptimizerUtils.getNnz(leftNCol, leftNRow, spOut)); case RESHAPE: - throw new NotImplementedException("Characteristics derivation for trans and reshape has not been " + + throw new NotImplementedException("Characteristics derivation for " + node.getOp() +" has not been " + "implemented yet, but could be implemented similar to EstimatorMatrixHistogram.java"); default: throw new NotImplementedException(); From 23615933fdbe632462a574777bf02776046b5b0c Mon Sep 17 00:00:00 2001 From: ywcb00 Date: Mon, 11 May 2026 10:29:35 +0200 Subject: [PATCH 10/10] refactor(main/hops/estim/EstimatorRowWise.java): remove wrapper class for row-wise sparsity vector and apply the corresponding operations directly in the code instead --- .../sysds/hops/estim/EstimatorRowWise.java | 157 ++++++------------ 1 file changed, 55 insertions(+), 102 deletions(-) diff --git a/src/main/java/org/apache/sysds/hops/estim/EstimatorRowWise.java b/src/main/java/org/apache/sysds/hops/estim/EstimatorRowWise.java index e5d6a0ccfae..9e92b913522 100644 --- a/src/main/java/org/apache/sysds/hops/estim/EstimatorRowWise.java +++ b/src/main/java/org/apache/sysds/hops/estim/EstimatorRowWise.java @@ -43,7 +43,7 @@ public class EstimatorRowWise extends SparsityEstimator { @Override public DataCharacteristics estim(MMNode root) { estimInternChain(root); - double sparsity = ((RSVector)root.getSynopsis()).avg(); + double sparsity = DoubleStream.of((double[])root.getSynopsis()).average().orElse(0); DataCharacteristics outputCharacteristics = deriveOutputCharacteristics(root, sparsity); return root.setDataCharacteristics(outputCharacteristics); @@ -61,8 +61,8 @@ public double estim(MatrixBlock m1, MatrixBlock m2, OpCode op) { m2.getDataCharacteristics(), op).getSparsity(); } - RSVector rsOut = estimIntern(m1, m2, op); - return rsOut.avg(); + double[] rsOut = estimIntern(m1, m2, op); + return DoubleStream.of(rsOut).average().orElse(0); } @Override @@ -70,16 +70,16 @@ public double estim(MatrixBlock m1, OpCode op) { if( isExactMetadataOp(op, m1.getNumColumns()) ) return estimExactMetaData(m1.getDataCharacteristics(), null, op).getSparsity(); - RSVector rsOut = estimIntern(m1, op); - return rsOut.avg(); + double[] rsOut = estimIntern(m1, op); + return DoubleStream.of(rsOut).average().orElse(0); } private void estimInternChain(MMNode node) { estimInternChain(node, null, null); } - private void estimInternChain(MMNode node, RSVector rsRightNeighbor, OpCode opRightNeighbor) { - RSVector rsOut; + private void estimInternChain(MMNode node, double[] rsRightNeighbor, OpCode opRightNeighbor) { + double[] rsOut; if(node.isLeaf()) { MatrixBlock mb = node.getData(); if(rsRightNeighbor != null) @@ -91,8 +91,8 @@ private void estimInternChain(MMNode node, RSVector rsRightNeighbor, OpCode opRi switch(node.getOp()) { case MM: estimInternChain(node.getRight(), rsRightNeighbor, opRightNeighbor); - estimInternChain(node.getLeft(), (RSVector)(node.getRight().getSynopsis()), node.getOp()); - rsOut = (RSVector)node.getLeft().getSynopsis(); + estimInternChain(node.getLeft(), (double[])(node.getRight().getSynopsis()), node.getOp()); + rsOut = (double[])node.getLeft().getSynopsis(); break; case CBIND: /** NOTE: considering the current node as new DAG for estimation (cut), since the row sparsity of @@ -100,15 +100,15 @@ private void estimInternChain(MMNode node, RSVector rsRightNeighbor, OpCode opRi */ estimInternChain(node.getLeft()); estimInternChain(node.getRight()); - RSVector rsCBind = estimInternCBind((RSVector)(node.getLeft().getSynopsis()), (RSVector)(node.getRight().getSynopsis())); + double[] rsCBind = estimInternCBind((double[])(node.getLeft().getSynopsis()), (double[])(node.getRight().getSynopsis())); if(rsRightNeighbor != null) { - rsOut = (RSVector)estimInternMMFallback(rsCBind, rsRightNeighbor); + rsOut = (double[])estimInternMMFallback(rsCBind, rsRightNeighbor); if(opRightNeighbor != OpCode.MM) throw new NotImplementedException("Fallback sparsity estimation has only been " + "considered for MM operation w/ right neighbor yet."); } else - rsOut = (RSVector)rsCBind; + rsOut = (double[])rsCBind; break; case RBIND: /** NOTE: considering the current node as new DAG for estimation (cut), since the row sparsity of @@ -116,15 +116,15 @@ private void estimInternChain(MMNode node, RSVector rsRightNeighbor, OpCode opRi */ estimInternChain(node.getLeft()); estimInternChain(node.getRight()); - RSVector rsRBind = estimInternRBind((RSVector)(node.getLeft().getSynopsis()), (RSVector)(node.getRight().getSynopsis())); + double[] rsRBind = estimInternRBind((double[])(node.getLeft().getSynopsis()), (double[])(node.getRight().getSynopsis())); if(rsRightNeighbor != null) { - rsOut = (RSVector)estimInternMMFallback(rsRBind, rsRightNeighbor); + rsOut = (double[])estimInternMMFallback(rsRBind, rsRightNeighbor); if(opRightNeighbor != OpCode.MM) throw new NotImplementedException("Fallback sparsity estimation has only been " + "considered for MM operation w/ right neighbor yet."); } else - rsOut = (RSVector)rsRBind; + rsOut = (double[])rsRBind; break; case PLUS: /** NOTE: considering the current node as new DAG for estimation (cut), since the row sparsity of @@ -132,15 +132,15 @@ private void estimInternChain(MMNode node, RSVector rsRightNeighbor, OpCode opRi */ estimInternChain(node.getLeft()); estimInternChain(node.getRight()); - RSVector rsPlus = estimInternPlus((RSVector)(node.getLeft().getSynopsis()), (RSVector)(node.getRight().getSynopsis())); + double[] rsPlus = estimInternPlus((double[])(node.getLeft().getSynopsis()), (double[])(node.getRight().getSynopsis())); if(rsRightNeighbor != null) { - rsOut = (RSVector)estimInternMMFallback(rsPlus, rsRightNeighbor); + rsOut = (double[])estimInternMMFallback(rsPlus, rsRightNeighbor); if(opRightNeighbor != OpCode.MM) throw new NotImplementedException("Fallback sparsity estimation has only been " + "considered for MM operation w/ right neighbor yet."); } else - rsOut = (RSVector)rsPlus; + rsOut = (double[])rsPlus; break; case MULT: /** NOTE: considering the current node as new DAG for estimation (cut), since the row sparsity of @@ -148,15 +148,15 @@ private void estimInternChain(MMNode node, RSVector rsRightNeighbor, OpCode opRi */ estimInternChain(node.getLeft()); estimInternChain(node.getRight()); - RSVector rsMult = estimInternMult((RSVector)(node.getLeft().getSynopsis()), (RSVector)(node.getRight().getSynopsis())); + double[] rsMult = estimInternMult((double[])(node.getLeft().getSynopsis()), (double[])(node.getRight().getSynopsis())); if(rsRightNeighbor != null) { - rsOut = (RSVector)estimInternMMFallback(rsMult, rsRightNeighbor); + rsOut = (double[])estimInternMMFallback(rsMult, rsRightNeighbor); if(opRightNeighbor != OpCode.MM) throw new NotImplementedException("Fallback sparsity estimation has only been " + "considered for MM operation w/ right neighbor yet."); } else - rsOut = (RSVector)rsMult; + rsOut = (double[])rsMult; break; default: throw new NotImplementedException("Chain estimation for operator " + node.getOp().toString() + @@ -164,16 +164,16 @@ private void estimInternChain(MMNode node, RSVector rsRightNeighbor, OpCode opRi } } node.setSynopsis(rsOut); - node.setDataCharacteristics(deriveOutputCharacteristics(node, rsOut.avg())); + node.setDataCharacteristics(deriveOutputCharacteristics(node, DoubleStream.of(rsOut).average().orElse(0))); return; } - private RSVector estimIntern(MatrixBlock m1, MatrixBlock m2, OpCode op) { - RSVector rsM2 = getRowWiseSparsityVector(m2); + private double[] estimIntern(MatrixBlock m1, MatrixBlock m2, OpCode op) { + double[] rsM2 = getRowWiseSparsityVector(m2); return estimIntern(m1, rsM2, op); } - private RSVector estimIntern(MatrixBlock m1, RSVector rsM2, OpCode op) { + private double[] estimIntern(MatrixBlock m1, double[] rsM2, OpCode op) { switch(op) { case MM: return estimInternMM(m1, rsM2); @@ -190,7 +190,7 @@ private RSVector estimIntern(MatrixBlock m1, RSVector rsM2, OpCode op) { } } - private RSVector estimIntern(MatrixBlock mb, OpCode op) { + private double[] estimIntern(MatrixBlock mb, OpCode op) { switch(op) { case DIAG: return estimInternDiag(mb); @@ -200,56 +200,59 @@ private RSVector estimIntern(MatrixBlock mb, OpCode op) { } // Corresponds to Algorithm 1 in the publication - private RSVector estimInternMM(MatrixBlock m1, RSVector rsM2) { - RSVector rsOut = new RSVector(IntStream.range(0, m1.getNumRows()).mapToDouble( + private double[] estimInternMM(MatrixBlock m1, double[] rsM2) { + double[] rsOut = IntStream.range(0, m1.getNumRows()).mapToDouble( r -> (double) 1 - IntStream.of(getNonZeroColumnIndices(m1, r)).mapToDouble( - c -> (double) 1 - rsM2.get(c) + c -> (double) 1 - rsM2[c] ).reduce((double) 1, (currentVal, val) -> currentVal * val)) - .toArray()); + .toArray(); return rsOut; } // NOTE: this is the best estimation possible when we only have the two row sparsity vectors - private RSVector estimInternMMFallback(RSVector rsM1, RSVector rsM2) { + private double[] estimInternMMFallback(double[] rsM1, double[] rsM2) { // NOTE: Considering the average would probably not be far off while saving computing time // double avgRsM2 = DoubleStream.of(rsM2).average().orElse(0); - // RSVector rsOut = DoubleStream.of(rsM1).map( + // double[] rsOut = DoubleStream.of(rsM1).map( // rsM1I -> (double) 1 - Math.pow((double) 1 - (rsM1I * avgRsM2), rsM2.length)).toArray(); - RSVector rsOut = rsM1.map( - rsM1I -> (double) 1 - rsM2.reduce((double) 1, - (currentVal, rsM2J) -> currentVal * ((double) 1 - (rsM1I * rsM2J)))); + double[] rsOut = DoubleStream.of(rsM1).map( + rsM1I -> (double) 1 - DoubleStream.of(rsM2).reduce((double) 1, + (currentVal, rsM2J) -> currentVal * ((double) 1 - (rsM1I * rsM2J)))).toArray(); return rsOut; } - private RSVector estimInternCBind(RSVector rsM1, RSVector rsM2) { - return new RSVector(IntStream.range(0, rsM1.size()).mapToDouble( - idx -> (rsM1.get(idx) + rsM2.get(idx)) / (double) 2).toArray()); + private double[] estimInternCBind(double[] rsM1, double[] rsM2) { + // FIXME: this assumes that the number of columns is equivalent for both inputs + return IntStream.range(0, rsM1.length).mapToDouble( + idx -> (rsM1[idx] + rsM2[idx]) / (double) 2).toArray(); } - private RSVector estimInternRBind(RSVector rsM1, RSVector rsM2) { - return rsM1.append(rsM2); + private double[] estimInternRBind(double[] rsM1, double[] rsM2) { + return ArrayUtils.addAll(rsM1, rsM2); } - private RSVector estimInternPlus(RSVector rsM1, RSVector rsM2) { + private double[] estimInternPlus(double[] rsM1, double[] rsM2) { // row-wise average case estimates // rsM1 + rsM2 - (rsM1 * rsM2) - return rsM1.add(rsM2).subtract(rsM1.multiply(rsM2)); + return IntStream.range(0, rsM1.length).mapToDouble( + idx -> rsM1[idx] + rsM2[idx] - (rsM1[idx] * rsM2[idx])).toArray(); } - private RSVector estimInternMult(RSVector rsM1, RSVector rsM2) { + private double[] estimInternMult(double[] rsM1, double[] rsM2) { // row-wise average case estimates // rsM1 * rsM2 - return rsM1.multiply(rsM2); + return IntStream.range(0, rsM1.length).mapToDouble( + idx -> rsM1[idx] * rsM2[idx]).toArray(); } - private RSVector estimInternDiag(MatrixBlock mb) { - RSVector rsOut = new RSVector(IntStream.range(0, mb.getNumRows()).mapToDouble( + private double[] estimInternDiag(MatrixBlock mb) { + double[] rsOut = IntStream.range(0, mb.getNumRows()).mapToDouble( rIdx -> (mb.get(rIdx, rIdx) == 0) ? 0d : 1d) - .toArray()); + .toArray(); return rsOut; } - private RSVector getRowWiseSparsityVector(MatrixBlock mb) { + private double[] getRowWiseSparsityVector(MatrixBlock mb) { int numRows = mb.getNumRows(); if(mb.isInSparseFormat()) { double[] rsArray = new double[numRows]; @@ -257,11 +260,12 @@ private RSVector getRowWiseSparsityVector(MatrixBlock mb) { SparseRow sparseRow = mb.getSparseBlock().get(counter); rsArray[counter] = (sparseRow == null) ? 0 : (double) sparseRow.size() / mb.getNumColumns(); } - return new RSVector(rsArray); + return rsArray; } else { - return new RSVector(IntStream.range(0, numRows).mapToDouble( - rIdx -> (double) mb.getDenseBlock().countNonZeros(rIdx) / mb.getNumColumns()).toArray()); + return IntStream.range(0, numRows).mapToDouble( + rIdx -> (double) mb.getDenseBlock().countNonZeros(rIdx) / mb.getNumColumns()) + .toArray(); } } @@ -320,55 +324,4 @@ public static DataCharacteristics deriveOutputCharacteristics(MMNode node, doubl throw new NotImplementedException(); } } - - public static class RSVector { - private final double[] rs; - - public RSVector(double[] rs) { - this.rs = rs; - } - - public double[] get() { - return this.rs; - } - - public double get(int idx) { - return this.rs[idx]; - } - - public int size() { - return this.rs.length; - } - - public double avg() { - return DoubleStream.of(this.rs).average().orElse(0); - } - - public RSVector append(RSVector that) { - return new RSVector(ArrayUtils.addAll(this.rs, that.get())); - } - - public RSVector map(DoubleUnaryOperator mapper) { - return new RSVector(DoubleStream.of(this.rs).map(mapper).toArray()); - } - - public double reduce(double identity, DoubleBinaryOperator op) { - return DoubleStream.of(this.rs).reduce(identity, op); - } - - public RSVector add(RSVector that) { - return new RSVector(IntStream.range(0, this.size()).mapToDouble( - idx -> this.get(idx) + that.get(idx)).toArray()); - } - - public RSVector subtract(RSVector that) { - return new RSVector(IntStream.range(0, this.size()).mapToDouble( - idx -> this.get(idx) - that.get(idx)).toArray()); - } - - public RSVector multiply(RSVector that) { - return new RSVector(IntStream.range(0, this.size()).mapToDouble( - idx -> this.get(idx) * that.get(idx)).toArray()); - } - }; };