From b570380bc85da2f66dc8767a5c12b02c35a744e0 Mon Sep 17 00:00:00 2001 From: Hana Halitim Date: Sun, 23 Nov 2025 22:23:28 +0100 Subject: [PATCH 1/6] SYSTEMDS-3539 Implement delta encoding (Parts 1, 2, and 3) --- .../runtime/compress/CompressionSettings.java | 6 +- .../compress/CompressionSettingsBuilder.java | 17 +- .../runtime/compress/colgroup/AColGroup.java | 4 + .../colgroup/AColGroupCompressed.java | 4 + .../compress/colgroup/AColGroupValue.java | 4 + .../compress/colgroup/ADictBasedColGroup.java | 5 + .../runtime/compress/colgroup/APreAgg.java | 4 + .../compress/colgroup/ColGroupDDC.java | 7 +- .../compress/colgroup/ColGroupDeltaDDC.java | 137 +++++--- .../compress/colgroup/ColGroupFactory.java | 127 +++++++ .../sysds/runtime/compress/estim/AComEst.java | 22 +- .../estim/CompressedSizeInfoColGroup.java | 6 +- .../estim/encoding/EncodingFactory.java | 65 +++- .../runtime/compress/lib/CLALibUnary.java | 39 ++- .../readers/ReaderColumnSelection.java | 56 ++- ...erColumnSelectionDenseMultiBlockDelta.java | 63 ++++ ...rColumnSelectionDenseSingleBlockDelta.java | 65 ++++ .../ReaderColumnSelectionSparseDelta.java | 91 +++++ .../colgroup/ColGroupDeltaDDCTest.java | 139 ++++---- .../estim/encoding/EncodeDeltaTest.java | 250 ++++++++++++++ .../estim/encoding/EncodeNegativeTest.java | 23 +- .../compress/lib/CLALibUnaryDeltaTest.java | 296 ++++++++++++++++ .../compress/readers/ReadersDeltaTest.java | 323 ++++++++++++++++++ .../compress/readers/ReadersTest.java | 51 ++- 24 files changed, 1674 insertions(+), 130 deletions(-) create mode 100644 src/main/java/org/apache/sysds/runtime/compress/readers/ReaderColumnSelectionDenseMultiBlockDelta.java create mode 100644 src/main/java/org/apache/sysds/runtime/compress/readers/ReaderColumnSelectionDenseSingleBlockDelta.java create mode 100644 src/main/java/org/apache/sysds/runtime/compress/readers/ReaderColumnSelectionSparseDelta.java create mode 100644 
src/test/java/org/apache/sysds/test/component/compress/estim/encoding/EncodeDeltaTest.java create mode 100644 src/test/java/org/apache/sysds/test/component/compress/lib/CLALibUnaryDeltaTest.java create mode 100644 src/test/java/org/apache/sysds/test/component/compress/readers/ReadersDeltaTest.java diff --git a/src/main/java/org/apache/sysds/runtime/compress/CompressionSettings.java b/src/main/java/org/apache/sysds/runtime/compress/CompressionSettings.java index f6321bc1b6d..af944fce750 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/CompressionSettings.java +++ b/src/main/java/org/apache/sysds/runtime/compress/CompressionSettings.java @@ -133,11 +133,14 @@ public class CompressionSettings { public final double[] scaleFactors; + public final boolean preferDeltaEncoding; + protected CompressionSettings(double samplingRatio, double samplePower, boolean allowSharedDictionary, String transposeInput, int seed, boolean lossy, EnumSet validCompressions, boolean sortValuesByLength, PartitionerType columnPartitioner, int maxColGroupCoCode, double coCodePercentage, int minimumSampleSize, int maxSampleSize, EstimationType estimationType, CostType costComputationType, - double minimumCompressionRatio, boolean isInSparkInstruction, SORT_TYPE sdcSortType, double[] scaleFactors) { + double minimumCompressionRatio, boolean isInSparkInstruction, SORT_TYPE sdcSortType, double[] scaleFactors, + boolean preferDeltaEncoding) { this.samplingRatio = samplingRatio; this.samplePower = samplePower; this.allowSharedDictionary = allowSharedDictionary; @@ -157,6 +160,7 @@ protected CompressionSettings(double samplingRatio, double samplePower, boolean this.isInSparkInstruction = isInSparkInstruction; this.sdcSortType = sdcSortType; this.scaleFactors = scaleFactors; + this.preferDeltaEncoding = preferDeltaEncoding; if(!printedStatus && LOG.isDebugEnabled()) { printedStatus = true; diff --git a/src/main/java/org/apache/sysds/runtime/compress/CompressionSettingsBuilder.java 
b/src/main/java/org/apache/sysds/runtime/compress/CompressionSettingsBuilder.java index ae6a0b2d231..02c9f97498d 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/CompressionSettingsBuilder.java +++ b/src/main/java/org/apache/sysds/runtime/compress/CompressionSettingsBuilder.java @@ -53,6 +53,7 @@ public class CompressionSettingsBuilder { private boolean isInSparkInstruction = false; private SORT_TYPE sdcSortType = SORT_TYPE.MATERIALIZE; private double[] scaleFactors = null; + private boolean preferDeltaEncoding = false; public CompressionSettingsBuilder() { @@ -101,6 +102,7 @@ public CompressionSettingsBuilder copySettings(CompressionSettings that) { this.maxColGroupCoCode = that.maxColGroupCoCode; this.coCodePercentage = that.coCodePercentage; this.minimumSampleSize = that.minimumSampleSize; + this.preferDeltaEncoding = that.preferDeltaEncoding; return this; } @@ -336,6 +338,19 @@ public CompressionSettingsBuilder setSDCSortType(SORT_TYPE sdcSortType) { return this; } + /** + * Set whether to prefer delta encoding during compression estimation. + * When enabled, the compression estimator will use delta encoding statistics + * instead of regular encoding statistics. + * + * @param preferDeltaEncoding Whether to prefer delta encoding + * @return The CompressionSettingsBuilder + */ + public CompressionSettingsBuilder setPreferDeltaEncoding(boolean preferDeltaEncoding) { + this.preferDeltaEncoding = preferDeltaEncoding; + return this; + } + /** * Create the CompressionSettings object to use in the compression. 
* @@ -345,6 +360,6 @@ public CompressionSettings create() { return new CompressionSettings(samplingRatio, samplePower, allowSharedDictionary, transposeInput, seed, lossy, validCompressions, sortValuesByLength, columnPartitioner, maxColGroupCoCode, coCodePercentage, minimumSampleSize, maxSampleSize, estimationType, costType, minimumCompressionRatio, isInSparkInstruction, - sdcSortType, scaleFactors); + sdcSortType, scaleFactors, preferDeltaEncoding); } } diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroup.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroup.java index ec502d6d122..57503f64884 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroup.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroup.java @@ -93,6 +93,10 @@ protected static enum ColGroupType { /** The ColGroup indexes contained in the ColGroup */ protected final IColIndex _colIndexes; + protected AColGroup() { + _colIndexes = null; + } + /** * Main constructor. 
* diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroupCompressed.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroupCompressed.java index b83f133647e..8e8211e40fc 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroupCompressed.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroupCompressed.java @@ -46,6 +46,10 @@ public abstract class AColGroupCompressed extends AColGroup { private static final long serialVersionUID = 6219835795420081223L; + protected AColGroupCompressed() { + super(); + } + protected AColGroupCompressed(IColIndex colIndices) { super(colIndices); } diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroupValue.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroupValue.java index 0cde289b30f..bd9834938e6 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroupValue.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroupValue.java @@ -35,6 +35,10 @@ public abstract class AColGroupValue extends ADictBasedColGroup { /** The count of each distinct value contained in the dictionary */ private SoftReference counts = null; + protected AColGroupValue() { + super(); + } + /** * A abstract class for column groups that contain IDictionary for values. * diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ADictBasedColGroup.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ADictBasedColGroup.java index 8f2f0b46055..16bfe7d0924 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ADictBasedColGroup.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ADictBasedColGroup.java @@ -43,6 +43,11 @@ public abstract class ADictBasedColGroup extends AColGroupCompressed implements /** Distinct value tuples associated with individual bitmaps. 
*/ protected final IDictionary _dict; + protected ADictBasedColGroup() { + super(); + _dict = null; + } + /** * A Abstract class for column groups that contain IDictionary for values. * diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/APreAgg.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/APreAgg.java index bddc50d4bcb..7f85ad9519a 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/APreAgg.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/APreAgg.java @@ -41,6 +41,10 @@ public abstract class APreAgg extends AColGroupValue { private static boolean loggedWarningForDirect = false; + protected APreAgg() { + super(); + } + /** * A Abstract class for column groups that contain IDictionary for values. * diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDC.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDC.java index fc82c58e16b..0c9c6eb067a 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDC.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDC.java @@ -77,7 +77,12 @@ public class ColGroupDDC extends APreAgg implements IMapToDataGroup { static final VectorSpecies SPECIES = DoubleVector.SPECIES_PREFERRED; - private ColGroupDDC(IColIndex colIndexes, IDictionary dict, AMapToData data, int[] cachedCounts) { + protected ColGroupDDC() { + super(); + _data = null; + } + + protected ColGroupDDC(IColIndex colIndexes, IDictionary dict, AMapToData data, int[] cachedCounts) { super(colIndexes, dict, cachedCounts); _data = data; diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDeltaDDC.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDeltaDDC.java index 2666860ca68..292931a0f38 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDeltaDDC.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDeltaDDC.java 
@@ -19,62 +19,107 @@ package org.apache.sysds.runtime.compress.colgroup; +import org.apache.commons.lang3.NotImplementedException; +import org.apache.sysds.runtime.DMLRuntimeException; +import org.apache.sysds.runtime.compress.CompressedMatrixBlock; +import org.apache.sysds.runtime.compress.DMLCompressionException; +import org.apache.sysds.runtime.compress.colgroup.dictionary.DeltaDictionary; +import org.apache.sysds.runtime.compress.colgroup.dictionary.IDictionary; +import org.apache.sysds.runtime.compress.colgroup.indexes.IColIndex; +import org.apache.sysds.runtime.compress.colgroup.mapping.AMapToData; +import org.apache.sysds.runtime.data.DenseBlock; +import org.apache.sysds.runtime.data.SparseBlock; +import org.apache.sysds.runtime.matrix.operators.ScalarOperator; + /** * Class to encapsulate information about a column group that is first delta encoded then encoded with dense dictionary * encoding (DeltaDDC). */ -public class ColGroupDeltaDDC { // extends ColGroupDDC +public class ColGroupDeltaDDC extends ColGroupDDC { + private static final long serialVersionUID = -1045556313148564147L; -// private static final long serialVersionUID = -1045556313148564147L; + /** Constructor for serialization */ + protected ColGroupDeltaDDC() { + super(); + } -// /** Constructor for serialization */ -// protected ColGroupDeltaDDC() { -// } + private ColGroupDeltaDDC(IColIndex colIndexes, IDictionary dict, AMapToData data, int[] cachedCounts) { + super(colIndexes, dict, data, cachedCounts); + if(CompressedMatrixBlock.debug) { + if(!(dict instanceof DeltaDictionary)) + throw new DMLCompressionException("DeltaDDC must use DeltaDictionary"); + } + } -// private ColGroupDeltaDDC(int[] colIndexes, ADictionary dict, AMapToData data, int[] cachedCounts) { -// super(); -// LOG.info("Carefully use of DeltaDDC since implementation is not finished."); -// _colIndexes = colIndexes; -// _dict = dict; -// _data = data; -// } + public static AColGroup create(IColIndex colIndexes, IDictionary 
dict, AMapToData data, int[] cachedCounts) { + if(data.getUnique() == 1) + return ColGroupConst.create(colIndexes, dict); + else if(dict == null) + return new ColGroupEmpty(colIndexes); + else + return new ColGroupDeltaDDC(colIndexes, dict, data, cachedCounts); + } -// public static AColGroup create(int[] colIndices, ADictionary dict, AMapToData data, int[] cachedCounts) { -// if(dict == null) -// throw new NotImplementedException("Not implemented constant delta group"); -// else -// return new ColGroupDeltaDDC(colIndices, dict, data, cachedCounts); -// } + @Override + public CompressionType getCompType() { + return CompressionType.DeltaDDC; + } -// public CompressionType getCompType() { -// return CompressionType.DeltaDDC; -// } + @Override + protected void decompressToDenseBlockDenseDictionary(DenseBlock db, int rl, int ru, int offR, int offC, + double[] values) { + final int nCol = _colIndexes.size(); + final double[] prevRow = new double[nCol]; + + if(rl > 0) { + final double[] prevRowData = db.values(rl - 1 + offR); + final int prevOff = db.pos(rl - 1 + offR) + offC; + for(int j = 0; j < nCol; j++) { + prevRow[j] = prevRowData[prevOff + _colIndexes.get(j)]; + } + } -// @Override -// protected void decompressToDenseBlockDenseDictionary(DenseBlock db, int rl, int ru, int offR, int offC, -// double[] values) { -// final int nCol = _colIndexes.length; -// for(int i = rl, offT = rl + offR; i < ru; i++, offT++) { -// final double[] c = db.values(offT); -// final int off = db.pos(offT) + offC; -// final int rowIndex = _data.getIndex(i) * nCol; -// final int prevOff = (off == 0) ? 
off : off - nCol; -// for(int j = 0; j < nCol; j++) { -// // Here we use the values in the previous row to compute current values along with the delta -// double newValue = c[prevOff + j] + values[rowIndex + j]; -// c[off + _colIndexes[j]] += newValue; -// } -// } -// } + for(int i = rl, offT = rl + offR; i < ru; i++, offT++) { + final double[] c = db.values(offT); + final int off = db.pos(offT) + offC; + final int dictIdx = _data.getIndex(i); + final int rowIndex = dictIdx * nCol; + + if(i == 0 && rl == 0) { + for(int j = 0; j < nCol; j++) { + final double value = values[rowIndex + j]; + final int colIdx = _colIndexes.get(j); + c[off + colIdx] = value; + prevRow[j] = value; + } + } + else { + for(int j = 0; j < nCol; j++) { + final double delta = values[rowIndex + j]; + final double newValue = prevRow[j] + delta; + final int colIdx = _colIndexes.get(j); + c[off + colIdx] = newValue; + prevRow[j] = newValue; + } + } + } + } -// @Override -// protected void decompressToSparseBlockDenseDictionary(SparseBlock ret, int rl, int ru, int offR, int offC, -// double[] values) { -// throw new NotImplementedException(); -// } + @Override + protected void decompressToSparseBlockDenseDictionary(SparseBlock ret, int rl, int ru, int offR, int offC, + double[] values) { + throw new NotImplementedException("Sparse block decompression for DeltaDDC not yet implemented"); + } -// @Override -// public AColGroup scalarOperation(ScalarOperator op) { -// return new ColGroupDeltaDDC(_colIndexes, _dict.applyScalarOp(op), _data, getCachedCounts()); -// } + @Override + public AColGroup scalarOperation(ScalarOperator op) { + if(_dict instanceof DeltaDictionary) { + DeltaDictionary deltaDict = (DeltaDictionary) _dict; + IDictionary newDict = deltaDict.applyScalarOp(op); + return new ColGroupDeltaDDC(_colIndexes, newDict, _data, getCachedCounts()); + } + else { + throw new DMLRuntimeException("DeltaDDC must use DeltaDictionary"); + } + } } diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupFactory.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupFactory.java index c6a098f5c32..d368b84dc2b 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupFactory.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupFactory.java @@ -38,6 +38,7 @@ import org.apache.sysds.runtime.compress.bitmap.ABitmap; import org.apache.sysds.runtime.compress.bitmap.BitmapEncoder; import org.apache.sysds.runtime.compress.colgroup.AColGroup.CompressionType; +import org.apache.sysds.runtime.compress.colgroup.dictionary.DeltaDictionary; import org.apache.sysds.runtime.compress.colgroup.dictionary.Dictionary; import org.apache.sysds.runtime.compress.colgroup.dictionary.DictionaryFactory; import org.apache.sysds.runtime.compress.colgroup.dictionary.IDictionary; @@ -288,6 +289,9 @@ else if((ct == CompressionType.SDC || ct == CompressionType.CONST) // else if(ct == CompressionType.DDC) { return directCompressDDC(colIndexes, cg); } + else if(ct == CompressionType.DeltaDDC) { + return directCompressDeltaDDC(colIndexes, cg); + } else if(ct == CompressionType.LinearFunctional) { if(cs.scaleFactors != null) { throw new NotImplementedException(); // quantization-fused compression NOT allowed @@ -684,6 +688,129 @@ private AColGroup directCompressDDCMultiCol(IColIndex colIndexes, CompressedSize return ColGroupDDC.create(colIndexes, dict, resData, null); } + private AColGroup directCompressDeltaDDC(IColIndex colIndexes, CompressedSizeInfoColGroup cg) throws Exception { + if(cs.transposed) { + throw new NotImplementedException("Delta encoding for transposed matrices not yet implemented"); + } + if(cs.scaleFactors != null) { + throw new NotImplementedException("Delta encoding with quantization not yet implemented"); + } + + if(colIndexes.size() > 1) { + return directCompressDeltaDDCMultiCol(colIndexes, cg); + } + else { + return 
directCompressDeltaDDCSingleCol(colIndexes, cg); + } + } + + private AColGroup directCompressDeltaDDCSingleCol(IColIndex colIndexes, CompressedSizeInfoColGroup cg) { + final int col = colIndexes.get(0); + final AMapToData d = MapToFactory.create(nRow, Math.max(Math.min(cg.getNumOffs() + 1, nRow), 126)); + final DoubleCountHashMap map = new DoubleCountHashMap(cg.getNumVals()); + + ReaderColumnSelection reader = ReaderColumnSelection.createDeltaReader(in, colIndexes, cs.transposed, 0, nRow); + DblArray cellVals = reader.nextRow(); + int r = 0; + while(r < nRow && cellVals != null) { + final int row = reader.getCurrentRowIndex(); + if(row == r) { + final double val = cellVals.getData()[0]; + final int id = map.increment(val); + d.set(row, id); + cellVals = reader.nextRow(); + r++; + } + else { + r = row; + } + } + + if(map.size() == 0) + return new ColGroupEmpty(colIndexes); + + final double[] dictValues = map.getDictionary(); + IDictionary dict = new DeltaDictionary(dictValues, 1); + + final int nUnique = map.size(); + final AMapToData resData = d.resize(nUnique); + return ColGroupDeltaDDC.create(colIndexes, dict, resData, null); + } + + private AColGroup directCompressDeltaDDCMultiCol(IColIndex colIndexes, CompressedSizeInfoColGroup cg) throws Exception { + final AMapToData d = MapToFactory.create(nRow, Math.max(Math.min(cg.getNumOffs() + 1, nRow), 126)); + final int fill = d.getUpperBoundValue(); + d.fill(fill); + + final DblArrayCountHashMap map = new DblArrayCountHashMap(Math.max(cg.getNumVals(), 64)); + boolean extra; + if(nRow < CompressionSettings.PAR_DDC_THRESHOLD || k < csi.getNumberColGroups() || pool == null) { + extra = readToMapDeltaDDC(colIndexes, map, d, 0, nRow, fill); + } + else { + throw new NotImplementedException("Parallel delta DDC compression not yet implemented"); + } + + if(map.size() == 0) + return new ColGroupEmpty(colIndexes); + + final ACount[] vals = map.extractValues(); + final int nVals = vals.length; + final double[] dictValues = new 
double[nVals * colIndexes.size()]; + final int[] oldIdToNewId = new int[map.size()]; + int idx = 0; + for(int i = 0; i < nVals; i++) { + final ACount dac = vals[i]; + final double[] arrData = dac.key().getData(); + System.arraycopy(arrData, 0, dictValues, idx, colIndexes.size()); + oldIdToNewId[dac.id] = i; + idx += colIndexes.size(); + } + IDictionary dict = new DeltaDictionary(dictValues, colIndexes.size()); + + if(extra) + d.replace(fill, map.size()); + final int nUnique = map.size() + (extra ? 1 : 0); + final AMapToData resData = d.resize(nUnique); + for(int i = 0; i < nRow; i++) { + final int oldId = resData.getIndex(i); + if(extra && oldId == map.size()) { + resData.set(i, nVals); + } + else if(oldId < oldIdToNewId.length) { + resData.set(i, oldIdToNewId[oldId]); + } + } + return ColGroupDeltaDDC.create(colIndexes, dict, resData, null); + } + + private boolean readToMapDeltaDDC(IColIndex colIndexes, DblArrayCountHashMap map, AMapToData data, int rl, int ru, + int fill) { + ReaderColumnSelection reader = ReaderColumnSelection.createDeltaReader(in, colIndexes, cs.transposed, rl, ru); + + DblArray cellVals = reader.nextRow(); + boolean extra = false; + int r = rl; + while(r < ru && cellVals != null) { + final int row = reader.getCurrentRowIndex(); + if(row == r) { + final int id = map.increment(cellVals); + data.set(row, id); + cellVals = reader.nextRow(); + r++; + } + else { + r = row; + extra = true; + } + } + + if(r < ru) + extra = true; + + return extra; + } + private boolean readToMapDDC(IColIndex colIndexes, DblArrayCountHashMap map, AMapToData data, int rl, int ru, int fill) { diff --git a/src/main/java/org/apache/sysds/runtime/compress/estim/AComEst.java b/src/main/java/org/apache/sysds/runtime/compress/estim/AComEst.java index 2dce0bafe4e..ef7981e941b 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/estim/AComEst.java +++ b/src/main/java/org/apache/sysds/runtime/compress/estim/AComEst.java @@ -197,7 +197,10 @@ public final 
CompressedSizeInfoColGroup combine(IColIndex combinedColumns, Compr return null; // This combination is clearly not a good idea return null to indicate that. else if(g1.getMap() == null || g2.getMap() == null) // the previous information did not contain maps, therefore fall back to extract from sample - return getColGroupInfo(combinedColumns, Math.max(g1V, g2V), (int) max); + if(_cs.preferDeltaEncoding) + return getDeltaColGroupInfo(combinedColumns, Math.max(g1V, g2V), (int) max); + else + return getColGroupInfo(combinedColumns, Math.max(g1V, g2V), (int) max); else // Default combine the previous subject to max value calculated. return combine(combinedColumns, g1, g2, (int) max); } @@ -254,8 +257,12 @@ private List CompressedSizeInfoColGroupSingleThread( List ret = new ArrayList<>(clen); if(!_cs.transposed && !_data.isEmpty() && _data.isInSparseFormat()) nnzCols = LibMatrixReorg.countNnzPerColumn(_data); - for(int col = 0; col < clen; col++) - ret.add(getColGroupInfo(new SingleIndex(col))); + for(int col = 0; col < clen; col++) { + if(_cs.preferDeltaEncoding) + ret.add(getDeltaColGroupInfo(new SingleIndex(col))); + else + ret.add(getColGroupInfo(new SingleIndex(col))); + } return ret; } @@ -286,9 +293,14 @@ private List CompressedSizeInfoColGroupParallel(int for(int col = 0; col < clen; col += blkz) { final int start = col; final int end = Math.min(clen, col + blkz); + final boolean useDelta = _cs.preferDeltaEncoding; tasks.add(pool.submit(() -> { - for(int c = start; c < end; c++) - res[c] = getColGroupInfo(new SingleIndex(c)); + for(int c = start; c < end; c++) { + if(useDelta) + res[c] = getDeltaColGroupInfo(new SingleIndex(c)); + else + res[c] = getColGroupInfo(new SingleIndex(c)); + } return null; })); } diff --git a/src/main/java/org/apache/sysds/runtime/compress/estim/CompressedSizeInfoColGroup.java b/src/main/java/org/apache/sysds/runtime/compress/estim/CompressedSizeInfoColGroup.java index 963a044d14f..df353931c0b 100644 --- 
a/src/main/java/org/apache/sysds/runtime/compress/estim/CompressedSizeInfoColGroup.java +++ b/src/main/java/org/apache/sysds/runtime/compress/estim/CompressedSizeInfoColGroup.java @@ -255,7 +255,11 @@ private static double getCompressionSize(IColIndex cols, CompressionType ct, Est case LinearFunctional: return ColGroupSizes.estimateInMemorySizeLinearFunctional(numCols, contiguousColumns); case DeltaDDC: - throw new NotImplementedException(); + // DeltaDDC has the same size estimation as DDC since it uses the same structure + // The delta encoding is just a different way of interpreting the data + nv = fact.numVals + (fact.numOffs < fact.numRows ? 1 : 0); + return ColGroupSizes.estimateInMemorySizeDDC(numCols, contiguousColumns, nv, fact.numRows, + fact.tupleSparsity, fact.lossy); case DDC: nv = fact.numVals + (fact.numOffs < fact.numRows ? 1 : 0); return ColGroupSizes.estimateInMemorySizeDDC(numCols, contiguousColumns, nv, fact.numRows, diff --git a/src/main/java/org/apache/sysds/runtime/compress/estim/encoding/EncodingFactory.java b/src/main/java/org/apache/sysds/runtime/compress/estim/encoding/EncodingFactory.java index b196da658c3..7d402bc20d6 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/estim/encoding/EncodingFactory.java +++ b/src/main/java/org/apache/sysds/runtime/compress/estim/encoding/EncodingFactory.java @@ -88,9 +88,8 @@ else if(rowCols.size() == 1) { * @return A delta encoded encoding. */ public static IEncode createFromMatrixBlockDelta(MatrixBlock m, boolean transposed, IColIndex rowCols) { - throw new NotImplementedException(); - // final int sampleSize = transposed ? m.getNumColumns() : m.getNumRows(); - // return createFromMatrixBlockDelta(m, transposed, rowCols, sampleSize); + final int sampleSize = transposed ? 
m.getNumColumns() : m.getNumRows(); + return createFromMatrixBlockDelta(m, transposed, rowCols, sampleSize); } /** @@ -107,7 +106,7 @@ public static IEncode createFromMatrixBlockDelta(MatrixBlock m, boolean transpos */ public static IEncode createFromMatrixBlockDelta(MatrixBlock m, boolean transposed, IColIndex rowCols, int sampleSize) { - throw new NotImplementedException(); + return createWithDeltaReader(m, rowCols, transposed, sampleSize); } /** @@ -691,4 +690,62 @@ private static IEncode createWithReaderSparse(MatrixBlock m, DblArrayCountHashMa public static SparseEncoding createSparse(AMapToData map, AOffset off, int nRows) { return new SparseEncoding(map, off, nRows); } + + private static IEncode createWithDeltaReader(MatrixBlock m, IColIndex rowCols, boolean transposed, int sampleSize) { + final int rl = 0; + final int ru = Math.min(sampleSize, transposed ? m.getNumColumns() : m.getNumRows()); + final ReaderColumnSelection reader1 = ReaderColumnSelection.createDeltaReader(m, rowCols, transposed, rl, ru); + final int nRows = transposed ? 
m.getNumColumns() : m.getNumRows(); + final DblArrayCountHashMap map = new DblArrayCountHashMap(); + final IntArrayList offsets = new IntArrayList(); + DblArray cellVals = reader1.nextRow(); + + while(cellVals != null) { + map.increment(cellVals); + offsets.appendValue(reader1.getCurrentRowIndex()); + cellVals = reader1.nextRow(); + } + + if(offsets.size() == 0) + return new EmptyEncoding(); + else if(map.size() == 1 && offsets.size() == ru) + return new ConstEncoding(ru); + + final ReaderColumnSelection reader2 = ReaderColumnSelection.createDeltaReader(m, rowCols, transposed, rl, ru); + if(offsets.size() < ru / 4) + return createWithDeltaReaderSparse(m, map, rowCols, offsets, ru, reader2); + else + return createWithDeltaReaderDense(m, map, rowCols, ru, offsets.size() < ru, reader2); + } + + private static IEncode createWithDeltaReaderDense(MatrixBlock m, DblArrayCountHashMap map, IColIndex rowCols, + int nRows, boolean zero, ReaderColumnSelection reader2) { + final int unique = map.size() + (zero ? 
1 : 0); + final AMapToData d = MapToFactory.create(nRows, unique); + + DblArray cellVals; + if(zero) + while((cellVals = reader2.nextRow()) != null) + d.set(reader2.getCurrentRowIndex(), map.getId(cellVals) + 1); + else + while((cellVals = reader2.nextRow()) != null) + d.set(reader2.getCurrentRowIndex(), map.getId(cellVals)); + + return new DenseEncoding(d); + } + + private static IEncode createWithDeltaReaderSparse(MatrixBlock m, DblArrayCountHashMap map, IColIndex rowCols, + IntArrayList offsets, int nRows, ReaderColumnSelection reader2) { + DblArray cellVals = reader2.nextRow(); + final AMapToData d = MapToFactory.create(offsets.size(), map.size()); + + int i = 0; + while(cellVals != null) { + d.set(i++, map.getId(cellVals)); + cellVals = reader2.nextRow(); + } + + final AOffset o = OffsetFactory.createOffset(offsets); + return new SparseEncoding(d, o, nRows); + } } diff --git a/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibUnary.java b/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibUnary.java index f858f15b746..781c13de1d8 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibUnary.java +++ b/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibUnary.java @@ -21,10 +21,15 @@ import java.util.ArrayList; import java.util.List; +import org.apache.commons.lang3.tuple.Pair; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.sysds.runtime.compress.CompressedMatrixBlock; +import org.apache.sysds.runtime.compress.CompressedMatrixBlockFactory; +import org.apache.sysds.runtime.compress.CompressionSettingsBuilder; +import org.apache.sysds.runtime.compress.CompressionStatistics; import org.apache.sysds.runtime.compress.colgroup.AColGroup; +import org.apache.sysds.runtime.compress.colgroup.AColGroup.CompressionType; import org.apache.sysds.runtime.functionobjects.Builtin; import org.apache.sysds.runtime.functionobjects.Builtin.BuiltinCode; import 
org.apache.sysds.runtime.matrix.data.LibMatrixAgg; @@ -43,6 +48,35 @@ public static MatrixBlock unaryOperations(CompressedMatrixBlock m, UnaryOperator final boolean overlapping = m.isOverlapping(); final int r = m.getNumRows(); final int c = m.getNumColumns(); + + if(Builtin.isBuiltinCode(op.fn, BuiltinCode.CUMSUM, BuiltinCode.ROWCUMSUM)) { + MatrixBlock uncompressed = m.getUncompressed("CUMSUM/ROWCUMSUM requires uncompression", op.getNumThreads()); + MatrixBlock opResult = uncompressed.unaryOperations(op, null); + + CompressionSettingsBuilder csb = new CompressionSettingsBuilder(); + csb.clearValidCompression(); + csb.setPreferDeltaEncoding(true); + csb.addValidCompression(CompressionType.DeltaDDC); + csb.setMinimumCompressionRatio(0.0); + csb.setTransposeInput("false"); + Pair compressedPair = CompressedMatrixBlockFactory.compress(opResult, op.getNumThreads(), csb); + MatrixBlock compressedResult = compressedPair.getLeft(); + + if(compressedResult == null) { + compressedResult = opResult; + } + + CompressedMatrixBlock finalResult; + if(compressedResult instanceof CompressedMatrixBlock) { + finalResult = (CompressedMatrixBlock) compressedResult; + } + else { + finalResult = CompressedMatrixBlockFactory.genUncompressedCompressedMatrixBlock(compressedResult); + } + + return finalResult; + } + // early aborts: if(m.isEmpty()) return new MatrixBlock(r, c, 0).unaryOperations(op, result); @@ -64,8 +98,9 @@ else if(Builtin.isBuiltinCode(op.fn, BuiltinCode.ISINF, BuiltinCode.ISNAN, Built return new MatrixBlock(r, c, 0); // avoid unnecessary allocation else if(LibMatrixAgg.isSupportedUnaryOperator(op)) { String message = "Unary Op not supported: " + op.fn.getClass().getSimpleName(); - // e.g., cumsum/cumprod/cummin/cumax/cumsumprod - return m.getUncompressed(message, op.getNumThreads()).unaryOperations(op, null); + MatrixBlock uncompressed = m.getUncompressed(message, op.getNumThreads()); + MatrixBlock opResult = uncompressed.unaryOperations(op, null); + return opResult; } 
else { diff --git a/src/main/java/org/apache/sysds/runtime/compress/readers/ReaderColumnSelection.java b/src/main/java/org/apache/sysds/runtime/compress/readers/ReaderColumnSelection.java index d6ec60336f0..1734d39f4ce 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/readers/ReaderColumnSelection.java +++ b/src/main/java/org/apache/sysds/runtime/compress/readers/ReaderColumnSelection.java @@ -193,13 +193,63 @@ else if(rawBlock.isInSparseFormat()) { else { return new ReaderColumnSelectionDenseSingleBlockQuantized(rawBlock, colIndices, rl, ru, scaleFactors); } - } + } + + /** + * Create a reader of the matrix block that computes delta values (current row - previous row) on-the-fly. + * + * Note that the reader reuses the returned array, so copy the returned rows if they must be retained. + * The first row is returned as-is (no delta computation). + * + * @param rawBlock The block to iterate through + * @param colIndices The column indexes to extract and insert into the double array + * @param transposed If the raw block should be treated as transposed + * @return A delta reader of the columns specified + */ + public static ReaderColumnSelection createDeltaReader(MatrixBlock rawBlock, IColIndex colIndices, boolean transposed) { + final int rl = 0; + final int ru = transposed ? rawBlock.getNumColumns() : rawBlock.getNumRows(); + return createDeltaReader(rawBlock, colIndices, transposed, rl, ru); + } + + /** + * Create a reader of the matrix block that computes delta values (current row - previous row) on-the-fly. + * + * Note that the reader reuses the returned array, so copy the returned rows if they must be retained. + * The first row is returned as-is (no delta computation). 
+ * + * @param rawBlock The block to iterate though + * @param colIndices The column indexes to extract and insert into the double array + * @param transposed If the raw block should be treated as transposed + * @param rl The row to start at + * @param ru The row to end at (not inclusive) + * @return A delta reader of the columns specified + */ + public static ReaderColumnSelection createDeltaReader(MatrixBlock rawBlock, IColIndex colIndices, boolean transposed, + int rl, int ru) { + checkInput(rawBlock, colIndices, rl, ru, transposed); + rl = rl - 1; + if(rawBlock.isEmpty()) { + LOG.warn("It is likely an error occurred when reading an empty block, but we do support it!"); + return new ReaderColumnSelectionEmpty(rawBlock, colIndices, rl, ru, transposed); + } + + if(transposed) { + throw new NotImplementedException("Delta encoding for transposed matrices not yet implemented"); + } + + if(rawBlock.isInSparseFormat()) + return new ReaderColumnSelectionSparseDelta(rawBlock, colIndices, rl, ru); + else if(rawBlock.getDenseBlock().numBlocks() > 1) + return new ReaderColumnSelectionDenseMultiBlockDelta(rawBlock, colIndices, rl, ru); + return new ReaderColumnSelectionDenseSingleBlockDelta(rawBlock, colIndices, rl, ru); + } private static void checkInput(final MatrixBlock rawBlock, final IColIndex colIndices, final int rl, final int ru, final boolean transposed) { - if(colIndices.size() <= 1) + if(colIndices.size() < 1) throw new DMLCompressionException( - "Column selection reader should not be done on single column groups: " + colIndices); + "Column selection reader should not be done on empty column groups: " + colIndices); else if(rl >= ru) throw new DMLCompressionException("Invalid inverse range for reader " + rl + " to " + ru); diff --git a/src/main/java/org/apache/sysds/runtime/compress/readers/ReaderColumnSelectionDenseMultiBlockDelta.java b/src/main/java/org/apache/sysds/runtime/compress/readers/ReaderColumnSelectionDenseMultiBlockDelta.java new file mode 100644 
index 00000000000..f700ebd94b7 --- /dev/null +++ b/src/main/java/org/apache/sysds/runtime/compress/readers/ReaderColumnSelectionDenseMultiBlockDelta.java @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysds.runtime.compress.readers; + +import org.apache.sysds.runtime.compress.colgroup.indexes.IColIndex; +import org.apache.sysds.runtime.compress.utils.DblArray; +import org.apache.sysds.runtime.data.DenseBlock; +import org.apache.sysds.runtime.matrix.data.MatrixBlock; + +public class ReaderColumnSelectionDenseMultiBlockDelta extends ReaderColumnSelection { + private final DenseBlock _data; + private final double[] _previousRow; + private boolean _isFirstRow; + + protected ReaderColumnSelectionDenseMultiBlockDelta(MatrixBlock data, IColIndex colIndices, int rl, int ru) { + super(colIndices, rl, Math.min(ru, data.getNumRows()) - 1); + _data = data.getDenseBlock(); + _previousRow = new double[colIndices.size()]; + _isFirstRow = true; + } + + protected DblArray getNextRow() { + _rl++; + + if(_isFirstRow) { + for(int i = 0; i < _colIndexes.size(); i++) { + final double val = _data.get(_rl, _colIndexes.get(i)); + _previousRow[i] = val; + reusableArr[i] = val; + } + _isFirstRow 
= false; + } + else { + for(int i = 0; i < _colIndexes.size(); i++) { + final double currentVal = _data.get(_rl, _colIndexes.get(i)); + reusableArr[i] = currentVal - _previousRow[i]; + _previousRow[i] = currentVal; + } + } + + return reusableReturn; + } +} + + + diff --git a/src/main/java/org/apache/sysds/runtime/compress/readers/ReaderColumnSelectionDenseSingleBlockDelta.java b/src/main/java/org/apache/sysds/runtime/compress/readers/ReaderColumnSelectionDenseSingleBlockDelta.java new file mode 100644 index 00000000000..65f13343201 --- /dev/null +++ b/src/main/java/org/apache/sysds/runtime/compress/readers/ReaderColumnSelectionDenseSingleBlockDelta.java @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.sysds.runtime.compress.readers; + +import org.apache.sysds.runtime.compress.colgroup.indexes.IColIndex; +import org.apache.sysds.runtime.compress.utils.DblArray; +import org.apache.sysds.runtime.matrix.data.MatrixBlock; + +public class ReaderColumnSelectionDenseSingleBlockDelta extends ReaderColumnSelection { + private final double[] _data; + private final int _numCols; + private final double[] _previousRow; + private boolean _isFirstRow; + + protected ReaderColumnSelectionDenseSingleBlockDelta(MatrixBlock data, IColIndex colIndices, int rl, int ru) { + super(colIndices, rl, Math.min(ru, data.getNumRows()) - 1); + _data = data.getDenseBlockValues(); + _numCols = data.getNumColumns(); + _previousRow = new double[colIndices.size()]; + _isFirstRow = true; + } + + protected DblArray getNextRow() { + _rl++; + final int indexOff = _rl * _numCols; + + if(_isFirstRow) { + for(int i = 0; i < _colIndexes.size(); i++) { + final double val = _data[indexOff + _colIndexes.get(i)]; + _previousRow[i] = val; + reusableArr[i] = val; + } + _isFirstRow = false; + } + else { + for(int i = 0; i < _colIndexes.size(); i++) { + final double currentVal = _data[indexOff + _colIndexes.get(i)]; + reusableArr[i] = currentVal - _previousRow[i]; + _previousRow[i] = currentVal; + } + } + + return reusableReturn; + } +} + + + diff --git a/src/main/java/org/apache/sysds/runtime/compress/readers/ReaderColumnSelectionSparseDelta.java b/src/main/java/org/apache/sysds/runtime/compress/readers/ReaderColumnSelectionSparseDelta.java new file mode 100644 index 00000000000..8ea1fff3396 --- /dev/null +++ b/src/main/java/org/apache/sysds/runtime/compress/readers/ReaderColumnSelectionSparseDelta.java @@ -0,0 +1,91 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysds.runtime.compress.readers; + +import java.util.Arrays; + +import org.apache.sysds.runtime.compress.colgroup.indexes.IColIndex; +import org.apache.sysds.runtime.compress.utils.DblArray; +import org.apache.sysds.runtime.data.SparseBlock; +import org.apache.sysds.runtime.matrix.data.MatrixBlock; + +public class ReaderColumnSelectionSparseDelta extends ReaderColumnSelection { + + private final SparseBlock _a; + private final double[] _previousRow; + private boolean _isFirstRow; + + protected ReaderColumnSelectionSparseDelta(MatrixBlock data, IColIndex colIndexes, int rl, int ru) { + super(colIndexes, rl, Math.min(ru, data.getNumRows()) - 1); + _a = data.getSparseBlock(); + _previousRow = new double[colIndexes.size()]; + _isFirstRow = true; + } + + protected final DblArray getNextRow() { + _rl++; + for(int i = 0; i < _colIndexes.size(); i++) + reusableArr[i] = 0.0; + + if(!_a.isEmpty(_rl)) + processInRange(_rl); + + if(_isFirstRow) { + for(int i = 0; i < _colIndexes.size(); i++) + _previousRow[i] = reusableArr[i]; + _isFirstRow = false; + } + else { + for(int i = 0; i < _colIndexes.size(); i++) { + final double currentVal = reusableArr[i]; + reusableArr[i] = currentVal - _previousRow[i]; + _previousRow[i] = currentVal; + } + } + + return reusableReturn; + } + + final void processInRange(final int r) { + final int apos = _a.pos(r); + final int alen = 
_a.size(r) + apos; + final int[] aix = _a.indexes(r); + final double[] avals = _a.values(r); + int skip = 0; + int j = Arrays.binarySearch(aix, apos, alen, _colIndexes.get(0)); + if(j < 0) + j = Math.abs(j + 1); + + while(skip < _colIndexes.size() && j < alen) { + if(_colIndexes.get(skip) == aix[j]) { + reusableArr[skip] = avals[j]; + skip++; + j++; + } + else if(_colIndexes.get(skip) > aix[j]) + j++; + else + skip++; + } + } +} + + + diff --git a/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDeltaDDCTest.java b/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDeltaDDCTest.java index 0f2d965bce8..4b9643592b8 100644 --- a/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDeltaDDCTest.java +++ b/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDeltaDDCTest.java @@ -19,64 +19,89 @@ package org.apache.sysds.test.component.compress.colgroup; +import static org.junit.Assert.assertArrayEquals; + +import java.util.EnumSet; + +import org.apache.commons.lang3.NotImplementedException; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.sysds.runtime.DMLRuntimeException; +import org.apache.sysds.runtime.compress.CompressionSettings; +import org.apache.sysds.runtime.compress.CompressionSettingsBuilder; +import org.apache.sysds.runtime.compress.colgroup.AColGroup; +import org.apache.sysds.runtime.compress.colgroup.AColGroup.CompressionType; +import org.apache.sysds.runtime.compress.colgroup.ColGroupFactory; +import org.apache.sysds.runtime.compress.colgroup.indexes.ColIndexFactory; +import org.apache.sysds.runtime.compress.colgroup.indexes.IColIndex; +import org.apache.sysds.runtime.compress.estim.ComEstExact; +import org.apache.sysds.runtime.compress.estim.CompressedSizeInfo; +import org.apache.sysds.runtime.compress.estim.CompressedSizeInfoColGroup; +import org.apache.sysds.runtime.matrix.data.MatrixBlock; +import 
org.apache.sysds.runtime.util.DataConverter; +import org.junit.Test; + public class ColGroupDeltaDDCTest { - // protected static final Log LOG = LogFactory.getLog(JolEstimateTest.class.getName()); - - // @Test - // public void testDecompressToDenseBlockSingleColumn() { - // testDecompressToDenseBlock(new double[][] {{1, 2, 3, 4, 5}}, true); - // } - - // @Test - // public void testDecompressToDenseBlockSingleColumnTransposed() { - // testDecompressToDenseBlock(new double[][] {{1}, {2}, {3}, {4}, {5}}, false); - // } - - // @Test - // public void testDecompressToDenseBlockTwoColumns() { - // testDecompressToDenseBlock(new double[][] {{1, 1}, {2, 1}, {3, 1}, {4, 1}, {5, 1}}, false); - // } - - // @Test - // public void testDecompressToDenseBlockTwoColumnsTransposed() { - // testDecompressToDenseBlock(new double[][] {{1, 2, 3, 4, 5}, {1, 1, 1, 1, 1}}, true); - // } - - // public void testDecompressToDenseBlock(double[][] data, boolean isTransposed) { - // MatrixBlock mbt = DataConverter.convertToMatrixBlock(data); - - // final int numCols = isTransposed ? mbt.getNumRows() : mbt.getNumColumns(); - // final int numRows = isTransposed ? 
mbt.getNumColumns() : mbt.getNumRows(); - // int[] colIndexes = new int[numCols]; - // for(int x = 0; x < numCols; x++) - // colIndexes[x] = x; - - // try { - // CompressionSettings cs = new CompressionSettingsBuilder().setSamplingRatio(1.0) - // .setValidCompressions(EnumSet.of(AColGroup.CompressionType.DeltaDDC)).create(); - // cs.transposed = isTransposed; - - // final CompressedSizeInfoColGroup cgi = new CompressedSizeEstimatorExact(mbt, cs) - // .getColGroupInfo(colIndexes); - // CompressedSizeInfo csi = new CompressedSizeInfo(cgi); - // AColGroup cg = ColGroupFactory.compressColGroups(mbt, csi, cs, 1).get(0); - - // // Decompress to dense block - // MatrixBlock ret = new MatrixBlock(numRows, numCols, false); - // ret.allocateDenseBlock(); - // cg.decompressToDenseBlock(ret.getDenseBlock(), 0, numRows); - - // MatrixBlock expected = DataConverter.convertToMatrixBlock(data); - // if(isTransposed) - // LibMatrixReorg.transposeInPlace(expected, 1); - // Assert.assertArrayEquals(expected.getDenseBlockValues(), ret.getDenseBlockValues(), 0.01); - - // } - // catch(Exception e) { - // e.printStackTrace(); - // throw new DMLRuntimeException("Failed construction : " + this.getClass().getSimpleName()); - // } - // } + protected static final Log LOG = LogFactory.getLog(ColGroupDeltaDDCTest.class.getName()); + + @Test + public void testDecompressToDenseBlockSingleColumn() { + testDecompressToDenseBlock(new double[][] {{1, 2, 3, 4, 5}}, false); + } + + @Test(expected = NotImplementedException.class) + public void testDecompressToDenseBlockSingleColumnTransposed() { + testDecompressToDenseBlock(new double[][] {{1}, {2}, {3}, {4}, {5}}, true); + } + + @Test + public void testDecompressToDenseBlockTwoColumns() { + testDecompressToDenseBlock(new double[][] {{1, 2}, {2, 3}, {3, 4}, {4, 5}, {5, 6}}, false); + } + + @Test(expected = NotImplementedException.class) + public void testDecompressToDenseBlockTwoColumnsTransposed() { + testDecompressToDenseBlock(new double[][] {{1, 2, 
3, 4, 5}, {1, 1, 1, 1, 1}}, true); + } + + public void testDecompressToDenseBlock(double[][] data, boolean isTransposed) { + if(isTransposed) { + throw new NotImplementedException("Delta encoding for transposed matrices not yet implemented"); + } + + MatrixBlock mbt = DataConverter.convertToMatrixBlock(data); + + final int numCols = mbt.getNumColumns(); + final int numRows = mbt.getNumRows(); + IColIndex colIndexes = ColIndexFactory.create(numCols); + + try { + CompressionSettingsBuilder csb = new CompressionSettingsBuilder().setSamplingRatio(1.0) + .setValidCompressions(EnumSet.of(AColGroup.CompressionType.DeltaDDC)) + .setPreferDeltaEncoding(true) + .setTransposeInput("false"); + CompressionSettings cs = csb.create(); + + final CompressedSizeInfoColGroup cgi = new ComEstExact(mbt, cs).getColGroupInfo(colIndexes); + CompressedSizeInfo csi = new CompressedSizeInfo(cgi); + AColGroup cg = ColGroupFactory.compressColGroups(mbt, csi, cs, 1).get(0); + + MatrixBlock ret = new MatrixBlock(numRows, numCols, false); + ret.allocateDenseBlock(); + cg.decompressToDenseBlock(ret.getDenseBlock(), 0, numRows); + + MatrixBlock expected = DataConverter.convertToMatrixBlock(data); + assertArrayEquals(expected.getDenseBlockValues(), ret.getDenseBlockValues(), 0.01); + + } + catch(NotImplementedException e) { + throw e; + } + catch(Exception e) { + e.printStackTrace(); + throw new DMLRuntimeException("Failed construction : " + this.getClass().getSimpleName(), e); + } + } } diff --git a/src/test/java/org/apache/sysds/test/component/compress/estim/encoding/EncodeDeltaTest.java b/src/test/java/org/apache/sysds/test/component/compress/estim/encoding/EncodeDeltaTest.java new file mode 100644 index 00000000000..63a15119295 --- /dev/null +++ b/src/test/java/org/apache/sysds/test/component/compress/estim/encoding/EncodeDeltaTest.java @@ -0,0 +1,250 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysds.test.component.compress.estim.encoding; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; + +import org.apache.commons.lang3.NotImplementedException; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.sysds.runtime.compress.estim.encoding.EncodingFactory; +import org.apache.sysds.runtime.compress.estim.encoding.EmptyEncoding; +import org.apache.sysds.runtime.compress.estim.encoding.IEncode; +import org.apache.sysds.runtime.compress.colgroup.indexes.ColIndexFactory; +import org.apache.sysds.runtime.matrix.data.MatrixBlock; +import org.junit.Test; + +public class EncodeDeltaTest { + + protected static final Log LOG = LogFactory.getLog(EncodeDeltaTest.class.getName()); + + @Test + public void testCreateFromMatrixBlockDeltaBasic() { + MatrixBlock mb = new MatrixBlock(3, 2, false); + mb.allocateDenseBlock(); + mb.set(0, 0, 10); + mb.set(0, 1, 20); + mb.set(1, 0, 11); + mb.set(1, 1, 21); + mb.set(2, 0, 12); + mb.set(2, 1, 22); + + IEncode encoding = EncodingFactory.createFromMatrixBlockDelta(mb, false, ColIndexFactory.create(2)); + assertNotNull(encoding); + assertTrue(encoding.getUnique() >= 1); + } + + 
@Test + public void testCreateFromMatrixBlockDeltaWithSampleSize() { + MatrixBlock mb = new MatrixBlock(5, 2, false); + mb.allocateDenseBlock(); + for(int i = 0; i < 5; i++) { + mb.set(i, 0, 10 + i); + mb.set(i, 1, 20 + i); + } + + IEncode encoding = EncodingFactory.createFromMatrixBlockDelta(mb, false, ColIndexFactory.create(2), 3); + assertNotNull(encoding); + } + + @Test + public void testCreateFromMatrixBlockDeltaFirstRowAsIs() { + MatrixBlock mb = new MatrixBlock(2, 2, false); + mb.allocateDenseBlock(); + mb.set(0, 0, 5); + mb.set(0, 1, 10); + mb.set(1, 0, 5); + mb.set(1, 1, 10); + + IEncode encoding = EncodingFactory.createFromMatrixBlockDelta(mb, false, ColIndexFactory.create(2)); + assertNotNull(encoding); + } + + @Test + public void testCreateFromMatrixBlockDeltaConstantDeltas() { + MatrixBlock mb = new MatrixBlock(4, 2, false); + mb.allocateDenseBlock(); + mb.set(0, 0, 10); + mb.set(0, 1, 20); + mb.set(1, 0, 11); + mb.set(1, 1, 21); + mb.set(2, 0, 12); + mb.set(2, 1, 22); + mb.set(3, 0, 13); + mb.set(3, 1, 23); + + IEncode encoding = EncodingFactory.createFromMatrixBlockDelta(mb, false, ColIndexFactory.create(2)); + assertNotNull(encoding); + assertTrue(encoding.getUnique() <= 2); + } + + @Test + public void testCreateFromMatrixBlockDeltaSingleRow() { + MatrixBlock mb = new MatrixBlock(1, 2, false); + mb.allocateDenseBlock(); + mb.set(0, 0, 10); + mb.set(0, 1, 20); + + IEncode encoding = EncodingFactory.createFromMatrixBlockDelta(mb, false, ColIndexFactory.create(2)); + assertNotNull(encoding); + } + + @Test + public void testCreateFromMatrixBlockDeltaSparse() { + MatrixBlock mb = new MatrixBlock(3, 2, true); + mb.set(0, 0, 10); + mb.set(0, 1, 20); + mb.set(1, 0, 11); + mb.set(2, 1, 22); + + IEncode encoding = EncodingFactory.createFromMatrixBlockDelta(mb, false, ColIndexFactory.create(2)); + assertNotNull(encoding); + } + + @Test + public void testCreateFromMatrixBlockDeltaColumnSelection() { + MatrixBlock mb = new MatrixBlock(3, 4, false); + 
mb.allocateDenseBlock(); + mb.set(0, 0, 10); + mb.set(0, 1, 20); + mb.set(0, 2, 30); + mb.set(0, 3, 40); + mb.set(1, 0, 11); + mb.set(1, 1, 21); + mb.set(1, 2, 31); + mb.set(1, 3, 41); + mb.set(2, 0, 12); + mb.set(2, 1, 22); + mb.set(2, 2, 32); + mb.set(2, 3, 42); + + IEncode encoding = EncodingFactory.createFromMatrixBlockDelta(mb, false, ColIndexFactory.create(0, 2)); + assertNotNull(encoding); + } + + @Test + public void testCreateFromMatrixBlockDeltaNegativeValues() { + MatrixBlock mb = new MatrixBlock(3, 2, false); + mb.allocateDenseBlock(); + mb.set(0, 0, 10); + mb.set(0, 1, 20); + mb.set(1, 0, 8); + mb.set(1, 1, 15); + mb.set(2, 0, 12); + mb.set(2, 1, 25); + + IEncode encoding = EncodingFactory.createFromMatrixBlockDelta(mb, false, ColIndexFactory.create(2)); + assertNotNull(encoding); + } + + @Test + public void testCreateFromMatrixBlockDeltaZeros() { + MatrixBlock mb = new MatrixBlock(3, 2, false); + mb.allocateDenseBlock(); + mb.set(0, 0, 5); + mb.set(0, 1, 0); + mb.set(1, 0, 5); + mb.set(1, 1, 0); + mb.set(2, 0, 0); + mb.set(2, 1, 5); + + IEncode encoding = EncodingFactory.createFromMatrixBlockDelta(mb, false, ColIndexFactory.create(2)); + assertNotNull(encoding); + } + + @Test(expected = NotImplementedException.class) + public void testCreateFromMatrixBlockDeltaTransposed() { + MatrixBlock mb = new MatrixBlock(10, 10, false); + mb.allocateDenseBlock(); + mb.set(0, 0, 1); + mb.set(0, 1, 2); + mb.setNonZeros(2); + EncodingFactory.createFromMatrixBlockDelta(mb, true, ColIndexFactory.create(2)); + } + + @Test + public void testCreateFromMatrixBlockDeltaLargeMatrix() { + MatrixBlock mb = new MatrixBlock(100, 3, false); + mb.allocateDenseBlock(); + for(int i = 0; i < 100; i++) { + mb.set(i, 0, i); + mb.set(i, 1, i * 2); + mb.set(i, 2, i * 3); + } + + IEncode encoding = EncodingFactory.createFromMatrixBlockDelta(mb, false, ColIndexFactory.create(3)); + assertNotNull(encoding); + } + + @Test + public void testCreateFromMatrixBlockDeltaSampleSizeSmaller() { + 
MatrixBlock mb = new MatrixBlock(10, 2, false); + mb.allocateDenseBlock(); + for(int i = 0; i < 10; i++) { + mb.set(i, 0, 10 + i); + mb.set(i, 1, 20 + i); + } + + IEncode encoding = EncodingFactory.createFromMatrixBlockDelta(mb, false, ColIndexFactory.create(2), 5); + assertNotNull(encoding); + } + + @Test + public void testCreateFromMatrixBlockDeltaSampleSizeLarger() { + MatrixBlock mb = new MatrixBlock(5, 2, false); + mb.allocateDenseBlock(); + for(int i = 0; i < 5; i++) { + mb.set(i, 0, 10 + i); + mb.set(i, 1, 20 + i); + } + + IEncode encoding = EncodingFactory.createFromMatrixBlockDelta(mb, false, ColIndexFactory.create(2), 10); + assertNotNull(encoding); + } + + @Test + public void testCreateFromMatrixBlockDeltaEmptyMatrix() { + // Test empty matrix with dimensions but all zeros + MatrixBlock mb = new MatrixBlock(5, 2, false); + mb.allocateDenseBlock(); + // Matrix has dimensions but is empty (all zeros) + // isEmpty() should return true + + IEncode encoding = EncodingFactory.createFromMatrixBlockDelta(mb, false, ColIndexFactory.create(2)); + assertNotNull(encoding); + assertTrue(encoding instanceof EmptyEncoding); + } + + @Test + public void testCreateFromMatrixBlockDeltaEmptyMatrixSparse() { + // Test empty sparse matrix with dimensions + MatrixBlock mb = new MatrixBlock(5, 2, true); + // Sparse matrix with no values is empty + mb.setNonZeros(0); + + IEncode encoding = EncodingFactory.createFromMatrixBlockDelta(mb, false, ColIndexFactory.create(2)); + assertNotNull(encoding); + assertTrue(encoding instanceof EmptyEncoding); + } + +} + diff --git a/src/test/java/org/apache/sysds/test/component/compress/estim/encoding/EncodeNegativeTest.java b/src/test/java/org/apache/sysds/test/component/compress/estim/encoding/EncodeNegativeTest.java index d2d255c0da9..caa56a44d5e 100644 --- a/src/test/java/org/apache/sysds/test/component/compress/estim/encoding/EncodeNegativeTest.java +++ 
b/src/test/java/org/apache/sysds/test/component/compress/estim/encoding/EncodeNegativeTest.java @@ -20,6 +20,7 @@ package org.apache.sysds.test.component.compress.estim.encoding; import org.apache.commons.lang3.NotImplementedException; +import org.apache.sysds.runtime.compress.colgroup.indexes.ColIndexFactory; import org.apache.sysds.runtime.compress.estim.encoding.EncodingFactory; import org.apache.sysds.runtime.data.DenseBlockFP64; import org.apache.sysds.runtime.matrix.data.MatrixBlock; @@ -44,12 +45,12 @@ public void encodeNonContiguousTransposed() { EncodingFactory.createFromMatrixBlock(mock, true, 3); } - @Test(expected = NotImplementedException.class) + @Test(expected = NullPointerException.class) public void testInvalidToCallWithNullDeltaTransposed() { EncodingFactory.createFromMatrixBlockDelta(null, true, null); } - @Test(expected = NotImplementedException.class) + @Test(expected = NullPointerException.class) public void testInvalidToCallWithNullDelta() { EncodingFactory.createFromMatrixBlockDelta(null, false, null); } @@ -61,20 +62,30 @@ public void testInvalidToCallWithNull() { @Test(expected = NotImplementedException.class) public void testDeltaTransposed() { - EncodingFactory.createFromMatrixBlockDelta(new MatrixBlock(10, 10, false), true, null); + MatrixBlock mb = new MatrixBlock(10, 10, false); + mb.allocateDenseBlock(); + mb.set(0, 0, 1); + mb.set(0, 1, 2); + mb.setNonZeros(2); + EncodingFactory.createFromMatrixBlockDelta(mb, true, ColIndexFactory.create(2)); } - @Test(expected = NotImplementedException.class) + @Test(expected = NullPointerException.class) public void testDelta() { EncodingFactory.createFromMatrixBlockDelta(new MatrixBlock(10, 10, false), false, null); } @Test(expected = NotImplementedException.class) public void testDeltaTransposedNVals() { - EncodingFactory.createFromMatrixBlockDelta(new MatrixBlock(10, 10, false), true, null, 2); + MatrixBlock mb = new MatrixBlock(10, 10, false); + mb.allocateDenseBlock(); + mb.set(0, 0, 1); + 
mb.set(0, 1, 2); + mb.setNonZeros(2); + EncodingFactory.createFromMatrixBlockDelta(mb, true, ColIndexFactory.create(2), 2); } - @Test(expected = NotImplementedException.class) + @Test(expected = NullPointerException.class) public void testDeltaNVals() { EncodingFactory.createFromMatrixBlockDelta(new MatrixBlock(10, 10, false), false, null, 1); } diff --git a/src/test/java/org/apache/sysds/test/component/compress/lib/CLALibUnaryDeltaTest.java b/src/test/java/org/apache/sysds/test/component/compress/lib/CLALibUnaryDeltaTest.java new file mode 100644 index 00000000000..8c3d5a7b439 --- /dev/null +++ b/src/test/java/org/apache/sysds/test/component/compress/lib/CLALibUnaryDeltaTest.java @@ -0,0 +1,296 @@ +package org.apache.sysds.test.component.compress.lib; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.sysds.runtime.compress.CompressedMatrixBlock; +import org.apache.sysds.runtime.compress.CompressedMatrixBlockFactory; +import org.apache.sysds.runtime.compress.CompressionSettingsBuilder; +import org.apache.sysds.runtime.compress.colgroup.AColGroup; +import org.apache.sysds.runtime.compress.colgroup.AColGroup.CompressionType; +import org.apache.sysds.runtime.compress.lib.CLALibUnary; +import org.apache.sysds.runtime.functionobjects.Builtin; +import org.apache.sysds.runtime.matrix.data.MatrixBlock; +import org.apache.sysds.runtime.matrix.operators.UnaryOperator; +import org.apache.sysds.test.TestUtils; +import org.junit.Test; + +public class CLALibUnaryDeltaTest { + + protected static final Log LOG = LogFactory.getLog(CLALibUnaryDeltaTest.class.getName()); + + @Test + public void testCumsumResultsInDeltaEncoding() { + // Use data that results in repetitive deltas to ensure DeltaDDC is chosen + MatrixBlock mb = new MatrixBlock(20, 1, false); + mb.allocateDenseBlock(); + // 
Input: 1, 2, 1, 2, ... + // Cumsum: 1, 3, 4, 6, ... + // Deltas: 1, 2, 1, 2, ... + for(int i = 0; i < 20; i++) { + mb.set(i, 0, (i % 2 == 0) ? 1.0 : 2.0); + } + mb.setNonZeros(20); + + CompressionSettingsBuilder csb = new CompressionSettingsBuilder().setMinimumCompressionRatio(0.0); + csb.addValidCompression(CompressionType.DDC); + CompressedMatrixBlock cmb = compress(mb, csb); + + UnaryOperator cumsumOp = new UnaryOperator(Builtin.getBuiltinFnObject(Builtin.BuiltinCode.CUMSUM)); + MatrixBlock result = CLALibUnary.unaryOperations(cmb, cumsumOp, null); + + assertNotNull("Result should not be null", result); + assertTrue("Result should be compressed", result instanceof CompressedMatrixBlock); + + CompressedMatrixBlock compressedResult = (CompressedMatrixBlock) result; + boolean hasDeltaDDC = false; + for(AColGroup cg : compressedResult.getColGroups()) { + if(cg.getCompType() == CompressionType.DeltaDDC) { + hasDeltaDDC = true; + break; + } + } + + assertTrue("Result should contain DeltaDDC column group", hasDeltaDDC); + } + + @Test + public void testRowcumsumResultsInDeltaEncoding() { + MatrixBlock mb = new MatrixBlock(3, 4, false); + mb.allocateDenseBlock(); + // Row 1: 1, 2, 3, 4 -> cumsum: 1, 3, 6, 10 + mb.set(0, 0, 1.0); + mb.set(0, 1, 2.0); + mb.set(0, 2, 3.0); + mb.set(0, 3, 4.0); + // Row 2: 1, 1, 1, 1 -> cumsum: 1, 2, 3, 4 + mb.set(1, 0, 1.0); + mb.set(1, 1, 1.0); + mb.set(1, 2, 1.0); + mb.set(1, 3, 1.0); + // Row 3: 5, 5, 5, 5 -> cumsum: 5, 10, 15, 20 + mb.set(2, 0, 5.0); + mb.set(2, 1, 5.0); + mb.set(2, 2, 5.0); + mb.set(2, 3, 5.0); + mb.setNonZeros(12); + + CompressionSettingsBuilder csb = new CompressionSettingsBuilder().setMinimumCompressionRatio(0.0); + csb.addValidCompression(CompressionType.DDC); + CompressedMatrixBlock cmb = compress(mb, csb); + + UnaryOperator rowCumsumOp = new UnaryOperator(Builtin.getBuiltinFnObject(Builtin.BuiltinCode.ROWCUMSUM)); + MatrixBlock result = CLALibUnary.unaryOperations(cmb, rowCumsumOp, null); + + 
assertNotNull("Result should not be null", result); + assertTrue("Result should be compressed", result instanceof CompressedMatrixBlock); + + CompressedMatrixBlock compressedResult = (CompressedMatrixBlock) result; + // Delta encoding is row-wise, so row cumsum might not always benefit from delta DDC as much as col cumsum + // but we enforce it preference so it should be there if applicable. + // Actually for row cumsum, the result across columns changes. + // Let's check correctness mainly. + MatrixBlock expected = mb.unaryOperations(rowCumsumOp, new MatrixBlock()); + TestUtils.compareMatrices(expected, result, 0.0, "RowCumsum result should match expected"); + } + + @Test + public void testCumsumCorrectness() { + MatrixBlock mb = TestUtils.generateTestMatrixBlock(10, 3, 0, 10, 1.0, 7); + CompressionSettingsBuilder csb = new CompressionSettingsBuilder().setMinimumCompressionRatio(0.0); + csb.addValidCompression(CompressionType.DDC); + CompressedMatrixBlock cmb = compress(mb, csb); + + UnaryOperator cumsumOp = new UnaryOperator(Builtin.getBuiltinFnObject(Builtin.BuiltinCode.CUMSUM)); + MatrixBlock result = CLALibUnary.unaryOperations(cmb, cumsumOp, null); + MatrixBlock expected = mb.unaryOperations(cumsumOp, new MatrixBlock()); + + TestUtils.compareMatrices(expected, result, 0.0, "Cumsum result should match expected"); + } + + @Test + public void testRowcumsumCorrectness() { + MatrixBlock mb = TestUtils.generateTestMatrixBlock(10, 5, 0, 10, 1.0, 7); + CompressionSettingsBuilder csb = new CompressionSettingsBuilder().setMinimumCompressionRatio(0.0); + csb.addValidCompression(CompressionType.DDC); + CompressedMatrixBlock cmb = compress(mb, csb); + + UnaryOperator rowCumsumOp = new UnaryOperator(Builtin.getBuiltinFnObject(Builtin.BuiltinCode.ROWCUMSUM)); + MatrixBlock result = CLALibUnary.unaryOperations(cmb, rowCumsumOp, null); + MatrixBlock expected = mb.unaryOperations(rowCumsumOp, new MatrixBlock()); + + TestUtils.compareMatrices(expected, result, 0.0, "RowCumsum 
result should match expected"); + } + + @Test + public void testNonCumsumOperationDoesNotUseDeltaEncoding() { + MatrixBlock mb = new MatrixBlock(10, 2, false); + mb.allocateDenseBlock(); + for(int i = 0; i < 10; i++) { + mb.set(i, 0, i); + mb.set(i, 1, i * 2); + } + mb.setNonZeros(20); + + CompressionSettingsBuilder csb = new CompressionSettingsBuilder().setMinimumCompressionRatio(0.0); + csb.addValidCompression(CompressionType.DDC); + CompressedMatrixBlock cmb = compress(mb, csb); + + UnaryOperator absOp = new UnaryOperator(Builtin.getBuiltinFnObject(Builtin.BuiltinCode.ABS)); + MatrixBlock result = CLALibUnary.unaryOperations(cmb, absOp, null); + + assertNotNull("Result should not be null", result); + + if(result instanceof CompressedMatrixBlock) { + CompressedMatrixBlock compressedResult = (CompressedMatrixBlock) result; + boolean hasDeltaDDC = false; + for(AColGroup cg : compressedResult.getColGroups()) { + if(cg.getCompType() == CompressionType.DeltaDDC) { + hasDeltaDDC = true; + break; + } + } + // Should not have delta DDC + assertTrue("Result should NOT contain DeltaDDC column group for ABS", !hasDeltaDDC); + } + // If not compressed, it's also fine (standard execution) + } + + @Test + public void testCumsumSparseMatrix() { + MatrixBlock mb = new MatrixBlock(100, 10, true); + mb.set(0, 0, 1.0); + mb.set(10, 0, 2.0); + mb.set(20, 0, 3.0); + mb.setNonZeros(3); + + CompressionSettingsBuilder csb = new CompressionSettingsBuilder().setMinimumCompressionRatio(0.0); + csb.addValidCompression(CompressionType.DDC); + CompressedMatrixBlock cmb = compress(mb, csb); + + UnaryOperator cumsumOp = new UnaryOperator(Builtin.getBuiltinFnObject(Builtin.BuiltinCode.CUMSUM)); + MatrixBlock result = CLALibUnary.unaryOperations(cmb, cumsumOp, null); + MatrixBlock expected = mb.unaryOperations(cumsumOp, new MatrixBlock()); + + TestUtils.compareMatrices(expected, result, 0.0, "Cumsum result for sparse matrix should match expected"); + } + + @Test + public void 
testCumsumWithDifferentInputCompressionTypes() { + MatrixBlock mb = new MatrixBlock(10, 1, false); + mb.allocateDenseBlock(); + // RLE friendly data: 1, 1, 1, 2, 2, 2, 3, 3, 3, 4 + for(int i=0; i<3; i++) mb.set(i, 0, 1.0); + for(int i=3; i<6; i++) mb.set(i, 0, 2.0); + for(int i=6; i<9; i++) mb.set(i, 0, 3.0); + mb.set(9, 0, 4.0); + mb.setNonZeros(10); + + CompressionSettingsBuilder csb = new CompressionSettingsBuilder().setMinimumCompressionRatio(0.0); + csb.addValidCompression(CompressionType.RLE); + CompressedMatrixBlock cmb = compress(mb, csb); + + UnaryOperator cumsumOp = new UnaryOperator(Builtin.getBuiltinFnObject(Builtin.BuiltinCode.CUMSUM)); + MatrixBlock result = CLALibUnary.unaryOperations(cmb, cumsumOp, null); + + assertTrue("Result should be compressed", result instanceof CompressedMatrixBlock); + MatrixBlock expected = mb.unaryOperations(cumsumOp, new MatrixBlock()); + TestUtils.compareMatrices(expected, result, 0.0, "Cumsum result from RLE input should match expected"); + } + + @Test + public void testCumsumLargeMatrix() { + // Larger matrix to trigger multi-threaded execution if applicable + MatrixBlock mb = TestUtils.generateTestMatrixBlock(100, 5, 0, 100, 1.0, 7); + CompressionSettingsBuilder csb = new CompressionSettingsBuilder().setMinimumCompressionRatio(0.0); + csb.addValidCompression(CompressionType.DDC); + CompressedMatrixBlock cmb = compress(mb, csb); + + UnaryOperator cumsumOp = new UnaryOperator(Builtin.getBuiltinFnObject(Builtin.BuiltinCode.CUMSUM)); + MatrixBlock result = CLALibUnary.unaryOperations(cmb, cumsumOp, null); + MatrixBlock expected = mb.unaryOperations(cumsumOp, new MatrixBlock()); + + TestUtils.compareMatrices(expected, result, 0.0, "Cumsum result for large matrix should match expected"); + } + + @Test + public void testCumsumWithConstantColumns() { + MatrixBlock mb = new MatrixBlock(10, 2, false); + mb.allocateDenseBlock(); + for(int i=0; i<10; i++) { + mb.set(i, 0, 1.0); // Constant column + mb.set(i, 1, i); // Increasing 
column + } + mb.setNonZeros(20); + + CompressionSettingsBuilder csb = new CompressionSettingsBuilder().setMinimumCompressionRatio(0.0); + csb.addValidCompression(CompressionType.DDC); + csb.addValidCompression(CompressionType.CONST); + CompressedMatrixBlock cmb = compress(mb, csb); + + UnaryOperator cumsumOp = new UnaryOperator(Builtin.getBuiltinFnObject(Builtin.BuiltinCode.CUMSUM)); + MatrixBlock result = CLALibUnary.unaryOperations(cmb, cumsumOp, null); + MatrixBlock expected = mb.unaryOperations(cumsumOp, new MatrixBlock()); + + TestUtils.compareMatrices(expected, result, 0.0, "Cumsum result with constant columns should match expected"); + } + + @Test + public void testCumsumMultiColumn() { + MatrixBlock mb = new MatrixBlock(10, 4, false); + mb.allocateDenseBlock(); + for(int i=0; i<10; i++) { + for(int j=0; j<4; j++) { + mb.set(i, j, i+j); + } + } + mb.setNonZeros(40); + + CompressionSettingsBuilder csb = new CompressionSettingsBuilder().setMinimumCompressionRatio(0.0); + csb.addValidCompression(CompressionType.DDC); + CompressedMatrixBlock cmb = compress(mb, csb); + + UnaryOperator cumsumOp = new UnaryOperator(Builtin.getBuiltinFnObject(Builtin.BuiltinCode.CUMSUM)); + MatrixBlock result = CLALibUnary.unaryOperations(cmb, cumsumOp, null); + MatrixBlock expected = mb.unaryOperations(cumsumOp, new MatrixBlock()); + + TestUtils.compareMatrices(expected, result, 0.0, "Cumsum result for multi-column matrix should match expected"); + } + + @Test + public void testCumsumWhenDeltaDDCNotInValidCompressions() { + MatrixBlock mb = new MatrixBlock(4, 2, false); + mb.allocateDenseBlock(); + mb.set(0, 0, 1.0); + mb.set(0, 1, 2.0); + mb.set(1, 0, 3.0); + mb.set(1, 1, 4.0); + mb.set(2, 0, 5.0); + mb.set(2, 1, 6.0); + mb.set(3, 0, 7.0); + mb.set(3, 1, 8.0); + mb.setNonZeros(8); + + CompressionSettingsBuilder csb = new CompressionSettingsBuilder().setMinimumCompressionRatio(0.0); + csb.addValidCompression(CompressionType.RLE); + CompressedMatrixBlock cmb = compress(mb, csb); + + 
UnaryOperator cumsumOp = new UnaryOperator(Builtin.getBuiltinFnObject(Builtin.BuiltinCode.CUMSUM)); + MatrixBlock result = CLALibUnary.unaryOperations(cmb, cumsumOp, null); + + assertNotNull("Result should not be null", result); + MatrixBlock expected = mb.unaryOperations(cumsumOp, new MatrixBlock()); + TestUtils.compareMatrices(expected, result, 0.0, "Cumsum result should match expected even when DeltaDDC not in valid compressions"); + } + + private CompressedMatrixBlock compress(MatrixBlock mb, CompressionSettingsBuilder csb) { + MatrixBlock mbComp = CompressedMatrixBlockFactory.compress(mb, 1, csb).getLeft(); + if(mbComp instanceof CompressedMatrixBlock) + return (CompressedMatrixBlock) mbComp; + else + return CompressedMatrixBlockFactory.genUncompressedCompressedMatrixBlock(mbComp); + } +} diff --git a/src/test/java/org/apache/sysds/test/component/compress/readers/ReadersDeltaTest.java b/src/test/java/org/apache/sysds/test/component/compress/readers/ReadersDeltaTest.java new file mode 100644 index 00000000000..eb4df2f47eb --- /dev/null +++ b/src/test/java/org/apache/sysds/test/component/compress/readers/ReadersDeltaTest.java @@ -0,0 +1,323 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.sysds.test.component.compress.readers; + +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; + +import org.apache.commons.lang3.NotImplementedException; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.sysds.runtime.compress.DMLCompressionException; +import org.apache.sysds.runtime.compress.colgroup.indexes.ColIndexFactory; +import org.apache.sysds.runtime.compress.readers.ReaderColumnSelection; +import org.apache.sysds.runtime.compress.readers.ReaderColumnSelectionDenseSingleBlockDelta; +import org.apache.sysds.runtime.compress.readers.ReaderColumnSelectionDenseMultiBlockDelta; +import org.apache.sysds.runtime.compress.readers.ReaderColumnSelectionSparseDelta; +import org.apache.sysds.runtime.compress.readers.ReaderColumnSelectionEmpty; +import org.apache.sysds.runtime.compress.utils.DblArray; +import org.apache.sysds.runtime.matrix.data.MatrixBlock; +import org.junit.Test; + +public class ReadersDeltaTest { + + protected static final Log LOG = LogFactory.getLog(ReadersDeltaTest.class.getName()); + + @Test + public void testDeltaReaderDenseSingleBlockBasic() { + MatrixBlock mb = new MatrixBlock(3, 2, false); + mb.allocateDenseBlock(); + mb.set(0, 0, 10); + mb.set(0, 1, 20); + mb.set(1, 0, 11); + mb.set(1, 1, 21); + mb.set(2, 0, 12); + mb.set(2, 1, 22); + + ReaderColumnSelection reader = ReaderColumnSelection.createDeltaReader(mb, ColIndexFactory.create(2), false); + assertNotNull(reader); + assertEquals(ReaderColumnSelectionDenseSingleBlockDelta.class, reader.getClass()); + + DblArray row0 = reader.nextRow(); + assertNotNull(row0); + assertArrayEquals(new double[] {10, 20}, row0.getData(), 0.0); + + DblArray row1 = reader.nextRow(); + assertNotNull(row1); + assertArrayEquals(new double[] {1, 1}, 
row1.getData(), 0.0); + + DblArray row2 = reader.nextRow(); + assertNotNull(row2); + assertArrayEquals(new double[] {1, 1}, row2.getData(), 0.0); + + assertNull(reader.nextRow()); + } + + @Test + public void testDeltaReaderFirstRowAsIs() { + MatrixBlock mb = new MatrixBlock(2, 2, false); + mb.allocateDenseBlock(); + mb.set(0, 0, 5); + mb.set(0, 1, 10); + mb.set(1, 0, 7); + mb.set(1, 1, 12); + + ReaderColumnSelection reader = ReaderColumnSelection.createDeltaReader(mb, ColIndexFactory.create(2), false); + DblArray row0 = reader.nextRow(); + assertArrayEquals(new double[] {5, 10}, row0.getData(), 0.0); + } + + @Test + public void testDeltaReaderNegativeValues() { + MatrixBlock mb = new MatrixBlock(3, 2, false); + mb.allocateDenseBlock(); + mb.set(0, 0, 10); + mb.set(0, 1, 20); + mb.set(1, 0, 8); + mb.set(1, 1, 15); + mb.set(2, 0, 12); + mb.set(2, 1, 25); + + ReaderColumnSelection reader = ReaderColumnSelection.createDeltaReader(mb, ColIndexFactory.create(2), false); + reader.nextRow(); + DblArray row1 = reader.nextRow(); + assertArrayEquals(new double[] {-2, -5}, row1.getData(), 0.0); + + DblArray row2 = reader.nextRow(); + assertArrayEquals(new double[] {4, 10}, row2.getData(), 0.0); + } + + @Test + public void testDeltaReaderZeros() { + MatrixBlock mb = new MatrixBlock(3, 2, false); + mb.allocateDenseBlock(); + mb.set(0, 0, 5); + mb.set(0, 1, 0); + mb.set(1, 0, 5); + mb.set(1, 1, 0); + mb.set(2, 0, 0); + mb.set(2, 1, 5); + + ReaderColumnSelection reader = ReaderColumnSelection.createDeltaReader(mb, ColIndexFactory.create(2), false); + reader.nextRow(); + DblArray row1 = reader.nextRow(); + assertArrayEquals(new double[] {0, 0}, row1.getData(), 0.0); + + DblArray row2 = reader.nextRow(); + assertArrayEquals(new double[] {-5, 5}, row2.getData(), 0.0); + } + + @Test + public void testDeltaReaderSingleRow() { + MatrixBlock mb = new MatrixBlock(1, 2, false); + mb.allocateDenseBlock(); + mb.set(0, 0, 10); + mb.set(0, 1, 20); + + ReaderColumnSelection reader = 
ReaderColumnSelection.createDeltaReader(mb, ColIndexFactory.create(2), false); + DblArray row0 = reader.nextRow(); + assertNotNull(row0); + assertArrayEquals(new double[] {10, 20}, row0.getData(), 0.0); + assertNull(reader.nextRow()); + } + + @Test + public void testDeltaReaderTwoRows() { + MatrixBlock mb = new MatrixBlock(2, 2, false); + mb.allocateDenseBlock(); + mb.set(0, 0, 10); + mb.set(0, 1, 20); + mb.set(1, 0, 15); + mb.set(1, 1, 25); + + ReaderColumnSelection reader = ReaderColumnSelection.createDeltaReader(mb, ColIndexFactory.create(2), false); + DblArray row0 = reader.nextRow(); + assertArrayEquals(new double[] {10, 20}, row0.getData(), 0.0); + + DblArray row1 = reader.nextRow(); + assertArrayEquals(new double[] {5, 5}, row1.getData(), 0.0); + + assertNull(reader.nextRow()); + } + + @Test + public void testDeltaReaderColumnSelection() { + MatrixBlock mb = new MatrixBlock(3, 4, false); + mb.allocateDenseBlock(); + mb.set(0, 0, 10); + mb.set(0, 1, 20); + mb.set(0, 2, 30); + mb.set(0, 3, 40); + mb.set(1, 0, 11); + mb.set(1, 1, 21); + mb.set(1, 2, 31); + mb.set(1, 3, 41); + mb.set(2, 0, 12); + mb.set(2, 1, 22); + mb.set(2, 2, 32); + mb.set(2, 3, 42); + + ReaderColumnSelection reader = ReaderColumnSelection.createDeltaReader(mb, ColIndexFactory.createI(0, 2), false); + DblArray row0 = reader.nextRow(); + assertArrayEquals(new double[] {10, 30}, row0.getData(), 0.0); + + DblArray row1 = reader.nextRow(); + assertArrayEquals(new double[] {1, 1}, row1.getData(), 0.0); + + DblArray row2 = reader.nextRow(); + assertArrayEquals(new double[] {1, 1}, row2.getData(), 0.0); + } + + @Test + public void testDeltaReaderSparse() { + MatrixBlock mb = new MatrixBlock(3, 2, true); + mb.set(0, 0, 10); + mb.set(0, 1, 20); + mb.set(1, 0, 11); + mb.set(2, 1, 22); + + ReaderColumnSelection reader = ReaderColumnSelection.createDeltaReader(mb, ColIndexFactory.create(2), false); + assertNotNull(reader); + assertEquals(ReaderColumnSelectionSparseDelta.class, reader.getClass()); + + 
DblArray row0 = reader.nextRow(); + assertArrayEquals(new double[] {10, 20}, row0.getData(), 0.0); + + DblArray row1 = reader.nextRow(); + assertArrayEquals(new double[] {1, -20}, row1.getData(), 0.0); + + DblArray row2 = reader.nextRow(); + assertArrayEquals(new double[] {-11, 22}, row2.getData(), 0.0); + } + + @Test + public void testDeltaReaderSparseZeros() { + MatrixBlock mb = new MatrixBlock(3, 2, true); + mb.set(0, 0, 5); + mb.set(1, 1, 10); + mb.set(2, 0, 5); + + ReaderColumnSelection reader = ReaderColumnSelection.createDeltaReader(mb, ColIndexFactory.create(2), false); + DblArray row0 = reader.nextRow(); + assertArrayEquals(new double[] {5, 0}, row0.getData(), 0.0); + + DblArray row1 = reader.nextRow(); + assertArrayEquals(new double[] {-5, 10}, row1.getData(), 0.0); + + DblArray row2 = reader.nextRow(); + assertArrayEquals(new double[] {5, -10}, row2.getData(), 0.0); + } + + @Test + public void testDeltaReaderRange() { + MatrixBlock mb = new MatrixBlock(5, 2, false); + mb.allocateDenseBlock(); + for(int i = 0; i < 5; i++) { + mb.set(i, 0, 10 + i); + mb.set(i, 1, 20 + i); + } + + ReaderColumnSelection reader = ReaderColumnSelection.createDeltaReader(mb, ColIndexFactory.create(2), false, 1, 4); + DblArray row1 = reader.nextRow(); + assertArrayEquals(new double[] {11, 21}, row1.getData(), 0.0); + + DblArray row2 = reader.nextRow(); + assertArrayEquals(new double[] {1, 1}, row2.getData(), 0.0); + + DblArray row3 = reader.nextRow(); + assertArrayEquals(new double[] {1, 1}, row3.getData(), 0.0); + + assertNull(reader.nextRow()); + } + + @Test(expected = DMLCompressionException.class) + public void testDeltaReaderInvalidRange() { + MatrixBlock mb = new MatrixBlock(10, 2, false); + mb.allocateDenseBlock(); + ReaderColumnSelection.createDeltaReader(mb, ColIndexFactory.create(2), false, 10, 9); + } + + @Test(expected = NotImplementedException.class) + public void testDeltaReaderTransposed() { + MatrixBlock mb = new MatrixBlock(10, 10, false); + 
mb.allocateDenseBlock(); + mb.setNonZeros(100); + ReaderColumnSelection.createDeltaReader(mb, ColIndexFactory.create(2), true); + } + + @Test + public void testDeltaReaderLargeMatrix() { + MatrixBlock mb = new MatrixBlock(100, 3, false); + mb.allocateDenseBlock(); + for(int i = 0; i < 100; i++) { + mb.set(i, 0, i); + mb.set(i, 1, i * 2); + mb.set(i, 2, i * 3); + } + + ReaderColumnSelection reader = ReaderColumnSelection.createDeltaReader(mb, ColIndexFactory.create(3), false); + DblArray row0 = reader.nextRow(); + assertArrayEquals(new double[] {0, 0, 0}, row0.getData(), 0.0); + + for(int i = 1; i < 100; i++) { + DblArray row = reader.nextRow(); + assertNotNull(row); + assertArrayEquals(new double[] {1, 2, 3}, row.getData(), 0.0); + } + + assertNull(reader.nextRow()); + } + + @Test + public void testDeltaReaderEmptyMatrix() { + // Test empty matrix with dimensions but all zeros + MatrixBlock mb = new MatrixBlock(5, 2, false); + mb.allocateDenseBlock(); + // Matrix has dimensions but is empty (all zeros) + // isEmpty() should return true + + ReaderColumnSelection reader = ReaderColumnSelection.createDeltaReader(mb, ColIndexFactory.create(2), false); + assertNotNull(reader); + assertTrue(reader instanceof ReaderColumnSelectionEmpty); + + // Empty reader should return null immediately + assertNull(reader.nextRow()); + } + + @Test + public void testDeltaReaderEmptyMatrixSparse() { + // Test empty sparse matrix with dimensions + MatrixBlock mb = new MatrixBlock(5, 2, true); + // Sparse matrix with no values is empty + mb.setNonZeros(0); + + ReaderColumnSelection reader = ReaderColumnSelection.createDeltaReader(mb, ColIndexFactory.create(2), false); + assertNotNull(reader); + assertTrue(reader instanceof ReaderColumnSelectionEmpty); + + // Empty reader should return null immediately + assertNull(reader.nextRow()); + } + +} + diff --git a/src/test/java/org/apache/sysds/test/component/compress/readers/ReadersTest.java 
b/src/test/java/org/apache/sysds/test/component/compress/readers/ReadersTest.java index ae92d3a4313..94e2fb5c29f 100644 --- a/src/test/java/org/apache/sysds/test/component/compress/readers/ReadersTest.java +++ b/src/test/java/org/apache/sysds/test/component/compress/readers/ReadersTest.java @@ -23,6 +23,7 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; +import org.apache.commons.lang3.NotImplementedException; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.sysds.runtime.compress.DMLCompressionException; @@ -39,10 +40,11 @@ public class ReadersTest { protected static final Log LOG = LogFactory.getLog(ReadersTest.class.getName()); - @Test(expected = DMLCompressionException.class) + @Test public void testDenseSingleCol() { MatrixBlock mb = TestUtils.generateTestMatrixBlock(10, 1, 1, 1, 0.5, 21342); - ReaderColumnSelection.createReader(mb, ColIndexFactory.create(1), false); + ReaderColumnSelection reader = ReaderColumnSelection.createReader(mb, ColIndexFactory.create(1), false); + assertNotNull(reader); } @Test @@ -125,6 +127,49 @@ public void testReaderColumnSelectionQuantized() { } } } - + + @Test + public void testDeltaReaderBasic() { + MatrixBlock mb = new MatrixBlock(3, 2, false); + mb.allocateDenseBlock(); + mb.set(0, 0, 10); + mb.set(0, 1, 20); + mb.set(1, 0, 11); + mb.set(1, 1, 21); + mb.set(2, 0, 12); + mb.set(2, 1, 22); + + ReaderColumnSelection reader = ReaderColumnSelection.createDeltaReader(mb, ColIndexFactory.create(2), false); + DblArray row0 = reader.nextRow(); + assertNotNull(row0); + assertArrayEquals(new double[] {10, 20}, row0.getData(), 0.0); + + DblArray row1 = reader.nextRow(); + assertNotNull(row1); + assertArrayEquals(new double[] {1, 1}, row1.getData(), 0.0); + + DblArray row2 = reader.nextRow(); + assertNotNull(row2); + assertArrayEquals(new double[] {1, 1}, row2.getData(), 0.0); + + assertEquals(null, reader.nextRow()); + } + + @Test + public 
void testDeltaReaderSingleCol() { + MatrixBlock mb = TestUtils.generateTestMatrixBlock(10, 1, 1, 1, 0.5, 21342); + ReaderColumnSelection reader = ReaderColumnSelection.createDeltaReader(mb, ColIndexFactory.create(1), false); + assertNotNull(reader); + } + + @Test(expected = NotImplementedException.class) + public void testDeltaReaderTransposed() { + MatrixBlock mb = new MatrixBlock(10, 10, false); + mb.allocateDenseBlock(); + mb.set(0, 0, 1); + mb.set(0, 1, 2); + mb.setNonZeros(2); + ReaderColumnSelection.createDeltaReader(mb, ColIndexFactory.create(2), true); + } } From acc68499fa532d4beefd7b8e78fd7575f9aee393 Mon Sep 17 00:00:00 2001 From: Hana Halitim Date: Sun, 7 Dec 2025 16:25:02 +0100 Subject: [PATCH 2/6] [SYSTEMDS-3539] Fix DeltaDDC logic and clean up per review - Fixed incorrect decompression logic for rl > 0 (partial ranges). - Removed unnecessary empty constructors. - Overrode unsupported DDC methods in ColGroupDeltaDDC. - Corrected ColGroupDeltaDDC.create for constant conversion. - Fixed dictionary allocation size for extra flag in ColGroupFactory. - Optimized CUMSUM/ROWCUMSUM to reinterpret DDC groups as DeltaDDC. - Strengthened EncodeDeltaTest assertions and added combine() tests. - Added new tests for partial range decompression and serialization. - Removed unused imports. 
--- .../runtime/compress/colgroup/AColGroup.java | 4 - .../colgroup/AColGroupCompressed.java | 4 - .../compress/colgroup/AColGroupValue.java | 4 - .../compress/colgroup/ADictBasedColGroup.java | 5 - .../runtime/compress/colgroup/APreAgg.java | 4 - .../compress/colgroup/ColGroupDDC.java | 79 +++- .../compress/colgroup/ColGroupDeltaDDC.java | 419 ++++++++++++++++-- .../compress/colgroup/ColGroupFactory.java | 8 +- .../runtime/compress/colgroup/ColGroupIO.java | 10 +- .../colgroup/dictionary/DeltaDictionary.java | 55 ++- .../dictionary/DictionaryFactory.java | 4 +- .../runtime/compress/lib/CLALibUnary.java | 38 +- .../compress/utils/DblArrayCountHashMap.java | 2 +- .../colgroup/ColGroupDeltaDDCTest.java | 169 ++++++- .../colgroup/JolEstimateDeltaDDCTest.java | 36 +- .../compress/colgroup/JolEstimateTest.java | 31 +- .../dictionary/DeltaDictionaryTest.java | 72 ++- .../estim/encoding/EncodeDeltaTest.java | 199 +++++++-- .../compress/lib/CLALibUnaryDeltaTest.java | 20 +- 19 files changed, 1010 insertions(+), 153 deletions(-) diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroup.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroup.java index 57503f64884..ec502d6d122 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroup.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroup.java @@ -93,10 +93,6 @@ protected static enum ColGroupType { /** The ColGroup indexes contained in the ColGroup */ protected final IColIndex _colIndexes; - protected AColGroup() { - _colIndexes = null; - } - /** * Main constructor. 
* diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroupCompressed.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroupCompressed.java index 8e8211e40fc..b83f133647e 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroupCompressed.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroupCompressed.java @@ -46,10 +46,6 @@ public abstract class AColGroupCompressed extends AColGroup { private static final long serialVersionUID = 6219835795420081223L; - protected AColGroupCompressed() { - super(); - } - protected AColGroupCompressed(IColIndex colIndices) { super(colIndices); } diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroupValue.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroupValue.java index bd9834938e6..0cde289b30f 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroupValue.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroupValue.java @@ -35,10 +35,6 @@ public abstract class AColGroupValue extends ADictBasedColGroup { /** The count of each distinct value contained in the dictionary */ private SoftReference counts = null; - protected AColGroupValue() { - super(); - } - /** * A abstract class for column groups that contain IDictionary for values. * diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ADictBasedColGroup.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ADictBasedColGroup.java index 16bfe7d0924..8f2f0b46055 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ADictBasedColGroup.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ADictBasedColGroup.java @@ -43,11 +43,6 @@ public abstract class ADictBasedColGroup extends AColGroupCompressed implements /** Distinct value tuples associated with individual bitmaps. 
*/ protected final IDictionary _dict; - protected ADictBasedColGroup() { - super(); - _dict = null; - } - /** * A Abstract class for column groups that contain IDictionary for values. * diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/APreAgg.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/APreAgg.java index 7f85ad9519a..bddc50d4bcb 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/APreAgg.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/APreAgg.java @@ -41,10 +41,6 @@ public abstract class APreAgg extends AColGroupValue { private static boolean loggedWarningForDirect = false; - protected APreAgg() { - super(); - } - /** * A Abstract class for column groups that contain IDictionary for values. * diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDC.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDC.java index 0c9c6eb067a..c6d3d6d10c3 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDC.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDC.java @@ -33,6 +33,7 @@ import org.apache.sysds.runtime.compress.CompressedMatrixBlock; import org.apache.sysds.runtime.compress.DMLCompressionException; import org.apache.sysds.runtime.compress.colgroup.ColGroupUtils.P; +import org.apache.sysds.runtime.compress.colgroup.dictionary.DeltaDictionary; import org.apache.sysds.runtime.compress.colgroup.dictionary.Dictionary; import org.apache.sysds.runtime.compress.colgroup.dictionary.DictionaryFactory; import org.apache.sysds.runtime.compress.colgroup.dictionary.IDictionary; @@ -43,6 +44,9 @@ import org.apache.sysds.runtime.compress.colgroup.indexes.RangeIndex; import org.apache.sysds.runtime.compress.colgroup.mapping.AMapToData; import org.apache.sysds.runtime.compress.colgroup.mapping.MapToFactory; +import org.apache.sysds.runtime.compress.utils.ACount; +import 
org.apache.sysds.runtime.compress.utils.DblArray; +import org.apache.sysds.runtime.compress.utils.DblArrayCountHashMap; import org.apache.sysds.runtime.compress.colgroup.offset.AOffsetIterator; import org.apache.sysds.runtime.compress.colgroup.offset.OffsetFactory; import org.apache.sysds.runtime.compress.colgroup.scheme.DDCScheme; @@ -77,11 +81,6 @@ public class ColGroupDDC extends APreAgg implements IMapToDataGroup { static final VectorSpecies SPECIES = DoubleVector.SPECIES_PREFERRED; - protected ColGroupDDC() { - super(); - _data = null; - } - protected ColGroupDDC(IColIndex colIndexes, IDictionary dict, AMapToData data, int[] cachedCounts) { super(colIndexes, dict, cachedCounts); _data = data; @@ -1110,4 +1109,74 @@ protected boolean allowShallowIdentityRightMult() { return true; } + public AColGroup convertToDeltaDDC() { + int numCols = _colIndexes.size(); + int numRows = _data.size(); + + DblArrayCountHashMap map = new DblArrayCountHashMap(Math.max(numRows, 64)); + double[] rowDelta = new double[numCols]; + double[] prevRow = new double[numCols]; + DblArray dblArray = new DblArray(rowDelta); + int[] rowToDictId = new int[numRows]; + + double[] dictVals = null; + try { + dictVals = _dict.getValues(); + } catch (Exception e) { + } + + for(int i = 0; i < numRows; i++) { + int dictIdx = _data.getIndex(i); + if(dictVals != null) { + int off = dictIdx * numCols; + for(int j = 0; j < numCols; j++) { + double val = dictVals[off + j]; + if(i == 0) { + rowDelta[j] = val; + prevRow[j] = val; + } else { + rowDelta[j] = val - prevRow[j]; + prevRow[j] = val; + } + } + } else { + for(int j = 0; j < numCols; j++) { + double val = _dict.getValue(dictIdx, j, numCols); + if(i == 0) { + rowDelta[j] = val; + prevRow[j] = val; + } else { + rowDelta[j] = val - prevRow[j]; + prevRow[j] = val; + } + } + } + + rowToDictId[i] = map.increment(dblArray); + } + + if(map.size() == 0) + return new ColGroupEmpty(_colIndexes); + + ACount[] vals = map.extractValues(); + final int nVals = 
vals.length; + final double[] dictValues = new double[nVals * numCols]; + final int[] oldIdToNewId = new int[map.size()]; + int idx = 0; + for(int i = 0; i < nVals; i++) { + final ACount dac = vals[i]; + final double[] arrData = dac.key().getData(); + System.arraycopy(arrData, 0, dictValues, idx, numCols); + oldIdToNewId[dac.id] = i; + idx += numCols; + } + + DeltaDictionary deltaDict = new DeltaDictionary(dictValues, numCols); + AMapToData newData = MapToFactory.create(numRows, nVals); + for(int i = 0; i < numRows; i++) { + newData.set(i, oldIdToNewId[rowToDictId[i]]); + } + return ColGroupDeltaDDC.create(_colIndexes, deltaDict, newData, null); + } + } diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDeltaDDC.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDeltaDDC.java index 292931a0f38..c49845627c4 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDeltaDDC.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDeltaDDC.java @@ -19,17 +19,33 @@ package org.apache.sysds.runtime.compress.colgroup; +import java.io.DataInput; +import java.io.IOException; +import java.util.Arrays; + import org.apache.commons.lang3.NotImplementedException; -import org.apache.sysds.runtime.DMLRuntimeException; import org.apache.sysds.runtime.compress.CompressedMatrixBlock; import org.apache.sysds.runtime.compress.DMLCompressionException; import org.apache.sysds.runtime.compress.colgroup.dictionary.DeltaDictionary; +import org.apache.sysds.runtime.compress.colgroup.dictionary.Dictionary; +import org.apache.sysds.runtime.compress.colgroup.dictionary.DictionaryFactory; import org.apache.sysds.runtime.compress.colgroup.dictionary.IDictionary; +import org.apache.sysds.runtime.compress.colgroup.indexes.ColIndexFactory; import org.apache.sysds.runtime.compress.colgroup.indexes.IColIndex; import org.apache.sysds.runtime.compress.colgroup.mapping.AMapToData; +import 
org.apache.sysds.runtime.compress.colgroup.mapping.MapToFactory; +import org.apache.sysds.runtime.compress.utils.Util; import org.apache.sysds.runtime.data.DenseBlock; import org.apache.sysds.runtime.data.SparseBlock; +import org.apache.sysds.runtime.data.SparseBlockMCSR; +import org.apache.sysds.runtime.functionobjects.Builtin; +import org.apache.sysds.runtime.functionobjects.Divide; +import org.apache.sysds.runtime.functionobjects.Minus; +import org.apache.sysds.runtime.functionobjects.Multiply; +import org.apache.sysds.runtime.functionobjects.Plus; +import org.apache.sysds.runtime.matrix.data.MatrixBlock; import org.apache.sysds.runtime.matrix.operators.ScalarOperator; +import org.apache.sysds.runtime.matrix.operators.UnaryOperator; /** * Class to encapsulate information about a column group that is first delta encoded then encoded with dense dictionary @@ -38,11 +54,6 @@ public class ColGroupDeltaDDC extends ColGroupDDC { private static final long serialVersionUID = -1045556313148564147L; - /** Constructor for serialization */ - protected ColGroupDeltaDDC() { - super(); - } - private ColGroupDeltaDDC(IColIndex colIndexes, IDictionary dict, AMapToData data, int[] cachedCounts) { super(colIndexes, dict, data, cachedCounts); if(CompressedMatrixBlock.debug) { @@ -52,12 +63,31 @@ private ColGroupDeltaDDC(IColIndex colIndexes, IDictionary dict, AMapToData data } public static AColGroup create(IColIndex colIndexes, IDictionary dict, AMapToData data, int[] cachedCounts) { - if(data.getUnique() == 1) - return ColGroupConst.create(colIndexes, dict); - else if(dict == null) + if(dict == null) return new ColGroupEmpty(colIndexes); - else - return new ColGroupDeltaDDC(colIndexes, dict, data, cachedCounts); + + if(!(dict instanceof DeltaDictionary)) + throw new DMLCompressionException("ColGroupDeltaDDC must use DeltaDictionary"); + + if(data.getUnique() == 1) { + DeltaDictionary deltaDict = (DeltaDictionary) dict; + double[] values = deltaDict.getValues(); + final int nCol = 
colIndexes.size(); + boolean allZeros = true; + for(int i = 0; i < nCol; i++) { + if(!Util.eq(values[i], 0.0)) { + allZeros = false; + break; + } + } + if(allZeros) { + double[] constValues = new double[nCol]; + System.arraycopy(values, 0, constValues, 0, nCol); + return ColGroupConst.create(colIndexes, Dictionary.create(constValues)); + } + } + + return new ColGroupDeltaDDC(colIndexes, dict, data, cachedCounts); } @Override @@ -65,6 +95,18 @@ public CompressionType getCompType() { return CompressionType.DeltaDDC; } + @Override + public ColGroupType getColGroupType() { + return ColGroupType.DeltaDDC; + } + + public static ColGroupDeltaDDC read(DataInput in) throws IOException { + IColIndex cols = ColIndexFactory.read(in); + IDictionary dict = DictionaryFactory.read(in); + AMapToData data = MapToFactory.readIn(in); + return new ColGroupDeltaDDC(cols, dict, data, null); + } + @Override protected void decompressToDenseBlockDenseDictionary(DenseBlock db, int rl, int ru, int offR, int offC, double[] values) { @@ -72,16 +114,95 @@ protected void decompressToDenseBlockDenseDictionary(DenseBlock db, int rl, int final double[] prevRow = new double[nCol]; if(rl > 0) { - final double[] prevRowData = db.values(rl - 1 + offR); - final int prevOff = db.pos(rl - 1 + offR) + offC; + final int dictIdx0 = _data.getIndex(0); + final int rowIndex0 = dictIdx0 * nCol; for(int j = 0; j < nCol; j++) { - prevRow[j] = prevRowData[prevOff + _colIndexes.get(j)]; + prevRow[j] = values[rowIndex0 + j]; + } + for(int i = 1; i < rl; i++) { + final int dictIdx = _data.getIndex(i); + final int rowIndex = dictIdx * nCol; + for(int j = 0; j < nCol; j++) { + prevRow[j] += values[rowIndex + j]; + } + } + } + + if(db.isContiguous() && nCol == db.getDim(1) && offC == 0) { + final int nColOut = db.getDim(1); + final double[] c = db.values(0); + for(int i = rl; i < ru; i++) { + final int dictIdx = _data.getIndex(i); + final int rowIndex = dictIdx * nCol; + final int rowBaseOff = (i + offR) * nColOut; + + 
if(i == 0 && rl == 0) { + for(int j = 0; j < nCol; j++) { + final double value = values[rowIndex + j]; + c[rowBaseOff + j] = value; + prevRow[j] = value; + } + } + else { + for(int j = 0; j < nCol; j++) { + final double delta = values[rowIndex + j]; + final double newValue = prevRow[j] + delta; + c[rowBaseOff + j] = newValue; + prevRow[j] = newValue; + } + } + } + } + else { + for(int i = rl, offT = rl + offR; i < ru; i++, offT++) { + final double[] c = db.values(offT); + final int off = db.pos(offT) + offC; + final int dictIdx = _data.getIndex(i); + final int rowIndex = dictIdx * nCol; + + if(i == 0 && rl == 0) { + for(int j = 0; j < nCol; j++) { + final double value = values[rowIndex + j]; + final int colIdx = _colIndexes.get(j); + c[off + colIdx] = value; + prevRow[j] = value; + } + } + else { + for(int j = 0; j < nCol; j++) { + final double delta = values[rowIndex + j]; + final double newValue = prevRow[j] + delta; + final int colIdx = _colIndexes.get(j); + c[off + colIdx] = newValue; + prevRow[j] = newValue; + } + } + } + } + } + + @Override + protected void decompressToSparseBlockDenseDictionary(SparseBlock ret, int rl, int ru, int offR, int offC, + double[] values) { + final int nCol = _colIndexes.size(); + final double[] prevRow = new double[nCol]; + + if(rl > 0) { + final int dictIdx0 = _data.getIndex(0); + final int rowIndex0 = dictIdx0 * nCol; + for(int j = 0; j < nCol; j++) { + prevRow[j] = values[rowIndex0 + j]; + } + for(int i = 1; i < rl; i++) { + final int dictIdx = _data.getIndex(i); + final int rowIndex = dictIdx * nCol; + for(int j = 0; j < nCol; j++) { + prevRow[j] += values[rowIndex + j]; + } } } for(int i = rl, offT = rl + offR; i < ru; i++, offT++) { - final double[] c = db.values(offT); - final int off = db.pos(offT) + offC; final int dictIdx = _data.getIndex(i); final int rowIndex = dictIdx * nCol; @@ -89,7 +210,7 @@ protected void decompressToDenseBlockDenseDictionary(DenseBlock db, int rl, int for(int j = 0; j < nCol; j++) { final double 
value = values[rowIndex + j]; final int colIdx = _colIndexes.get(j); - c[off + colIdx] = value; + ret.append(offT, colIdx + offC, value); prevRow[j] = value; } } @@ -98,7 +219,7 @@ protected void decompressToDenseBlockDenseDictionary(DenseBlock db, int rl, int final double delta = values[rowIndex + j]; final double newValue = prevRow[j] + delta; final int colIdx = _colIndexes.get(j); - c[off + colIdx] = newValue; + ret.append(offT, colIdx + offC, newValue); prevRow[j] = newValue; } } @@ -106,20 +227,266 @@ protected void decompressToDenseBlockDenseDictionary(DenseBlock db, int rl, int } @Override - protected void decompressToSparseBlockDenseDictionary(SparseBlock ret, int rl, int ru, int offR, int offC, - double[] values) { - throw new NotImplementedException("Sparse block decompression for DeltaDDC not yet implemented"); + protected void decompressToDenseBlockSparseDictionary(DenseBlock db, int rl, int ru, int offR, int offC, + SparseBlock sb) { + throw new NotImplementedException("Dense block decompression from sparse dictionary for DeltaDDC not yet implemented"); + } + + @Override + protected void decompressToSparseBlockSparseDictionary(SparseBlock ret, int rl, int ru, int offR, int offC, + SparseBlock sb) { + throw new NotImplementedException("Sparse block decompression from sparse dictionary for DeltaDDC not yet implemented"); + } + + @Override + protected void decompressToDenseBlockTransposedSparseDictionary(DenseBlock db, int rl, int ru, SparseBlock sb) { + throw new NotImplementedException("Transposed dense block decompression from sparse dictionary for DeltaDDC not yet implemented"); + } + + @Override + protected void decompressToDenseBlockTransposedDenseDictionary(DenseBlock db, int rl, int ru, double[] dict) { + throw new NotImplementedException("Transposed dense block decompression from dense dictionary for DeltaDDC not yet implemented"); + } + + @Override + protected void decompressToSparseBlockTransposedSparseDictionary(SparseBlockMCSR sbr, 
SparseBlock sb, int nColOut) { + throw new NotImplementedException("Transposed sparse block decompression from sparse dictionary for DeltaDDC not yet implemented"); + } + + @Override + protected void decompressToSparseBlockTransposedDenseDictionary(SparseBlockMCSR sbr, double[] dict, int nColOut) { + throw new NotImplementedException("Transposed sparse block decompression from dense dictionary for DeltaDDC not yet implemented"); } @Override public AColGroup scalarOperation(ScalarOperator op) { - if(_dict instanceof DeltaDictionary) { - DeltaDictionary deltaDict = (DeltaDictionary) _dict; - IDictionary newDict = deltaDict.applyScalarOp(op); - return new ColGroupDeltaDDC(_colIndexes, newDict, _data, getCachedCounts()); + if(op.fn instanceof Multiply || op.fn instanceof Divide) { + return super.scalarOperation(op); + } + else if(op.fn instanceof Plus || op.fn instanceof Minus) { + return scalarOperationShift(op); } else { - throw new DMLRuntimeException("DeltaDDC must use DeltaDictionary"); + throw new NotImplementedException("Scalar op " + op.fn.getClass().getSimpleName() + " not supported for DeltaDDC"); + } + } + + private AColGroup scalarOperationShift(ScalarOperator op) { + final int nCol = _colIndexes.size(); + final int id0 = _data.getIndex(0); + final double[] vals = _dict.getValues(); + final double[] tuple0 = new double[nCol]; + for(int j = 0; j < nCol; j++) + tuple0[j] = vals[id0 * nCol + j]; + + final double[] tupleNew = new double[nCol]; + for(int j = 0; j < nCol; j++) + tupleNew[j] = op.executeScalar(tuple0[j]); + + int[] counts = getCounts(); + if(counts[id0] == 1) { + double[] newVals = vals.clone(); + for(int j = 0; j < nCol; j++) + newVals[id0 * nCol + j] = tupleNew[j]; + return create(_colIndexes, new DeltaDictionary(newVals, nCol), _data, counts); + } + else { + int idNew = -1; + int nEntries = vals.length / nCol; + for(int k = 0; k < nEntries; k++) { + boolean match = true; + for(int j = 0; j < nCol; j++) { + if(vals[k * nCol + j] != tupleNew[j]) 
{ + match = false; + break; + } + } + if(match) { + idNew = k; + break; + } + } + + IDictionary newDict = _dict; + if(idNew == -1) { + double[] newVals = Arrays.copyOf(vals, vals.length + nCol); + System.arraycopy(tupleNew, 0, newVals, vals.length, nCol); + newDict = new DeltaDictionary(newVals, nCol); + idNew = nEntries; + } + + AMapToData newData = _data.slice(0, _data.size()); + if(idNew >= newData.getUpperBoundValue()) { + newData = newData.resize(idNew + 1); + } + newData.set(0, idNew); + + return create(_colIndexes, newDict, newData, null); + } + } + + @Override + public AColGroup unaryOperation(UnaryOperator op) { + throw new NotImplementedException("Unary operation not supported for DeltaDDC"); + } + + @Override + public void leftMultByMatrixNoPreAgg(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, int cu) { + throw new NotImplementedException("Left matrix multiplication not supported for DeltaDDC"); + } + + @Override + public void rightDecompressingMult(MatrixBlock right, MatrixBlock ret, int rl, int ru, int nRows, int crl, int cru) { + throw new NotImplementedException("Right matrix multiplication not supported for DeltaDDC"); + } + + @Override + public void preAggregateDense(MatrixBlock m, double[] preAgg, int rl, int ru, int cl, int cu) { + throw new NotImplementedException("Pre-aggregate dense not supported for DeltaDDC"); + } + + @Override + public void preAggregateSparse(SparseBlock sb, double[] preAgg, int rl, int ru, int cl, int cu) { + throw new NotImplementedException("Pre-aggregate sparse not supported for DeltaDDC"); + } + + @Override + public void preAggregateThatDDCStructure(ColGroupDDC that, Dictionary ret) { + throw new NotImplementedException("Pre-aggregate DDC structure not supported for DeltaDDC"); + } + + @Override + public void preAggregateThatSDCZerosStructure(ColGroupSDCZeros that, Dictionary ret) { + throw new NotImplementedException("Pre-aggregate SDCZeros structure not supported for DeltaDDC"); + } + + @Override + 
public void preAggregateThatSDCSingleZerosStructure(ColGroupSDCSingleZeros that, Dictionary ret) { + throw new NotImplementedException("Pre-aggregate SDCSingleZeros structure not supported for DeltaDDC"); + } + + @Override + protected void preAggregateThatRLEStructure(ColGroupRLE that, Dictionary ret) { + throw new NotImplementedException("Pre-aggregate RLE structure not supported for DeltaDDC"); + } + + @Override + protected double computeMxx(double c, Builtin builtin) { + throw new NotImplementedException("Compute Min/Max not supported for DeltaDDC"); + } + + @Override + protected void computeColMxx(double[] c, Builtin builtin) { + throw new NotImplementedException("Compute Column Min/Max not supported for DeltaDDC"); + } + + @Override + protected void computeRowMxx(double[] c, Builtin builtin, int rl, int ru, double[] preAgg) { + throw new NotImplementedException("Compute Row Min/Max not supported for DeltaDDC"); + } + + @Override + protected void computeRowSums(double[] c, int rl, int ru, double[] preAgg) { + throw new NotImplementedException("Compute Row Sums not supported for DeltaDDC"); + } + + @Override + protected void computeRowProduct(double[] c, int rl, int ru, double[] preAgg) { + throw new NotImplementedException("Compute Row Product not supported for DeltaDDC"); + } + + @Override + public boolean containsValue(double pattern) { + throw new NotImplementedException("Contains value not supported for DeltaDDC"); + } + + @Override + public AColGroup append(AColGroup g) { + throw new NotImplementedException("Append not supported for DeltaDDC"); + } + + @Override + public AColGroup appendNInternal(AColGroup[] g, int blen, int rlen) { + throw new NotImplementedException("AppendN not supported for DeltaDDC"); + } + + @Override + public long getNumberNonZeros(int nRows) { + long nnz = 0; + final int nCol = _colIndexes.size(); + final double[] prevRow = new double[nCol]; + + for(int i = 0; i < nRows; i++) { + final int dictIdx = _data.getIndex(i); + final 
double[] vals = _dict.getValues(); + final int rowIndex = dictIdx * nCol; + + if(i == 0) { + for(int j = 0; j < nCol; j++) { + double val = vals[rowIndex + j]; + prevRow[j] = val; + if(val != 0) + nnz++; + } + } + else { + for(int j = 0; j < nCol; j++) { + double val = prevRow[j] + vals[rowIndex + j]; + prevRow[j] = val; + if(val != 0) + nnz++; + } + } + } + return nnz; + } + + @Override + public AColGroup sliceRows(int rl, int ru) { + AMapToData slicedData = _data.slice(rl, ru); + final int nCol = _colIndexes.size(); + double[] firstRowValues = new double[nCol]; + double[] dictVals = ((DeltaDictionary)_dict).getValues(); + + for(int i = 0; i <= rl; i++) { + int dictIdx = _data.getIndex(i); + int dictOffset = dictIdx * nCol; + if(i == 0) { + for(int j = 0; j < nCol; j++) firstRowValues[j] = dictVals[dictOffset + j]; + } else { + for(int j = 0; j < nCol; j++) firstRowValues[j] += dictVals[dictOffset + j]; + } + } + + int nEntries = dictVals.length / nCol; + int newId = -1; + for(int k = 0; k < nEntries; k++) { + boolean match = true; + for(int j = 0; j < nCol; j++) { + if(dictVals[k * nCol + j] != firstRowValues[j]) { + match = false; + break; + } + } + if(match) { + newId = k; + break; + } } + + IDictionary newDict = _dict; + if(newId == -1) { + double[] newDictVals = Arrays.copyOf(dictVals, dictVals.length + nCol); + System.arraycopy(firstRowValues, 0, newDictVals, dictVals.length, nCol); + newDict = new DeltaDictionary(newDictVals, nCol); + newId = nEntries; + + if(newId >= slicedData.getUpperBoundValue()) { + slicedData = slicedData.resize(newId + 1); + } + } + + slicedData.set(0, newId); + return ColGroupDeltaDDC.create(_colIndexes, newDict, slicedData, null); } } diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupFactory.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupFactory.java index d368b84dc2b..b3c7e21ed7e 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupFactory.java +++ 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupFactory.java @@ -292,6 +292,9 @@ else if(ct == CompressionType.DDC) { else if(ct == CompressionType.DeltaDDC) { return directCompressDeltaDDC(colIndexes, cg); } + else if(ct == CompressionType.CONST && cs.preferDeltaEncoding) { + return directCompressDeltaDDC(colIndexes, cg); + } else if(ct == CompressionType.LinearFunctional) { if(cs.scaleFactors != null) { throw new NotImplementedException(); // quantization-fused compression NOT allowed @@ -315,7 +318,7 @@ else if(ct == CompressionType.SDC && colIndexes.size() == 1 && !t) { return new ColGroupEmpty(colIndexes); } final IntArrayList[] of = ubm.getOffsetList(); - if(of.length == 1 && of[0].size() == nRow) { // If this always constant + if(of.length == 1 && of[0].size() == nRow && ct != CompressionType.DeltaDDC) { // If this always constant return ColGroupConst.create(colIndexes, DictionaryFactory.create(ubm)); } @@ -756,7 +759,8 @@ private AColGroup directCompressDeltaDDCMultiCol(IColIndex colIndexes, Compresse final ACount[] vals = map.extractValues(); final int nVals = vals.length; - final double[] dictValues = new double[nVals * colIndexes.size()]; + final int nTuplesOut = nVals + (extra ? 
1 : 0); + final double[] dictValues = new double[nTuplesOut * colIndexes.size()]; final int[] oldIdToNewId = new int[map.size()]; int idx = 0; for(int i = 0; i < nVals; i++) { diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupIO.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupIO.java index 91442281317..b47100d4e64 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupIO.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupIO.java @@ -105,10 +105,12 @@ public static AColGroup readColGroup(DataInput in, int nRows) throws IOException switch(ctype) { case DDC: return ColGroupDDC.read(in); - case DDCFOR: - return ColGroupDDCFOR.read(in); - case OLE: - return ColGroupOLE.read(in, nRows); + case DDCFOR: + return ColGroupDDCFOR.read(in); + case DeltaDDC: + return ColGroupDeltaDDC.read(in); + case OLE: + return ColGroupOLE.read(in, nRows); case RLE: return ColGroupRLE.read(in, nRows); case CONST: diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/DeltaDictionary.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/DeltaDictionary.java index d67ab95f824..d667e76ed5e 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/DeltaDictionary.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/DeltaDictionary.java @@ -19,14 +19,13 @@ package org.apache.sysds.runtime.compress.colgroup.dictionary; +import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; import org.apache.commons.lang3.NotImplementedException; import org.apache.sysds.runtime.functionobjects.Divide; -import org.apache.sysds.runtime.functionobjects.Minus; import org.apache.sysds.runtime.functionobjects.Multiply; -import org.apache.sysds.runtime.functionobjects.Plus; import org.apache.sysds.runtime.matrix.operators.ScalarOperator; /** @@ -50,26 +49,22 @@ public double[] getValues(){ return 
_values; } + @Override + public double getValue(int i, int col, int nCol) { + return _values[i * nCol + col]; + } + @Override public DeltaDictionary applyScalarOp(ScalarOperator op) { - final double[] retV = new double[_values.length]; if(op.fn instanceof Multiply || op.fn instanceof Divide) { + final double[] retV = new double[_values.length]; for(int i = 0; i < _values.length; i++) retV[i] = op.executeScalar(_values[i]); + return new DeltaDictionary(retV, _numCols); } - else if(op.fn instanceof Plus || op.fn instanceof Minus) { - // With Plus and Minus only the first row needs to be updated when delta encoded - for(int i = 0; i < _values.length; i++) { - if(i < _numCols) - retV[i] = op.executeScalar(_values[i]); - else - retV[i] = _values[i]; - } + else { + throw new NotImplementedException("Scalar op " + op.fn.getClass().getSimpleName() + " not supported in DeltaDictionary"); } - else - throw new NotImplementedException(); - - return new DeltaDictionary(retV, _numCols); } @Override @@ -79,17 +74,30 @@ public long getInMemorySize() { @Override public void write(DataOutput out) throws IOException { - throw new NotImplementedException(); + out.writeByte(DictionaryFactory.Type.DELTA_DICT.ordinal()); + out.writeInt(_numCols); + out.writeInt(_values.length); + for(int i = 0; i < _values.length; i++) + out.writeDouble(_values[i]); + } + + public static DeltaDictionary read(DataInput in) throws IOException { + int numCols = in.readInt(); + int numValues = in.readInt(); + double[] values = new double[numValues]; + for(int i = 0; i < numValues; i++) + values[i] = in.readDouble(); + return new DeltaDictionary(values, numCols); } @Override public long getExactSizeOnDisk() { - throw new NotImplementedException(); + return 1 + 4 + 4 + 8L * _values.length; } @Override public DictType getDictType() { - throw new NotImplementedException(); + return DictType.Delta; } @Override @@ -104,12 +112,19 @@ public int getNumberOfColumns(int nrow){ @Override public String getString(int 
colIndexes) { - throw new NotImplementedException(); + StringBuilder sb = new StringBuilder(); + for(int i = 0; i < _values.length; i++) { + sb.append(_values[i]); + if(i != _values.length - 1) { + sb.append((i + 1) % colIndexes == 0 ? "\n" : ", "); + } + } + return sb.toString(); } @Override public long getNumberNonZeros(int[] counts, int nCol) { - throw new NotImplementedException(); + throw new NotImplementedException("Cannot calculate non-zeros from DeltaDictionary alone"); } @Override diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/DictionaryFactory.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/DictionaryFactory.java index f88ac99b87b..005d14f9ce1 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/DictionaryFactory.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/DictionaryFactory.java @@ -52,7 +52,7 @@ public interface DictionaryFactory { static final Log LOG = LogFactory.getLog(DictionaryFactory.class.getName()); public enum Type { - FP64_DICT, MATRIX_BLOCK_DICT, INT8_DICT, IDENTITY, IDENTITY_SLICE, PLACE_HOLDER + FP64_DICT, MATRIX_BLOCK_DICT, INT8_DICT, IDENTITY, IDENTITY_SLICE, PLACE_HOLDER, DELTA_DICT } public static IDictionary read(DataInput in) throws IOException { @@ -68,6 +68,8 @@ public static IDictionary read(DataInput in) throws IOException { return IdentityDictionary.read(in); case IDENTITY_SLICE: return IdentityDictionarySlice.read(in); + case DELTA_DICT: + return DeltaDictionary.read(in); case MATRIX_BLOCK_DICT: default: return MatrixBlockDictionary.read(in); diff --git a/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibUnary.java b/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibUnary.java index 781c13de1d8..335aaaef0c5 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibUnary.java +++ b/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibUnary.java @@ -30,6 +30,7 @@ import 
org.apache.sysds.runtime.compress.CompressionStatistics; import org.apache.sysds.runtime.compress.colgroup.AColGroup; import org.apache.sysds.runtime.compress.colgroup.AColGroup.CompressionType; +import org.apache.sysds.runtime.compress.colgroup.ColGroupDDC; import org.apache.sysds.runtime.functionobjects.Builtin; import org.apache.sysds.runtime.functionobjects.Builtin.BuiltinCode; import org.apache.sysds.runtime.matrix.data.LibMatrixAgg; @@ -50,6 +51,41 @@ public static MatrixBlock unaryOperations(CompressedMatrixBlock m, UnaryOperator final int c = m.getNumColumns(); if(Builtin.isBuiltinCode(op.fn, BuiltinCode.CUMSUM, BuiltinCode.ROWCUMSUM)) { + List groups = m.getColGroups(); + boolean allDDC = true; + for(AColGroup g : groups) { + if(g.getCompType() != CompressionType.DDC) { + allDDC = false; + break; + } + } + + if(allDDC && !groups.isEmpty()) { + MatrixBlock uncompressed = m.getUncompressed("CUMSUM/ROWCUMSUM requires uncompression", op.getNumThreads()); + MatrixBlock opResult = uncompressed.unaryOperations(op, null); + + List convertedGroups = new ArrayList<>(groups.size()); + for(AColGroup g : groups) { + AColGroup converted = ((ColGroupDDC) g).convertToDeltaDDC(); + if(converted == null) { + allDDC = false; + break; + } + convertedGroups.add(converted); + } + + if(allDDC) { + CompressedMatrixBlock ret = new CompressedMatrixBlock(m.getNumRows(), m.getNumColumns()); + ret.allocateColGroupList(convertedGroups); + ret.recomputeNonZeros(); + + MatrixBlock verifyDecompressed = ret.getUncompressed("Verification", op.getNumThreads()); + if(verifyDecompressed.equals(opResult)) { + return ret; + } + } + } + MatrixBlock uncompressed = m.getUncompressed("CUMSUM/ROWCUMSUM requires uncompression", op.getNumThreads()); MatrixBlock opResult = uncompressed.unaryOperations(op, null); @@ -57,7 +93,7 @@ public static MatrixBlock unaryOperations(CompressedMatrixBlock m, UnaryOperator csb.clearValidCompression(); csb.setPreferDeltaEncoding(true); 
csb.addValidCompression(CompressionType.DeltaDDC); - csb.setMinimumCompressionRatio(0.0); + csb.addValidCompression(CompressionType.UNCOMPRESSED); csb.setTransposeInput("false"); Pair compressedPair = CompressedMatrixBlockFactory.compress(opResult, op.getNumThreads(), csb); MatrixBlock compressedResult = compressedPair.getLeft(); diff --git a/src/main/java/org/apache/sysds/runtime/compress/utils/DblArrayCountHashMap.java b/src/main/java/org/apache/sysds/runtime/compress/utils/DblArrayCountHashMap.java index cf8771d83aa..6b720c9dee2 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/utils/DblArrayCountHashMap.java +++ b/src/main/java/org/apache/sysds/runtime/compress/utils/DblArrayCountHashMap.java @@ -40,7 +40,7 @@ protected final int hash(DblArray key) { } protected final DArrCounts create(DblArray key, int id) { - return new DArrCounts(key, id); + return new DArrCounts(new DblArray(key), id); } @Override diff --git a/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDeltaDDCTest.java b/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDeltaDDCTest.java index 4b9643592b8..54c35866aa6 100644 --- a/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDeltaDDCTest.java +++ b/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDeltaDDCTest.java @@ -20,7 +20,15 @@ package org.apache.sysds.test.component.compress.colgroup; import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.util.Collections; import java.util.EnumSet; import org.apache.commons.lang3.NotImplementedException; @@ -30,8 +38,9 @@ import org.apache.sysds.runtime.compress.CompressionSettings; import 
org.apache.sysds.runtime.compress.CompressionSettingsBuilder; import org.apache.sysds.runtime.compress.colgroup.AColGroup; -import org.apache.sysds.runtime.compress.colgroup.AColGroup.CompressionType; +import org.apache.sysds.runtime.compress.colgroup.ColGroupDeltaDDC; import org.apache.sysds.runtime.compress.colgroup.ColGroupFactory; +import org.apache.sysds.runtime.compress.colgroup.ColGroupIO; import org.apache.sysds.runtime.compress.colgroup.indexes.ColIndexFactory; import org.apache.sysds.runtime.compress.colgroup.indexes.IColIndex; import org.apache.sysds.runtime.compress.estim.ComEstExact; @@ -65,6 +74,21 @@ public void testDecompressToDenseBlockTwoColumnsTransposed() { testDecompressToDenseBlock(new double[][] {{1, 2, 3, 4, 5}, {1, 1, 1, 1, 1}}, true); } + @Test + public void testDecompressToDenseBlockPartialRangeSingleColumn() { + testDecompressToDenseBlockPartialRange(new double[][] {{1}, {2}, {3}, {4}, {5}}, false, 2, 5); + } + + @Test + public void testDecompressToDenseBlockPartialRangeTwoColumns() { + testDecompressToDenseBlockPartialRange(new double[][] {{1, 2}, {2, 3}, {3, 4}, {4, 5}, {5, 6}}, false, 1, 4); + } + + @Test + public void testDecompressToDenseBlockPartialRangeFromMiddle() { + testDecompressToDenseBlockPartialRange(new double[][] {{1, 2}, {2, 3}, {3, 4}, {4, 5}, {5, 6}, {6, 7}}, false, 3, 6); + } + public void testDecompressToDenseBlock(double[][] data, boolean isTransposed) { if(isTransposed) { throw new NotImplementedException("Delta encoding for transposed matrices not yet implemented"); @@ -104,4 +128,147 @@ public void testDecompressToDenseBlock(double[][] data, boolean isTransposed) { } } + public void testDecompressToDenseBlockPartialRange(double[][] data, boolean isTransposed, int rl, int ru) { + if(isTransposed) { + throw new NotImplementedException("Delta encoding for transposed matrices not yet implemented"); + } + + MatrixBlock mbt = DataConverter.convertToMatrixBlock(data); + + final int numCols = mbt.getNumColumns(); + final 
int numRows = mbt.getNumRows(); + IColIndex colIndexes = ColIndexFactory.create(numCols); + + try { + CompressionSettingsBuilder csb = new CompressionSettingsBuilder().setSamplingRatio(1.0) + .setValidCompressions(EnumSet.of(AColGroup.CompressionType.DeltaDDC)) + .setPreferDeltaEncoding(true) + .setTransposeInput("false"); + CompressionSettings cs = csb.create(); + + final CompressedSizeInfoColGroup cgi = new ComEstExact(mbt, cs).getColGroupInfo(colIndexes); + CompressedSizeInfo csi = new CompressedSizeInfo(cgi); + AColGroup cg = ColGroupFactory.compressColGroups(mbt, csi, cs, 1).get(0); + + assertTrue("Column group should be DeltaDDC, not Const", cg instanceof ColGroupDeltaDDC); + + MatrixBlock ret = new MatrixBlock(numRows, numCols, false); + ret.allocateDenseBlock(); + cg.decompressToDenseBlock(ret.getDenseBlock(), rl, ru); + + MatrixBlock expected = DataConverter.convertToMatrixBlock(data); + for(int i = rl; i < ru; i++) { + for(int j = 0; j < numCols; j++) { + double expectedValue = expected.get(i, j); + double actualValue = ret.get(i, j); + assertArrayEquals(new double[] {expectedValue}, new double[] {actualValue}, 0.01); + } + } + + } + catch(NotImplementedException e) { + throw e; + } + catch(Exception e) { + e.printStackTrace(); + throw new DMLRuntimeException("Failed partial range decompression : " + this.getClass().getSimpleName(), e); + } + } + + @Test + public void testSerializationSingleColumn() throws IOException { + double[][] data = {{1}, {2}, {3}, {4}, {5}}; + MatrixBlock mbt = DataConverter.convertToMatrixBlock(data); + final int numCols = mbt.getNumColumns(); + final int numRows = mbt.getNumRows(); + IColIndex colIndexes = ColIndexFactory.create(numCols); + + CompressionSettingsBuilder csb = new CompressionSettingsBuilder().setSamplingRatio(1.0) + .setValidCompressions(EnumSet.of(AColGroup.CompressionType.DeltaDDC)) + .setPreferDeltaEncoding(true) + .setTransposeInput("false"); + CompressionSettings cs = csb.create(); + + final 
CompressedSizeInfoColGroup cgi = new ComEstExact(mbt, cs).getDeltaColGroupInfo(colIndexes); + CompressedSizeInfo csi = new CompressedSizeInfo(cgi); + AColGroup original = ColGroupFactory.compressColGroups(mbt, csi, cs, 1).get(0); + + assertTrue("Original should be ColGroupDeltaDDC", original instanceof ColGroupDeltaDDC); + + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + DataOutputStream dos = new DataOutputStream(bos); + ColGroupIO.writeGroups(dos, Collections.singletonList(original)); + assertEquals(original.getExactSizeOnDisk() + 4, bos.size()); + + ByteArrayInputStream bis = new ByteArrayInputStream(bos.toByteArray()); + DataInputStream dis = new DataInputStream(bis); + AColGroup deserialized = ColGroupIO.readGroups(dis, numRows).get(0); + + assertTrue("Deserialized should be ColGroupDeltaDDC", deserialized instanceof ColGroupDeltaDDC); + assertEquals("Compression type should match", original.getCompType(), deserialized.getCompType()); + assertEquals("Exact size on disk should match", original.getExactSizeOnDisk(), deserialized.getExactSizeOnDisk()); + + MatrixBlock originalDecompressed = new MatrixBlock(numRows, numCols, false); + originalDecompressed.allocateDenseBlock(); + original.decompressToDenseBlock(originalDecompressed.getDenseBlock(), 0, numRows); + + MatrixBlock deserializedDecompressed = new MatrixBlock(numRows, numCols, false); + deserializedDecompressed.allocateDenseBlock(); + deserialized.decompressToDenseBlock(deserializedDecompressed.getDenseBlock(), 0, numRows); + + for(int i = 0; i < numRows; i++) { + for(int j = 0; j < numCols; j++) { + assertArrayEquals(new double[] {originalDecompressed.get(i, j)}, new double[] {deserializedDecompressed.get(i, j)}, 0.01); + } + } + } + + @Test + public void testSerializationTwoColumns() throws IOException { + double[][] data = {{1, 2}, {2, 3}, {3, 4}, {4, 5}, {5, 6}}; + MatrixBlock mbt = DataConverter.convertToMatrixBlock(data); + final int numCols = mbt.getNumColumns(); + final int numRows = 
mbt.getNumRows(); + IColIndex colIndexes = ColIndexFactory.create(numCols); + + CompressionSettingsBuilder csb = new CompressionSettingsBuilder().setSamplingRatio(1.0) + .setValidCompressions(EnumSet.of(AColGroup.CompressionType.DeltaDDC)) + .setPreferDeltaEncoding(true) + .setTransposeInput("false"); + CompressionSettings cs = csb.create(); + + final CompressedSizeInfoColGroup cgi = new ComEstExact(mbt, cs).getDeltaColGroupInfo(colIndexes); + CompressedSizeInfo csi = new CompressedSizeInfo(cgi); + AColGroup original = ColGroupFactory.compressColGroups(mbt, csi, cs, 1).get(0); + + assertTrue("Original should be ColGroupDeltaDDC", original instanceof ColGroupDeltaDDC); + + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + DataOutputStream dos = new DataOutputStream(bos); + ColGroupIO.writeGroups(dos, Collections.singletonList(original)); + assertEquals(original.getExactSizeOnDisk() + 4, bos.size()); + + ByteArrayInputStream bis = new ByteArrayInputStream(bos.toByteArray()); + DataInputStream dis = new DataInputStream(bis); + AColGroup deserialized = ColGroupIO.readGroups(dis, numRows).get(0); + + assertTrue("Deserialized should be ColGroupDeltaDDC", deserialized instanceof ColGroupDeltaDDC); + assertEquals("Compression type should match", original.getCompType(), deserialized.getCompType()); + assertEquals("Exact size on disk should match", original.getExactSizeOnDisk(), deserialized.getExactSizeOnDisk()); + + MatrixBlock originalDecompressed = new MatrixBlock(numRows, numCols, false); + originalDecompressed.allocateDenseBlock(); + original.decompressToDenseBlock(originalDecompressed.getDenseBlock(), 0, numRows); + + MatrixBlock deserializedDecompressed = new MatrixBlock(numRows, numCols, false); + deserializedDecompressed.allocateDenseBlock(); + deserialized.decompressToDenseBlock(deserializedDecompressed.getDenseBlock(), 0, numRows); + + for(int i = 0; i < numRows; i++) { + for(int j = 0; j < numCols; j++) { + assertArrayEquals(new double[] 
{originalDecompressed.get(i, j)}, new double[] {deserializedDecompressed.get(i, j)}, 0.01); + } + } + } + } diff --git a/src/test/java/org/apache/sysds/test/component/compress/colgroup/JolEstimateDeltaDDCTest.java b/src/test/java/org/apache/sysds/test/component/compress/colgroup/JolEstimateDeltaDDCTest.java index de2d310acce..f0a3dda1c1c 100644 --- a/src/test/java/org/apache/sysds/test/component/compress/colgroup/JolEstimateDeltaDDCTest.java +++ b/src/test/java/org/apache/sysds/test/component/compress/colgroup/JolEstimateDeltaDDCTest.java @@ -24,6 +24,8 @@ import org.apache.sysds.runtime.compress.colgroup.AColGroup; import org.apache.sysds.runtime.matrix.data.MatrixBlock; +import org.apache.sysds.runtime.util.DataConverter; +import org.apache.sysds.test.TestUtils; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; @@ -34,28 +36,25 @@ public class JolEstimateDeltaDDCTest extends JolEstimateTest { public static Collection data() { ArrayList tests = new ArrayList<>(); - // MatrixBlock mb; + MatrixBlock mb; - // mb = DataConverter.convertToMatrixBlock(new double[][] {{0}}); - // tests.add(new Object[] {mb}); + mb = DataConverter.convertToMatrixBlock(new double[][] {{0}}); + tests.add(new Object[] {mb}); - // mb = DataConverter.convertToMatrixBlock(new double[][] {{1}}); - // tests.add(new Object[] {mb}); + mb = DataConverter.convertToMatrixBlock(new double[][] {{1}}); + tests.add(new Object[] {mb}); - // TODO add reader that reads as if Delta encoded. - // then afterwards use this test. 
+ mb = DataConverter.convertToMatrixBlock(new double[][] {{1, 2, 3, 4, 5}}); + tests.add(new Object[] {mb}); - // mb = DataConverter.convertToMatrixBlock(new double[][] {{1, 2, 3, 4, 5}}); - // tests.add(new Object[] {mb}); + mb = DataConverter.convertToMatrixBlock(new double[][] {{1,2,3},{1,1,1}}); + tests.add(new Object[] {mb}); - // mb = DataConverter.convertToMatrixBlock(new double[][] {{1,2,3},{1,1,1}}); - // tests.add(new Object[] {mb}); + mb = DataConverter.convertToMatrixBlock(new double[][] {{1, 1}, {2, 1}, {3, 1}, {4, 1}, {5, 1}}); + tests.add(new Object[] {mb}); - // mb = DataConverter.convertToMatrixBlock(new double[][] {{1, 1}, {2, 1}, {3, 1}, {4, 1}, {5, 1}}); - // tests.add(new Object[] {mb}); - - // mb = TestUtils.generateTestMatrixBlock(2, 5, 0, 20, 1.0, 7); - // tests.add(new Object[] {mb}); + mb = TestUtils.generateTestMatrixBlock(2, 5, 0, 20, 1.0, 7); + tests.add(new Object[] {mb}); return tests; } @@ -68,4 +67,9 @@ public JolEstimateDeltaDDCTest(MatrixBlock mb) { public AColGroup.CompressionType getCT() { return delta; } + + @Override + protected boolean shouldTranspose() { + return false; + } } diff --git a/src/test/java/org/apache/sysds/test/component/compress/colgroup/JolEstimateTest.java b/src/test/java/org/apache/sysds/test/component/compress/colgroup/JolEstimateTest.java index 8c30b398b7c..f4ffe92eb60 100644 --- a/src/test/java/org/apache/sysds/test/component/compress/colgroup/JolEstimateTest.java +++ b/src/test/java/org/apache/sysds/test/component/compress/colgroup/JolEstimateTest.java @@ -70,6 +70,10 @@ public abstract class JolEstimateTest { public abstract CompressionType getCT(); + protected boolean shouldTranspose() { + return true; + } + private final long actualSize; private final int actualNumberUnique; private final AColGroup cg; @@ -77,16 +81,21 @@ public abstract class JolEstimateTest { public JolEstimateTest(MatrixBlock mbt) { CompressedMatrixBlock.debug = true; this.mbt = mbt; - colIndexes = 
ColIndexFactory.create(mbt.getNumRows()); + colIndexes = ColIndexFactory.create(shouldTranspose() ? mbt.getNumRows() : mbt.getNumColumns()); mbt.recomputeNonZeros(); mbt.examSparsity(); try { - CompressionSettings cs = new CompressionSettingsBuilder().setSamplingRatio(1.0) - .setValidCompressions(EnumSet.of(getCT())).create(); - cs.transposed = true; + CompressionSettingsBuilder csb = new CompressionSettingsBuilder().setSamplingRatio(1.0) + .setValidCompressions(EnumSet.of(getCT())); + boolean useDelta = getCT() == CompressionType.DeltaDDC; + if(useDelta) + csb.setPreferDeltaEncoding(true); + CompressionSettings cs = csb.create(); + cs.transposed = shouldTranspose(); - final CompressedSizeInfoColGroup cgi = new ComEstExact(mbt, cs).getColGroupInfo(colIndexes); + final ComEstExact est = new ComEstExact(mbt, cs); + final CompressedSizeInfoColGroup cgi = useDelta ? est.getDeltaColGroupInfo(colIndexes) : est.getColGroupInfo(colIndexes); final CompressedSizeInfo csi = new CompressedSizeInfo(cgi); final List groups = ColGroupFactory.compressColGroups(mbt, csi, cs, 1); @@ -158,13 +167,17 @@ public void compressedSizeInfoEstimatorSample(double ratio, double tolerance) { if(mbt.getNumColumns() > 10000) tolerance *= 0.95; - final CompressionSettings cs = csb.setSamplingRatio(ratio).setMinimumSampleSize(10) - .setValidCompressions(EnumSet.of(getCT())).create(); - cs.transposed = true; + CompressionSettingsBuilder testCsb = csb.setSamplingRatio(ratio).setMinimumSampleSize(10) + .setValidCompressions(EnumSet.of(getCT())); + boolean useDelta = getCT() == CompressionType.DeltaDDC; + if(useDelta) + testCsb.setPreferDeltaEncoding(true); + final CompressionSettings cs = testCsb.create(); + cs.transposed = shouldTranspose(); final int sampleSize = Math.max(10, (int) (mbt.getNumColumns() * ratio)); final AComEst est = ComEstFactory.createEstimator(mbt, cs, sampleSize, 1); - final CompressedSizeInfoColGroup cInfo = est.getColGroupInfo(colIndexes); + final CompressedSizeInfoColGroup 
cInfo = useDelta ? est.getDeltaColGroupInfo(colIndexes) : est.getColGroupInfo(colIndexes); final int estimateNUniques = cInfo.getNumVals(); final double estimateCSI = (cg.getCompType() == CompressionType.CONST) ? ColGroupSizes diff --git a/src/test/java/org/apache/sysds/test/component/compress/dictionary/DeltaDictionaryTest.java b/src/test/java/org/apache/sysds/test/component/compress/dictionary/DeltaDictionaryTest.java index 5ba6b88d251..69ddb175628 100644 --- a/src/test/java/org/apache/sysds/test/component/compress/dictionary/DeltaDictionaryTest.java +++ b/src/test/java/org/apache/sysds/test/component/compress/dictionary/DeltaDictionaryTest.java @@ -18,8 +18,16 @@ */ package org.apache.sysds.test.component.compress.dictionary; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; + import org.apache.commons.lang3.NotImplementedException; import org.apache.sysds.runtime.compress.colgroup.dictionary.DeltaDictionary; +import org.apache.sysds.runtime.compress.colgroup.dictionary.DictionaryFactory; +import org.apache.sysds.runtime.compress.colgroup.dictionary.IDictionary; import org.apache.sysds.runtime.functionobjects.And; import org.apache.sysds.runtime.functionobjects.Divide; import org.apache.sysds.runtime.functionobjects.Minus; @@ -83,51 +91,79 @@ public void testScalarOpRightDivideTwoColumns() { Assert.assertArrayEquals(expected, d.getValues(), 0.01); } - @Test + @Test(expected = NotImplementedException.class) public void testScalarOpRightPlusSingleColumn() { double scalar = 2; DeltaDictionary d = new DeltaDictionary(new double[] {1, 2}, 1); ScalarOperator sop = new RightScalarOperator(Plus.getPlusFnObject(), scalar, 1); - d = d.applyScalarOp(sop); - double[] expected = new double[] {3, 2}; - Assert.assertArrayEquals(expected, d.getValues(), 0.01); + d.applyScalarOp(sop); } - @Test + @Test(expected = NotImplementedException.class) public void 
testScalarOpRightPlusTwoColumns() { double scalar = 2; DeltaDictionary d = new DeltaDictionary(new double[] {1, 2, 3, 4}, 2); ScalarOperator sop = new RightScalarOperator(Plus.getPlusFnObject(), scalar, 1); - d = d.applyScalarOp(sop); - double[] expected = new double[] {3, 4, 3, 4}; - Assert.assertArrayEquals(expected, d.getValues(), 0.01); + d.applyScalarOp(sop); } - @Test + @Test(expected = NotImplementedException.class) public void testScalarOpRightMinusTwoColumns() { double scalar = 2; DeltaDictionary d = new DeltaDictionary(new double[] {1, 2, 3, 4}, 2); ScalarOperator sop = new RightScalarOperator(Minus.getMinusFnObject(), scalar, 1); - d = d.applyScalarOp(sop); - double[] expected = new double[] {-1, 0, 3, 4}; - Assert.assertArrayEquals(expected, d.getValues(), 0.01); + d.applyScalarOp(sop); } - @Test + @Test(expected = NotImplementedException.class) public void testScalarOpLeftPlusTwoColumns() { double scalar = 2; DeltaDictionary d = new DeltaDictionary(new double[] {1, 2, 3, 4}, 2); ScalarOperator sop = new LeftScalarOperator(Plus.getPlusFnObject(), scalar, 1); - d = d.applyScalarOp(sop); - double[] expected = new double[] {3, 4, 3, 4}; - Assert.assertArrayEquals(expected, d.getValues(), 0.01); + d.applyScalarOp(sop); } @Test(expected = NotImplementedException.class) - public void testNotImplemented() { + public void testScalarOpAnd() { double scalar = 2; DeltaDictionary d = new DeltaDictionary(new double[] {1, 2, 3, 4}, 2); ScalarOperator sop = new LeftScalarOperator(And.getAndFnObject(), scalar, 1); - d = d.applyScalarOp(sop); + d.applyScalarOp(sop); + } + + @Test + public void testSerializationSingleColumn() throws IOException { + DeltaDictionary original = new DeltaDictionary(new double[] {1, 2, 3, 4, 5}, 1); + + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + DataOutputStream dos = new DataOutputStream(bos); + original.write(dos); + Assert.assertEquals(original.getExactSizeOnDisk(), bos.size()); + + ByteArrayInputStream bis = new 
ByteArrayInputStream(bos.toByteArray()); + DataInputStream dis = new DataInputStream(bis); + IDictionary deserialized = DictionaryFactory.read(dis); + + Assert.assertTrue("Deserialized dictionary should be DeltaDictionary", deserialized instanceof DeltaDictionary); + DeltaDictionary deltaDict = (DeltaDictionary) deserialized; + Assert.assertArrayEquals("Values should match after serialization", original.getValues(), deltaDict.getValues(), 0.01); + } + + @Test + public void testSerializationTwoColumns() throws IOException { + DeltaDictionary original = new DeltaDictionary(new double[] {1, 2, 3, 4, 5, 6}, 2); + + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + DataOutputStream dos = new DataOutputStream(bos); + original.write(dos); + Assert.assertEquals(original.getExactSizeOnDisk(), bos.size()); + + ByteArrayInputStream bis = new ByteArrayInputStream(bos.toByteArray()); + DataInputStream dis = new DataInputStream(bis); + IDictionary deserialized = DictionaryFactory.read(dis); + + Assert.assertTrue("Deserialized dictionary should be DeltaDictionary", deserialized instanceof DeltaDictionary); + DeltaDictionary deltaDict = (DeltaDictionary) deserialized; + Assert.assertArrayEquals("Values should match after serialization", original.getValues(), deltaDict.getValues(), 0.01); } } diff --git a/src/test/java/org/apache/sysds/test/component/compress/estim/encoding/EncodeDeltaTest.java b/src/test/java/org/apache/sysds/test/component/compress/estim/encoding/EncodeDeltaTest.java index 63a15119295..1da2af43246 100644 --- a/src/test/java/org/apache/sysds/test/component/compress/estim/encoding/EncodeDeltaTest.java +++ b/src/test/java/org/apache/sysds/test/component/compress/estim/encoding/EncodeDeltaTest.java @@ -24,19 +24,18 @@ import static org.junit.Assert.assertTrue; import org.apache.commons.lang3.NotImplementedException; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; import 
org.apache.sysds.runtime.compress.estim.encoding.EncodingFactory; import org.apache.sysds.runtime.compress.estim.encoding.EmptyEncoding; import org.apache.sysds.runtime.compress.estim.encoding.IEncode; +import org.apache.sysds.runtime.compress.estim.encoding.DenseEncoding; +import org.apache.sysds.runtime.compress.estim.encoding.SparseEncoding; +import org.apache.sysds.runtime.compress.estim.encoding.ConstEncoding; import org.apache.sysds.runtime.compress.colgroup.indexes.ColIndexFactory; import org.apache.sysds.runtime.matrix.data.MatrixBlock; import org.junit.Test; public class EncodeDeltaTest { - protected static final Log LOG = LogFactory.getLog(EncodeDeltaTest.class.getName()); - @Test public void testCreateFromMatrixBlockDeltaBasic() { MatrixBlock mb = new MatrixBlock(3, 2, false); @@ -49,8 +48,10 @@ public void testCreateFromMatrixBlockDeltaBasic() { mb.set(2, 1, 22); IEncode encoding = EncodingFactory.createFromMatrixBlockDelta(mb, false, ColIndexFactory.create(2)); - assertNotNull(encoding); - assertTrue(encoding.getUnique() >= 1); + assertNotNull("Encoding should not be null", encoding); + assertTrue("Encoding should be DenseEncoding", encoding instanceof DenseEncoding); + assertEquals("First row [10,20] stored as-is, deltas [1,1] for rows 1-2, so 2 unique: [10,20] and [1,1]", 2, encoding.getUnique()); + assertTrue("Encoding should be dense", encoding.isDense()); } @Test @@ -63,7 +64,11 @@ public void testCreateFromMatrixBlockDeltaWithSampleSize() { } IEncode encoding = EncodingFactory.createFromMatrixBlockDelta(mb, false, ColIndexFactory.create(2), 3); - assertNotNull(encoding); + assertNotNull("Encoding should not be null", encoding); + assertTrue("Encoding should be DenseEncoding", encoding instanceof DenseEncoding); + assertEquals("Sample size is 3, so should process 3 rows", 3, ((DenseEncoding) encoding).getMap().size()); + assertTrue("Should have at least 1 unique delta value", encoding.getUnique() >= 1); + assertTrue("Should have at most 3 unique 
delta values (one per row)", encoding.getUnique() <= 3); } @Test @@ -76,7 +81,9 @@ public void testCreateFromMatrixBlockDeltaFirstRowAsIs() { mb.set(1, 1, 10); IEncode encoding = EncodingFactory.createFromMatrixBlockDelta(mb, false, ColIndexFactory.create(2)); - assertNotNull(encoding); + assertNotNull("Encoding should not be null", encoding); + assertTrue("Encoding should be DenseEncoding", encoding instanceof DenseEncoding); + assertEquals("First row [5,10] stored as-is, delta [0,0] for row 1, so 2 unique: [5,10] and [0,0]", 2, encoding.getUnique()); } @Test @@ -93,8 +100,10 @@ public void testCreateFromMatrixBlockDeltaConstantDeltas() { mb.set(3, 1, 23); IEncode encoding = EncodingFactory.createFromMatrixBlockDelta(mb, false, ColIndexFactory.create(2)); - assertNotNull(encoding); - assertTrue(encoding.getUnique() <= 2); + assertNotNull("Encoding should not be null", encoding); + assertTrue("Encoding should be DenseEncoding", encoding instanceof DenseEncoding); + assertEquals("First row [10,20] stored as-is, all deltas are [1,1], so 2 unique: [10,20] and [1,1]", 2, encoding.getUnique()); + assertTrue("Encoding should be dense", encoding.isDense()); } @Test @@ -105,7 +114,10 @@ public void testCreateFromMatrixBlockDeltaSingleRow() { mb.set(0, 1, 20); IEncode encoding = EncodingFactory.createFromMatrixBlockDelta(mb, false, ColIndexFactory.create(2)); - assertNotNull(encoding); + assertNotNull("Encoding should not be null", encoding); + // Single row results in ConstEncoding because there is only 1 unique value (the row itself) + assertTrue("Single row should result in ConstEncoding", encoding instanceof ConstEncoding); + assertEquals("Single row has no deltas, so should have 1 unique value (the row itself)", 1, encoding.getUnique()); } @Test @@ -117,7 +129,10 @@ public void testCreateFromMatrixBlockDeltaSparse() { mb.set(2, 1, 22); IEncode encoding = EncodingFactory.createFromMatrixBlockDelta(mb, false, ColIndexFactory.create(2)); - assertNotNull(encoding); + 
assertNotNull("Encoding should not be null", encoding); + assertTrue("Sparse input may result in SparseEncoding or DenseEncoding", + encoding instanceof DenseEncoding || encoding instanceof SparseEncoding); + assertTrue("Should have at least 1 unique value", encoding.getUnique() >= 1); } @Test @@ -138,7 +153,10 @@ public void testCreateFromMatrixBlockDeltaColumnSelection() { mb.set(2, 3, 42); IEncode encoding = EncodingFactory.createFromMatrixBlockDelta(mb, false, ColIndexFactory.create(0, 2)); - assertNotNull(encoding); + assertNotNull("Encoding should not be null", encoding); + assertTrue("Encoding should be DenseEncoding", encoding instanceof DenseEncoding); + assertEquals("Selected columns 0 and 2: first row [10,30] stored as-is, deltas [1,1] for rows 1-2, so 2 unique: [10,30] and [1,1]", 2, encoding.getUnique()); + assertEquals("Should have 3 rows in mapping", 3, ((DenseEncoding) encoding).getMap().size()); } @Test @@ -153,7 +171,10 @@ public void testCreateFromMatrixBlockDeltaNegativeValues() { mb.set(2, 1, 25); IEncode encoding = EncodingFactory.createFromMatrixBlockDelta(mb, false, ColIndexFactory.create(2)); - assertNotNull(encoding); + assertNotNull("Encoding should not be null", encoding); + assertTrue("Encoding should be DenseEncoding", encoding instanceof DenseEncoding); + // Deltas: R0=[10,20], R1=[-2,-5], R2=[4,10] -> 3 unique values + assertEquals("Should have 3 unique values", 3, encoding.getUnique()); } @Test @@ -168,7 +189,10 @@ public void testCreateFromMatrixBlockDeltaZeros() { mb.set(2, 1, 5); IEncode encoding = EncodingFactory.createFromMatrixBlockDelta(mb, false, ColIndexFactory.create(2)); - assertNotNull(encoding); + assertNotNull("Encoding should not be null", encoding); + assertTrue("Encoding should be DenseEncoding or SparseEncoding", + encoding instanceof DenseEncoding || encoding instanceof SparseEncoding); + assertTrue("Should have at least 1 unique value", encoding.getUnique() >= 1); } @Test(expected = NotImplementedException.class) 
@@ -192,7 +216,10 @@ public void testCreateFromMatrixBlockDeltaLargeMatrix() { } IEncode encoding = EncodingFactory.createFromMatrixBlockDelta(mb, false, ColIndexFactory.create(3)); - assertNotNull(encoding); + assertNotNull("Encoding should not be null", encoding); + assertTrue("Encoding should be DenseEncoding", encoding instanceof DenseEncoding); + assertEquals("First row [0,0,0] stored as-is, all deltas are [1,2,3], so 2 unique: [0,0,0] and [1,2,3]", 2, encoding.getUnique()); + assertEquals("Should have 100 rows in mapping", 100, ((DenseEncoding) encoding).getMap().size()); } @Test @@ -205,7 +232,10 @@ public void testCreateFromMatrixBlockDeltaSampleSizeSmaller() { } IEncode encoding = EncodingFactory.createFromMatrixBlockDelta(mb, false, ColIndexFactory.create(2), 5); - assertNotNull(encoding); + assertNotNull("Encoding should not be null", encoding); + assertTrue("Encoding should be DenseEncoding", encoding instanceof DenseEncoding); + assertEquals("Sample size is 5, so should process 5 rows", 5, ((DenseEncoding) encoding).getMap().size()); + assertEquals("First row [10,20] stored as-is, all deltas are [1,1], so 2 unique: [10,20] and [1,1]", 2, encoding.getUnique()); } @Test @@ -218,32 +248,147 @@ public void testCreateFromMatrixBlockDeltaSampleSizeLarger() { } IEncode encoding = EncodingFactory.createFromMatrixBlockDelta(mb, false, ColIndexFactory.create(2), 10); - assertNotNull(encoding); + assertNotNull("Encoding should not be null", encoding); + assertTrue("Encoding should be DenseEncoding", encoding instanceof DenseEncoding); + assertEquals("Sample size 10 > matrix rows 5, so should process all 5 rows", 5, ((DenseEncoding) encoding).getMap().size()); + assertEquals("First row [10,20] stored as-is, all deltas are [1,1], so 2 unique: [10,20] and [1,1]", 2, encoding.getUnique()); } @Test public void testCreateFromMatrixBlockDeltaEmptyMatrix() { - // Test empty matrix with dimensions but all zeros MatrixBlock mb = new MatrixBlock(5, 2, false); 
mb.allocateDenseBlock(); - // Matrix has dimensions but is empty (all zeros) - // isEmpty() should return true IEncode encoding = EncodingFactory.createFromMatrixBlockDelta(mb, false, ColIndexFactory.create(2)); - assertNotNull(encoding); - assertTrue(encoding instanceof EmptyEncoding); + assertNotNull("Encoding should not be null", encoding); + // Empty matrix (all zeros) is constant 0 in delta encoding + assertTrue("Empty matrix should result in ConstEncoding or EmptyEncoding", + encoding instanceof ConstEncoding || encoding instanceof EmptyEncoding); + // Both ConstEncoding(0) and EmptyEncoding return 1 unique value (the zero tuple) + assertEquals("Encoding of zeros should have 1 unique value", 1, encoding.getUnique()); } @Test public void testCreateFromMatrixBlockDeltaEmptyMatrixSparse() { - // Test empty sparse matrix with dimensions MatrixBlock mb = new MatrixBlock(5, 2, true); - // Sparse matrix with no values is empty mb.setNonZeros(0); IEncode encoding = EncodingFactory.createFromMatrixBlockDelta(mb, false, ColIndexFactory.create(2)); - assertNotNull(encoding); - assertTrue(encoding instanceof EmptyEncoding); + assertNotNull("Encoding should not be null", encoding); + // Empty sparse matrix is also constant 0 + assertTrue("Empty sparse matrix should result in ConstEncoding or EmptyEncoding", + encoding instanceof ConstEncoding || encoding instanceof EmptyEncoding); + // Both ConstEncoding(0) and EmptyEncoding return 1 unique value (the zero tuple) + assertEquals("Encoding of zeros should have 1 unique value", 1, encoding.getUnique()); + } + + @Test + public void testCombineTwoDenseDeltaEncodings() { + MatrixBlock mb1 = new MatrixBlock(3, 1, false); + mb1.allocateDenseBlock(); + mb1.set(0, 0, 10); + mb1.set(1, 0, 11); + mb1.set(2, 0, 12); + + MatrixBlock mb2 = new MatrixBlock(3, 1, false); + mb2.allocateDenseBlock(); + mb2.set(0, 0, 20); + mb2.set(1, 0, 21); + mb2.set(2, 0, 22); + + IEncode enc1 = EncodingFactory.createFromMatrixBlockDelta(mb1, false, 
ColIndexFactory.create(1)); + IEncode enc2 = EncodingFactory.createFromMatrixBlockDelta(mb2, false, ColIndexFactory.create(1)); + + assertNotNull("First encoding should not be null", enc1); + assertNotNull("Second encoding should not be null", enc2); + assertTrue("First encoding should be DenseEncoding", enc1 instanceof DenseEncoding); + assertTrue("Second encoding should be DenseEncoding", enc2 instanceof DenseEncoding); + + IEncode combined = enc1.combine(enc2); + assertNotNull("Combined encoding should not be null", combined); + assertTrue("Combined encoding should be DenseEncoding", combined instanceof DenseEncoding); + assertTrue("Combined unique count should be at least max of inputs", + combined.getUnique() >= Math.max(enc1.getUnique(), enc2.getUnique())); + assertTrue("Combined unique count should be at most product of inputs", + combined.getUnique() <= enc1.getUnique() * enc2.getUnique()); + assertEquals("Combined mapping should have same size as input", + ((DenseEncoding) enc1).getMap().size(), ((DenseEncoding) combined).getMap().size()); + } + + @Test + public void testCombineDenseDeltaEncodingWithEmpty() { + MatrixBlock mb = new MatrixBlock(3, 1, false); + mb.allocateDenseBlock(); + mb.set(0, 0, 10); + mb.set(1, 0, 11); + mb.set(2, 0, 12); + + IEncode enc1 = EncodingFactory.createFromMatrixBlockDelta(mb, false, ColIndexFactory.create(1)); + IEncode enc2 = new EmptyEncoding(); + + assertNotNull("First encoding should not be null", enc1); + assertTrue("First encoding should be DenseEncoding", enc1 instanceof DenseEncoding); + + IEncode combined = enc1.combine(enc2); + assertNotNull("Combined encoding should not be null", combined); + assertEquals("Combining with EmptyEncoding should return original encoding", enc1, combined); + } + + @Test + public void testCombineDenseDeltaEncodingWithConst() { + MatrixBlock mb = new MatrixBlock(3, 1, false); + mb.allocateDenseBlock(); + mb.set(0, 0, 10); + mb.set(1, 0, 11); + mb.set(2, 0, 12); + + IEncode enc1 = 
EncodingFactory.createFromMatrixBlockDelta(mb, false, ColIndexFactory.create(1)); + + MatrixBlock constMb = new MatrixBlock(3, 1, false); + constMb.allocateDenseBlock(); + constMb.set(0, 0, 5); + constMb.set(1, 0, 5); + constMb.set(2, 0, 5); + IEncode enc2 = EncodingFactory.createFromMatrixBlock(constMb, false, ColIndexFactory.create(1)); + + assertNotNull("First encoding should not be null", enc1); + assertTrue("First encoding should be DenseEncoding", enc1 instanceof DenseEncoding); + assertTrue("Second encoding should be ConstEncoding", enc2 instanceof ConstEncoding); + + IEncode combined = enc1.combine(enc2); + assertNotNull("Combined encoding should not be null", combined); + assertEquals("Combining with ConstEncoding should return original encoding", enc1, combined); + } + + @Test + public void testCombineDenseDeltaEncodingsWithDifferentDeltas() { + MatrixBlock mb1 = new MatrixBlock(4, 1, false); + mb1.allocateDenseBlock(); + mb1.set(0, 0, 1); + mb1.set(1, 0, 2); + mb1.set(2, 0, 4); + mb1.set(3, 0, 8); + + MatrixBlock mb2 = new MatrixBlock(4, 1, false); + mb2.allocateDenseBlock(); + mb2.set(0, 0, 10); + mb2.set(1, 0, 20); + mb2.set(2, 0, 40); + mb2.set(3, 0, 80); + + IEncode enc1 = EncodingFactory.createFromMatrixBlockDelta(mb1, false, ColIndexFactory.create(1)); + IEncode enc2 = EncodingFactory.createFromMatrixBlockDelta(mb2, false, ColIndexFactory.create(1)); + + assertNotNull("First encoding should not be null", enc1); + assertNotNull("Second encoding should not be null", enc2); + assertTrue("First encoding should be DenseEncoding", enc1 instanceof DenseEncoding); + assertTrue("Second encoding should be DenseEncoding", enc2 instanceof DenseEncoding); + + IEncode combined = enc1.combine(enc2); + assertNotNull("Combined encoding should not be null", combined); + assertTrue("Combined encoding should be DenseEncoding", combined instanceof DenseEncoding); + assertEquals("Combined mapping should have same size as input", + 4, ((DenseEncoding) 
combined).getMap().size()); } } diff --git a/src/test/java/org/apache/sysds/test/component/compress/lib/CLALibUnaryDeltaTest.java b/src/test/java/org/apache/sysds/test/component/compress/lib/CLALibUnaryDeltaTest.java index 8c3d5a7b439..f91779e8d85 100644 --- a/src/test/java/org/apache/sysds/test/component/compress/lib/CLALibUnaryDeltaTest.java +++ b/src/test/java/org/apache/sysds/test/component/compress/lib/CLALibUnaryDeltaTest.java @@ -1,6 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + package org.apache.sysds.test.component.compress.lib; -import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; From e005659c940b2bae253e506c031278dbdd299e46 Mon Sep 17 00:00:00 2001 From: Hana Halitim Date: Thu, 11 Dec 2025 19:41:05 +0100 Subject: [PATCH 3/6] [SYSTEMDS-3539] DeltaDDC Refinements - Implemented DeltaDDC conversion to DDC for unsupported scalar/unary ops (e.g., K-Means). - Added comprehensive tests for relational and unary operations in ColGroupDeltaDDCTest. 
--- .../compress/colgroup/ColGroupDeltaDDC.java | 78 ++++++++++++- .../colgroup/ColGroupDeltaDDCTest.java | 104 ++++++++++++++++++ 2 files changed, 180 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDeltaDDC.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDeltaDDC.java index c49845627c4..9c71e0c6ccb 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDeltaDDC.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDeltaDDC.java @@ -22,6 +22,7 @@ import java.io.DataInput; import java.io.IOException; import java.util.Arrays; +import java.util.Comparator; import org.apache.commons.lang3.NotImplementedException; import org.apache.sysds.runtime.compress.CompressedMatrixBlock; @@ -34,6 +35,10 @@ import org.apache.sysds.runtime.compress.colgroup.indexes.IColIndex; import org.apache.sysds.runtime.compress.colgroup.mapping.AMapToData; import org.apache.sysds.runtime.compress.colgroup.mapping.MapToFactory; +import org.apache.sysds.runtime.compress.utils.ACount; +import org.apache.sysds.runtime.compress.utils.DblArray; +import org.apache.sysds.runtime.compress.utils.DblArrayCountHashMap; +import org.apache.sysds.runtime.compress.utils.DoubleCountHashMap; import org.apache.sysds.runtime.compress.utils.Util; import org.apache.sysds.runtime.data.DenseBlock; import org.apache.sysds.runtime.data.SparseBlock; @@ -267,7 +272,8 @@ else if(op.fn instanceof Plus || op.fn instanceof Minus) { return scalarOperationShift(op); } else { - throw new NotImplementedException("Scalar op " + op.fn.getClass().getSimpleName() + " not supported for DeltaDDC"); + AColGroup ddc = convertToDDC(); + return ddc.scalarOperation(op); } } @@ -327,7 +333,8 @@ private AColGroup scalarOperationShift(ScalarOperator op) { @Override public AColGroup unaryOperation(UnaryOperator op) { - throw new NotImplementedException("Unary operation not supported for DeltaDDC"); + AColGroup ddc = 
convertToDDC(); + return ddc.unaryOperation(op); } @Override @@ -489,4 +496,71 @@ public AColGroup sliceRows(int rl, int ru) { slicedData.set(0, newId); return ColGroupDeltaDDC.create(_colIndexes, newDict, slicedData, null); } + + private AColGroup convertToDDC() { + final int nCol = _colIndexes.size(); + final int nRow = _data.size(); + double[] values = new double[nRow * nCol]; + + double[] prevRow = new double[nCol]; + for(int i = 0; i < nRow; i++) { + final int dictIdx = _data.getIndex(i); + final double[] dictVals = _dict.getValues(); + final int rowIndex = dictIdx * nCol; + + for(int j = 0; j < nCol; j++) { + if(i == 0) { + prevRow[j] = dictVals[rowIndex + j]; + } + else { + prevRow[j] = prevRow[j] + dictVals[rowIndex + j]; + } + values[i * nCol + j] = prevRow[j]; + } + } + + return compress(values, _colIndexes); + } + + private static AColGroup compress(double[] values, IColIndex colIndexes) { + int nRow = values.length / colIndexes.size(); + int nCol = colIndexes.size(); + + if(nCol == 1) { + DoubleCountHashMap map = new DoubleCountHashMap(16); + AMapToData mapData = MapToFactory.create(nRow, 256); + for(int i = 0; i < nRow; i++) { + mapData.set(i, map.increment(values[i])); + } + if(map.size() == 1) + return ColGroupConst.create(colIndexes, Dictionary.create(new double[] {map.getMostFrequent()})); + + IDictionary dict = Dictionary.create(map.getDictionary()); + return ColGroupDDC.create(colIndexes, dict, mapData.resize(map.size()), null); + } + else { + DblArrayCountHashMap map = new DblArrayCountHashMap(16); + AMapToData mapData = MapToFactory.create(nRow, 256); + DblArray dblArray = new DblArray(new double[nCol]); + for(int i = 0; i < nRow; i++) { + System.arraycopy(values, i * nCol, dblArray.getData(), 0, nCol); + mapData.set(i, map.increment(dblArray)); + } + if(map.size() == 1) { + ACount[] counts = map.extractValues(); + return ColGroupConst.create(colIndexes, Dictionary.create(counts[0].key().getData())); + } + + ACount[] counts = 
map.extractValues(); + Arrays.sort(counts, Comparator.comparingInt(x -> x.id)); + + double[] dictValues = new double[counts.length * nCol]; + for(int i = 0; i < counts.length; i++) { + System.arraycopy(counts[i].key().getData(), 0, dictValues, i * nCol, nCol); + } + + IDictionary dict = Dictionary.create(dictValues); + return ColGroupDDC.create(colIndexes, dict, mapData.resize(map.size()), null); + } + } } diff --git a/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDeltaDDCTest.java b/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDeltaDDCTest.java index 54c35866aa6..9bf0d1984a8 100644 --- a/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDeltaDDCTest.java +++ b/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDeltaDDCTest.java @@ -46,7 +46,13 @@ import org.apache.sysds.runtime.compress.estim.ComEstExact; import org.apache.sysds.runtime.compress.estim.CompressedSizeInfo; import org.apache.sysds.runtime.compress.estim.CompressedSizeInfoColGroup; +import org.apache.sysds.runtime.functionobjects.Builtin; +import org.apache.sysds.runtime.functionobjects.Equals; +import org.apache.sysds.runtime.functionobjects.GreaterThan; import org.apache.sysds.runtime.matrix.data.MatrixBlock; +import org.apache.sysds.runtime.matrix.operators.RightScalarOperator; +import org.apache.sysds.runtime.matrix.operators.ScalarOperator; +import org.apache.sysds.runtime.matrix.operators.UnaryOperator; import org.apache.sysds.runtime.util.DataConverter; import org.junit.Test; @@ -271,4 +277,102 @@ public void testSerializationTwoColumns() throws IOException { } } + @Test + public void testScalarEquals() { + double[][] data = {{0}, {1}, {2}, {3}, {0}}; + AColGroup cg = compressForTest(data); + assertTrue(cg instanceof ColGroupDeltaDDC); + + ScalarOperator op = new RightScalarOperator(Equals.getEqualsFnObject(), 0.0); + AColGroup res = cg.scalarOperation(op); + + MatrixBlock ret = new MatrixBlock(5, 1, 
false); + ret.allocateDenseBlock(); + res.decompressToDenseBlock(ret.getDenseBlock(), 0, 5); + + assertEquals(1.0, ret.get(0, 0), 0.0); + assertEquals(0.0, ret.get(1, 0), 0.0); + assertEquals(0.0, ret.get(2, 0), 0.0); + assertEquals(0.0, ret.get(3, 0), 0.0); + assertEquals(1.0, ret.get(4, 0), 0.0); + } + + @Test + public void testScalarGreaterThan() { + double[][] data = {{0}, {1}, {2}, {3}, {0}}; + AColGroup cg = compressForTest(data); + assertTrue(cg instanceof ColGroupDeltaDDC); + + ScalarOperator op = new RightScalarOperator(GreaterThan.getGreaterThanFnObject(), 1.5); + AColGroup res = cg.scalarOperation(op); + + MatrixBlock ret = new MatrixBlock(5, 1, false); + ret.allocateDenseBlock(); + res.decompressToDenseBlock(ret.getDenseBlock(), 0, 5); + + assertEquals(0.0, ret.get(0, 0), 0.0); + assertEquals(0.0, ret.get(1, 0), 0.0); + assertEquals(1.0, ret.get(2, 0), 0.0); + assertEquals(1.0, ret.get(3, 0), 0.0); + assertEquals(0.0, ret.get(4, 0), 0.0); + } + + @Test + public void testUnaryOperationSqrt() { + double[][] data = {{1}, {4}, {9}, {16}, {25}}; + AColGroup cg = compressForTest(data); + assertTrue(cg instanceof ColGroupDeltaDDC); + + UnaryOperator op = new UnaryOperator(Builtin.getBuiltinFnObject(Builtin.BuiltinCode.SQRT)); + AColGroup res = cg.unaryOperation(op); + + MatrixBlock ret = new MatrixBlock(5, 1, false); + ret.allocateDenseBlock(); + res.decompressToDenseBlock(ret.getDenseBlock(), 0, 5); + + assertEquals(1.0, ret.get(0, 0), 0.01); + assertEquals(2.0, ret.get(1, 0), 0.01); + assertEquals(3.0, ret.get(2, 0), 0.01); + assertEquals(4.0, ret.get(3, 0), 0.01); + assertEquals(5.0, ret.get(4, 0), 0.01); + } + + @Test + public void testScalarEqualsMultiColumn() { + double[][] data = {{0, 1}, {1, 2}, {2, 3}, {3, 4}, {0, 1}}; + AColGroup cg = compressForTest(data); + assertTrue(cg instanceof ColGroupDeltaDDC); + + ScalarOperator op = new RightScalarOperator(Equals.getEqualsFnObject(), 0.0); + AColGroup res = cg.scalarOperation(op); + + MatrixBlock ret = new 
MatrixBlock(5, 2, false); + ret.allocateDenseBlock(); + res.decompressToDenseBlock(ret.getDenseBlock(), 0, 5); + + assertEquals(1.0, ret.get(0, 0), 0.0); + assertEquals(0.0, ret.get(0, 1), 0.0); + assertEquals(0.0, ret.get(1, 0), 0.0); + assertEquals(0.0, ret.get(1, 1), 0.0); + assertEquals(0.0, ret.get(2, 0), 0.0); + assertEquals(0.0, ret.get(2, 1), 0.0); + assertEquals(0.0, ret.get(3, 0), 0.0); + assertEquals(0.0, ret.get(3, 1), 0.0); + assertEquals(1.0, ret.get(4, 0), 0.0); + assertEquals(0.0, ret.get(4, 1), 0.0); + } + + private AColGroup compressForTest(double[][] data) { + MatrixBlock mb = DataConverter.convertToMatrixBlock(data); + IColIndex colIndexes = ColIndexFactory.create(data[0].length); + CompressionSettings cs = new CompressionSettingsBuilder() + .setValidCompressions(EnumSet.of(AColGroup.CompressionType.DeltaDDC)) + .setPreferDeltaEncoding(true) + .create(); + + final CompressedSizeInfoColGroup cgi = new ComEstExact(mb, cs).getDeltaColGroupInfo(colIndexes); + CompressedSizeInfo csi = new CompressedSizeInfo(cgi); + return ColGroupFactory.compressColGroups(mb, csi, cs, 1).get(0); + } + } From 3888383c2443dad988f8d94ebeba8b364e486391 Mon Sep 17 00:00:00 2001 From: Hana Halitim Date: Sun, 14 Dec 2025 01:20:42 +0100 Subject: [PATCH 4/6] [SYSTEMDS-3539] DeltaDDC Refinements and Reviewer Fixes - ColGroupDDC: Reverted defensive try-catch around getValues() to match project convention. - ColGroupFactory: Removed redundant check preventing CONST groups when DeltaDDC is requested. - CLALibUnary: Removed flawed CUMSUM optimization and ROWCUMSUM support; rely on robust recompression fallback. - ColGroupDeltaDDC: Implemented dynamic resizing for map construction to handle unknown unique counts (>256). - ColGroupDeltaDDC: Fixed and verified scalar shift logic with map handling. - DblArrayCountHashMap: Removed redundant object creation. 
- Tests: Added comprehensive tests for scalar ops in ColGroupDeltaDDCTest; adjusted CLALibUnaryDeltaTest to reflect removed ROWCUMSUM support. --- .../compress/colgroup/ColGroupDDC.java | 37 +++++----------- .../compress/colgroup/ColGroupDeltaDDC.java | 29 +++++++----- .../compress/colgroup/ColGroupFactory.java | 2 +- .../runtime/compress/lib/CLALibUnary.java | 40 +---------------- .../compress/utils/DblArrayCountHashMap.java | 2 +- .../colgroup/ColGroupDeltaDDCTest.java | 44 +++++++++++++++++++ .../compress/lib/CLALibUnaryDeltaTest.java | 40 ----------------- 7 files changed, 76 insertions(+), 118 deletions(-) diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDC.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDC.java index c6d3d6d10c3..ac4defcabd5 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDC.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDC.java @@ -1119,36 +1119,19 @@ public AColGroup convertToDeltaDDC() { DblArray dblArray = new DblArray(rowDelta); int[] rowToDictId = new int[numRows]; - double[] dictVals = null; - try { - dictVals = _dict.getValues(); - } catch (Exception e) { - } + double[] dictVals = _dict.getValues(); for(int i = 0; i < numRows; i++) { int dictIdx = _data.getIndex(i); - if(dictVals != null) { - int off = dictIdx * numCols; - for(int j = 0; j < numCols; j++) { - double val = dictVals[off + j]; - if(i == 0) { - rowDelta[j] = val; - prevRow[j] = val; - } else { - rowDelta[j] = val - prevRow[j]; - prevRow[j] = val; - } - } - } else { - for(int j = 0; j < numCols; j++) { - double val = _dict.getValue(dictIdx, j, numCols); - if(i == 0) { - rowDelta[j] = val; - prevRow[j] = val; - } else { - rowDelta[j] = val - prevRow[j]; - prevRow[j] = val; - } + int off = dictIdx * numCols; + for(int j = 0; j < numCols; j++) { + double val = dictVals[off + j]; + if(i == 0) { + rowDelta[j] = val; + prevRow[j] = val; + } else { + rowDelta[j] = 
val - prevRow[j]; + prevRow[j] = val; } } diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDeltaDDC.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDeltaDDC.java index 9c71e0c6ccb..09d7715e163 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDeltaDDC.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDeltaDDC.java @@ -321,10 +321,9 @@ private AColGroup scalarOperationShift(ScalarOperator op) { idNew = nEntries; } - AMapToData newData = _data.slice(0, _data.size()); - if(idNew >= newData.getUpperBoundValue()) { - newData = newData.resize(idNew + 1); - } + AMapToData newData = MapToFactory.create(_data.size(), Math.max(_data.getUpperBoundValue(), idNew) + 1); + for(int i = 0; i < _data.size(); i++) + newData.set(i, _data.getIndex(i)); newData.set(0, idNew); return create(_colIndexes, newDict, newData, null); @@ -450,7 +449,6 @@ public long getNumberNonZeros(int nRows) { @Override public AColGroup sliceRows(int rl, int ru) { - AMapToData slicedData = _data.slice(rl, ru); final int nCol = _colIndexes.size(); double[] firstRowValues = new double[nCol]; double[] dictVals = ((DeltaDictionary)_dict).getValues(); @@ -487,12 +485,13 @@ public AColGroup sliceRows(int rl, int ru) { System.arraycopy(firstRowValues, 0, newDictVals, dictVals.length, nCol); newDict = new DeltaDictionary(newDictVals, nCol); newId = nEntries; - - if(newId >= slicedData.getUpperBoundValue()) { - slicedData = slicedData.resize(newId + 1); - } } + int numRows = ru - rl; + AMapToData slicedData = MapToFactory.create(numRows, Math.max(_data.getUpperBoundValue(), newId) + 1); + for(int i = 0; i < numRows; i++) + slicedData.set(i, _data.getIndex(rl + i)); + slicedData.set(0, newId); return ColGroupDeltaDDC.create(_colIndexes, newDict, slicedData, null); } @@ -530,7 +529,11 @@ private static AColGroup compress(double[] values, IColIndex colIndexes) { DoubleCountHashMap map = new DoubleCountHashMap(16); 
AMapToData mapData = MapToFactory.create(nRow, 256); for(int i = 0; i < nRow; i++) { - mapData.set(i, map.increment(values[i])); + int id = map.increment(values[i]); + if(id >= mapData.getUpperBoundValue()) { + mapData = mapData.resize(Math.max(mapData.getUpperBoundValue() * 2, id + 1)); + } + mapData.set(i, id); } if(map.size() == 1) return ColGroupConst.create(colIndexes, Dictionary.create(new double[] {map.getMostFrequent()})); @@ -544,7 +547,11 @@ private static AColGroup compress(double[] values, IColIndex colIndexes) { DblArray dblArray = new DblArray(new double[nCol]); for(int i = 0; i < nRow; i++) { System.arraycopy(values, i * nCol, dblArray.getData(), 0, nCol); - mapData.set(i, map.increment(dblArray)); + int id = map.increment(dblArray); + if(id >= mapData.getUpperBoundValue()) { + mapData = mapData.resize(Math.max(mapData.getUpperBoundValue() * 2, id + 1)); + } + mapData.set(i, id); } if(map.size() == 1) { ACount[] counts = map.extractValues(); diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupFactory.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupFactory.java index b3c7e21ed7e..de333b9e05d 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupFactory.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupFactory.java @@ -318,7 +318,7 @@ else if(ct == CompressionType.SDC && colIndexes.size() == 1 && !t) { return new ColGroupEmpty(colIndexes); } final IntArrayList[] of = ubm.getOffsetList(); - if(of.length == 1 && of[0].size() == nRow && ct != CompressionType.DeltaDDC) { // If this always constant + if(of.length == 1 && of[0].size() == nRow) { // If this always constant return ColGroupConst.create(colIndexes, DictionaryFactory.create(ubm)); } diff --git a/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibUnary.java b/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibUnary.java index 335aaaef0c5..f0299669a06 100644 --- 
a/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibUnary.java +++ b/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibUnary.java @@ -30,7 +30,6 @@ import org.apache.sysds.runtime.compress.CompressionStatistics; import org.apache.sysds.runtime.compress.colgroup.AColGroup; import org.apache.sysds.runtime.compress.colgroup.AColGroup.CompressionType; -import org.apache.sysds.runtime.compress.colgroup.ColGroupDDC; import org.apache.sysds.runtime.functionobjects.Builtin; import org.apache.sysds.runtime.functionobjects.Builtin.BuiltinCode; import org.apache.sysds.runtime.matrix.data.LibMatrixAgg; @@ -50,43 +49,8 @@ public static MatrixBlock unaryOperations(CompressedMatrixBlock m, UnaryOperator final int r = m.getNumRows(); final int c = m.getNumColumns(); - if(Builtin.isBuiltinCode(op.fn, BuiltinCode.CUMSUM, BuiltinCode.ROWCUMSUM)) { - List groups = m.getColGroups(); - boolean allDDC = true; - for(AColGroup g : groups) { - if(g.getCompType() != CompressionType.DDC) { - allDDC = false; - break; - } - } - - if(allDDC && !groups.isEmpty()) { - MatrixBlock uncompressed = m.getUncompressed("CUMSUM/ROWCUMSUM requires uncompression", op.getNumThreads()); - MatrixBlock opResult = uncompressed.unaryOperations(op, null); - - List convertedGroups = new ArrayList<>(groups.size()); - for(AColGroup g : groups) { - AColGroup converted = ((ColGroupDDC) g).convertToDeltaDDC(); - if(converted == null) { - allDDC = false; - break; - } - convertedGroups.add(converted); - } - - if(allDDC) { - CompressedMatrixBlock ret = new CompressedMatrixBlock(m.getNumRows(), m.getNumColumns()); - ret.allocateColGroupList(convertedGroups); - ret.recomputeNonZeros(); - - MatrixBlock verifyDecompressed = ret.getUncompressed("Verification", op.getNumThreads()); - if(verifyDecompressed.equals(opResult)) { - return ret; - } - } - } - - MatrixBlock uncompressed = m.getUncompressed("CUMSUM/ROWCUMSUM requires uncompression", op.getNumThreads()); + if(Builtin.isBuiltinCode(op.fn, 
BuiltinCode.CUMSUM)) { + MatrixBlock uncompressed = m.getUncompressed("CUMSUM requires uncompression", op.getNumThreads()); MatrixBlock opResult = uncompressed.unaryOperations(op, null); CompressionSettingsBuilder csb = new CompressionSettingsBuilder(); diff --git a/src/main/java/org/apache/sysds/runtime/compress/utils/DblArrayCountHashMap.java b/src/main/java/org/apache/sysds/runtime/compress/utils/DblArrayCountHashMap.java index 6b720c9dee2..cf8771d83aa 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/utils/DblArrayCountHashMap.java +++ b/src/main/java/org/apache/sysds/runtime/compress/utils/DblArrayCountHashMap.java @@ -40,7 +40,7 @@ protected final int hash(DblArray key) { } protected final DArrCounts create(DblArray key, int id) { - return new DArrCounts(new DblArray(key), id); + return new DArrCounts(key, id); } @Override diff --git a/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDeltaDDCTest.java b/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDeltaDDCTest.java index 9bf0d1984a8..ddfa589142b 100644 --- a/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDeltaDDCTest.java +++ b/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDeltaDDCTest.java @@ -49,6 +49,8 @@ import org.apache.sysds.runtime.functionobjects.Builtin; import org.apache.sysds.runtime.functionobjects.Equals; import org.apache.sysds.runtime.functionobjects.GreaterThan; +import org.apache.sysds.runtime.functionobjects.Minus; +import org.apache.sysds.runtime.functionobjects.Plus; import org.apache.sysds.runtime.matrix.data.MatrixBlock; import org.apache.sysds.runtime.matrix.operators.RightScalarOperator; import org.apache.sysds.runtime.matrix.operators.ScalarOperator; @@ -317,6 +319,48 @@ public void testScalarGreaterThan() { assertEquals(0.0, ret.get(4, 0), 0.0); } + @Test + public void testScalarPlus() { + double[][] data = {{1}, {2}, {3}, {4}, {5}}; + AColGroup cg = compressForTest(data); 
+ assertTrue(cg instanceof ColGroupDeltaDDC); + + ScalarOperator op = new RightScalarOperator(Plus.getPlusFnObject(), 10.0); + AColGroup res = cg.scalarOperation(op); + assertTrue("Should remain DeltaDDC after shift", res instanceof ColGroupDeltaDDC); + + MatrixBlock ret = new MatrixBlock(5, 1, false); + ret.allocateDenseBlock(); + res.decompressToDenseBlock(ret.getDenseBlock(), 0, 5); + + assertEquals(11.0, ret.get(0, 0), 0.0); + assertEquals(12.0, ret.get(1, 0), 0.0); + assertEquals(13.0, ret.get(2, 0), 0.0); + assertEquals(14.0, ret.get(3, 0), 0.0); + assertEquals(15.0, ret.get(4, 0), 0.0); + } + + @Test + public void testScalarMinus() { + double[][] data = {{11}, {12}, {13}, {14}, {15}}; + AColGroup cg = compressForTest(data); + assertTrue(cg instanceof ColGroupDeltaDDC); + + ScalarOperator op = new RightScalarOperator(Minus.getMinusFnObject(), 10.0); + AColGroup res = cg.scalarOperation(op); + assertTrue("Should remain DeltaDDC after shift", res instanceof ColGroupDeltaDDC); + + MatrixBlock ret = new MatrixBlock(5, 1, false); + ret.allocateDenseBlock(); + res.decompressToDenseBlock(ret.getDenseBlock(), 0, 5); + + assertEquals(1.0, ret.get(0, 0), 0.0); + assertEquals(2.0, ret.get(1, 0), 0.0); + assertEquals(3.0, ret.get(2, 0), 0.0); + assertEquals(4.0, ret.get(3, 0), 0.0); + assertEquals(5.0, ret.get(4, 0), 0.0); + } + @Test public void testUnaryOperationSqrt() { double[][] data = {{1}, {4}, {9}, {16}, {25}}; diff --git a/src/test/java/org/apache/sysds/test/component/compress/lib/CLALibUnaryDeltaTest.java b/src/test/java/org/apache/sysds/test/component/compress/lib/CLALibUnaryDeltaTest.java index f91779e8d85..414db621ade 100644 --- a/src/test/java/org/apache/sysds/test/component/compress/lib/CLALibUnaryDeltaTest.java +++ b/src/test/java/org/apache/sysds/test/component/compress/lib/CLALibUnaryDeltaTest.java @@ -75,46 +75,6 @@ public void testCumsumResultsInDeltaEncoding() { assertTrue("Result should contain DeltaDDC column group", hasDeltaDDC); } - @Test - 
public void testRowcumsumResultsInDeltaEncoding() { - MatrixBlock mb = new MatrixBlock(3, 4, false); - mb.allocateDenseBlock(); - // Row 1: 1, 2, 3, 4 -> cumsum: 1, 3, 6, 10 - mb.set(0, 0, 1.0); - mb.set(0, 1, 2.0); - mb.set(0, 2, 3.0); - mb.set(0, 3, 4.0); - // Row 2: 1, 1, 1, 1 -> cumsum: 1, 2, 3, 4 - mb.set(1, 0, 1.0); - mb.set(1, 1, 1.0); - mb.set(1, 2, 1.0); - mb.set(1, 3, 1.0); - // Row 3: 5, 5, 5, 5 -> cumsum: 5, 10, 15, 20 - mb.set(2, 0, 5.0); - mb.set(2, 1, 5.0); - mb.set(2, 2, 5.0); - mb.set(2, 3, 5.0); - mb.setNonZeros(12); - - CompressionSettingsBuilder csb = new CompressionSettingsBuilder().setMinimumCompressionRatio(0.0); - csb.addValidCompression(CompressionType.DDC); - CompressedMatrixBlock cmb = compress(mb, csb); - - UnaryOperator rowCumsumOp = new UnaryOperator(Builtin.getBuiltinFnObject(Builtin.BuiltinCode.ROWCUMSUM)); - MatrixBlock result = CLALibUnary.unaryOperations(cmb, rowCumsumOp, null); - - assertNotNull("Result should not be null", result); - assertTrue("Result should be compressed", result instanceof CompressedMatrixBlock); - - CompressedMatrixBlock compressedResult = (CompressedMatrixBlock) result; - // Delta encoding is row-wise, so row cumsum might not always benefit from delta DDC as much as col cumsum - // but we enforce it preference so it should be there if applicable. - // Actually for row cumsum, the result across columns changes. - // Let's check correctness mainly. 
- MatrixBlock expected = mb.unaryOperations(rowCumsumOp, new MatrixBlock()); - TestUtils.compareMatrices(expected, result, 0.0, "RowCumsum result should match expected"); - } - @Test public void testCumsumCorrectness() { MatrixBlock mb = TestUtils.generateTestMatrixBlock(10, 3, 0, 10, 1.0, 7); From 8d8fcc3d4e4fffe2df6da9c48cf224c674ee337b Mon Sep 17 00:00:00 2001 From: Hana Halitim Date: Sun, 14 Dec 2025 12:54:22 +0100 Subject: [PATCH 5/6] [SYSTEMDS-3539] DeltaDDC Scalar Operations and Slicing Fixes - Corrected scalar Multiply and Divide for DeltaDDC by scaling the dictionary values instead of falling back to default DDC logic (which was incorrect for deltas). - Added unit tests for scalar operations (Plus, Minus, Multiply, Divide) in ColGroupDeltaDDCTest. - Implemented and tested sliceRows support in ColGroupDeltaDDCTest, verifying that slicing DeltaDDC column groups preserves the delta encoding structure. - Refined CLALibUnary structure by moving CUMSUM optimization check after isEmpty() check. 
--- .../compress/colgroup/ColGroupDeltaDDC.java | 6 +- .../runtime/compress/lib/CLALibUnary.java | 11 ++-- .../colgroup/ColGroupDeltaDDCTest.java | 62 +++++++++++++++++++ 3 files changed, 73 insertions(+), 6 deletions(-) diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDeltaDDC.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDeltaDDC.java index 09d7715e163..08bdfd1e1d8 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDeltaDDC.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDeltaDDC.java @@ -266,7 +266,11 @@ protected void decompressToSparseBlockTransposedDenseDictionary(SparseBlockMCSR @Override public AColGroup scalarOperation(ScalarOperator op) { if(op.fn instanceof Multiply || op.fn instanceof Divide) { - return super.scalarOperation(op); + double[] val = _dict.getValues(); + double[] newVal = new double[val.length]; + for(int i = 0; i < val.length; i++) + newVal[i] = op.executeScalar(val[i]); + return create(_colIndexes, new DeltaDictionary(newVal, _colIndexes.size()), _data, getCounts()); } else if(op.fn instanceof Plus || op.fn instanceof Minus) { return scalarOperationShift(op); diff --git a/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibUnary.java b/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibUnary.java index f0299669a06..cc0ff901df4 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibUnary.java +++ b/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibUnary.java @@ -49,6 +49,10 @@ public static MatrixBlock unaryOperations(CompressedMatrixBlock m, UnaryOperator final int r = m.getNumRows(); final int c = m.getNumColumns(); + // early aborts: + if(m.isEmpty()) + return new MatrixBlock(r, c, 0).unaryOperations(op, result); + if(Builtin.isBuiltinCode(op.fn, BuiltinCode.CUMSUM)) { MatrixBlock uncompressed = m.getUncompressed("CUMSUM requires uncompression", op.getNumThreads()); MatrixBlock opResult = 
uncompressed.unaryOperations(op, null); @@ -76,11 +80,8 @@ public static MatrixBlock unaryOperations(CompressedMatrixBlock m, UnaryOperator return finalResult; } - - // early aborts: - if(m.isEmpty()) - return new MatrixBlock(r, c, 0).unaryOperations(op, result); - else if(overlapping) { + + if(overlapping) { // when in overlapping state it is guaranteed that there is no infinites, NA, or NANs. if(Builtin.isBuiltinCode(op.fn, BuiltinCode.ISINF, BuiltinCode.ISNA, BuiltinCode.ISNAN)) return new MatrixBlock(r, c, 0); diff --git a/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDeltaDDCTest.java b/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDeltaDDCTest.java index ddfa589142b..e11a98f4f6d 100644 --- a/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDeltaDDCTest.java +++ b/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDeltaDDCTest.java @@ -47,7 +47,9 @@ import org.apache.sysds.runtime.compress.estim.CompressedSizeInfo; import org.apache.sysds.runtime.compress.estim.CompressedSizeInfoColGroup; import org.apache.sysds.runtime.functionobjects.Builtin; +import org.apache.sysds.runtime.functionobjects.Divide; import org.apache.sysds.runtime.functionobjects.Equals; +import org.apache.sysds.runtime.functionobjects.Multiply; import org.apache.sysds.runtime.functionobjects.GreaterThan; import org.apache.sysds.runtime.functionobjects.Minus; import org.apache.sysds.runtime.functionobjects.Plus; @@ -406,6 +408,66 @@ public void testScalarEqualsMultiColumn() { assertEquals(0.0, ret.get(4, 1), 0.0); } + @Test + public void testScalarMultiply() { + double[][] data = {{1}, {2}, {3}, {4}, {5}}; + AColGroup cg = compressForTest(data); + assertTrue(cg instanceof ColGroupDeltaDDC); + + ScalarOperator op = new RightScalarOperator(Multiply.getMultiplyFnObject(), 2.0); + AColGroup res = cg.scalarOperation(op); + + MatrixBlock ret = new MatrixBlock(5, 1, false); + ret.allocateDenseBlock(); + 
res.decompressToDenseBlock(ret.getDenseBlock(), 0, 5); + + assertEquals(2.0, ret.get(0, 0), 0.0); + assertEquals(4.0, ret.get(1, 0), 0.0); + assertEquals(6.0, ret.get(2, 0), 0.0); + assertEquals(8.0, ret.get(3, 0), 0.0); + assertEquals(10.0, ret.get(4, 0), 0.0); + } + + @Test + public void testScalarDivide() { + double[][] data = {{2}, {4}, {6}, {8}, {10}}; + AColGroup cg = compressForTest(data); + assertTrue(cg instanceof ColGroupDeltaDDC); + + ScalarOperator op = new RightScalarOperator(Divide.getDivideFnObject(), 2.0); + AColGroup res = cg.scalarOperation(op); + + MatrixBlock ret = new MatrixBlock(5, 1, false); + ret.allocateDenseBlock(); + res.decompressToDenseBlock(ret.getDenseBlock(), 0, 5); + + assertEquals(1.0, ret.get(0, 0), 0.0); + assertEquals(2.0, ret.get(1, 0), 0.0); + assertEquals(3.0, ret.get(2, 0), 0.0); + assertEquals(4.0, ret.get(3, 0), 0.0); + assertEquals(5.0, ret.get(4, 0), 0.0); + } + + @Test + public void testSliceRows() { + double[][] data = {{1, 2}, {3, 4}, {5, 6}, {7, 8}, {9, 10}}; + AColGroup cg = compressForTest(data); + + AColGroup sliced = cg.sliceRows(1, 4); + assertTrue(sliced instanceof ColGroupDeltaDDC); + + MatrixBlock ret = new MatrixBlock(3, 2, false); + ret.allocateDenseBlock(); + sliced.decompressToDenseBlock(ret.getDenseBlock(), 0, 3); + + assertEquals(3.0, ret.get(0, 0), 0.0); + assertEquals(4.0, ret.get(0, 1), 0.0); + assertEquals(5.0, ret.get(1, 0), 0.0); + assertEquals(6.0, ret.get(1, 1), 0.0); + assertEquals(7.0, ret.get(2, 0), 0.0); + assertEquals(8.0, ret.get(2, 1), 0.0); + } + private AColGroup compressForTest(double[][] data) { MatrixBlock mb = DataConverter.convertToMatrixBlock(data); IColIndex colIndexes = ColIndexFactory.create(data[0].length); From 0d64bffd62e3875b12c2479c02edaf44ee883432 Mon Sep 17 00:00:00 2001 From: Hana Halitim Date: Sun, 21 Dec 2025 21:26:04 +0100 Subject: [PATCH 6/6] [SYSTEMDS-3539] Improve DeltaDDC Test Coverage for Codecov --- .../estim/encoding/EncodingFactory.java | 11 +- 
.../compress/colgroup/ColGroupDDCTest.java | 216 ++++++++++ .../colgroup/ColGroupDeltaDDCTest.java | 281 +++++++++++++ .../colgroup/ColGroupFactoryDeltaDDCTest.java | 229 +++++++++++ .../colgroup/ColGroupFactoryTest.java | 8 +- .../dictionary/DeltaDictionaryTest.java | 86 ++-- .../estim/encoding/EncodeDeltaTest.java | 97 ++++- .../ReaderColumnSelectionSparseDeltaTest.java | 137 +++++++ .../compress/readers/ReadersDeltaTest.java | 375 +++++++++++++++++- 9 files changed, 1357 insertions(+), 83 deletions(-) create mode 100644 src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCTest.java create mode 100644 src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupFactoryDeltaDDCTest.java create mode 100644 src/test/java/org/apache/sysds/test/component/compress/readers/ReaderColumnSelectionSparseDeltaTest.java diff --git a/src/main/java/org/apache/sysds/runtime/compress/estim/encoding/EncodingFactory.java b/src/main/java/org/apache/sysds/runtime/compress/estim/encoding/EncodingFactory.java index 7d402bc20d6..068b79f9864 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/estim/encoding/EncodingFactory.java +++ b/src/main/java/org/apache/sysds/runtime/compress/estim/encoding/EncodingFactory.java @@ -699,10 +699,13 @@ private static IEncode createWithDeltaReader(MatrixBlock m, IColIndex rowCols, b final DblArrayCountHashMap map = new DblArrayCountHashMap(); final IntArrayList offsets = new IntArrayList(); DblArray cellVals = reader1.nextRow(); + boolean isFirstRow = true; while(cellVals != null) { map.increment(cellVals); - offsets.appendValue(reader1.getCurrentRowIndex()); + if(isFirstRow || !cellVals.isEmpty()) + offsets.appendValue(reader1.getCurrentRowIndex()); + isFirstRow = false; cellVals = reader1.nextRow(); } @@ -740,8 +743,12 @@ private static IEncode createWithDeltaReaderSparse(MatrixBlock m, DblArrayCountH final AMapToData d = MapToFactory.create(offsets.size(), map.size()); int i = 0; + boolean isFirstRow = true; 
while(cellVals != null) { - d.set(i++, map.getId(cellVals)); + if(isFirstRow || !cellVals.isEmpty()) { + d.set(i++, map.getId(cellVals)); + } + isFirstRow = false; cellVals = reader2.nextRow(); } diff --git a/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCTest.java b/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCTest.java new file mode 100644 index 00000000000..0f04cfc9c27 --- /dev/null +++ b/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCTest.java @@ -0,0 +1,216 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.sysds.test.component.compress.colgroup; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.sysds.runtime.compress.colgroup.AColGroup; +import org.apache.sysds.runtime.compress.colgroup.ColGroupDDC; +import org.apache.sysds.runtime.compress.colgroup.ColGroupDeltaDDC; +import org.apache.sysds.runtime.compress.colgroup.dictionary.Dictionary; +import org.apache.sysds.runtime.compress.colgroup.indexes.ColIndexFactory; +import org.apache.sysds.runtime.compress.colgroup.indexes.IColIndex; +import org.apache.sysds.runtime.compress.colgroup.mapping.AMapToData; +import org.apache.sysds.runtime.compress.colgroup.mapping.MapToFactory; +import org.apache.sysds.runtime.matrix.data.MatrixBlock; +import org.junit.Test; + +public class ColGroupDDCTest { + + protected static final Log LOG = LogFactory.getLog(ColGroupDDCTest.class.getName()); + + @Test + public void testConvertToDeltaDDCBasic() { + IColIndex colIndexes = ColIndexFactory.create(2); + double[] dictValues = new double[] {10.0, 20.0, 11.0, 21.0, 12.0, 22.0}; + Dictionary dict = Dictionary.create(dictValues); + AMapToData data = MapToFactory.create(3, 3); + data.set(0, 0); + data.set(1, 1); + data.set(2, 2); + + ColGroupDDC ddc = (ColGroupDDC) ColGroupDDC.create(colIndexes, dict, data, null); + AColGroup result = ddc.convertToDeltaDDC(); + + assertNotNull(result); + assertTrue(result instanceof ColGroupDeltaDDC); + ColGroupDeltaDDC deltaDDC = (ColGroupDeltaDDC) result; + + MatrixBlock mb = new MatrixBlock(3, 2, false); + mb.allocateDenseBlock(); + deltaDDC.decompressToDenseBlock(mb.getDenseBlock(), 0, 3); + + assertEquals(10.0, mb.get(0, 0), 0.0); + assertEquals(20.0, mb.get(0, 1), 0.0); + assertEquals(11.0, mb.get(1, 0), 0.0); + assertEquals(21.0, mb.get(1, 1), 0.0); + assertEquals(12.0, mb.get(2, 0), 
0.0); + assertEquals(22.0, mb.get(2, 1), 0.0); + } + + @Test + public void testConvertToDeltaDDCSingleColumn() { + IColIndex colIndexes = ColIndexFactory.create(1); + double[] dictValues = new double[] {1.0, 2.0, 3.0, 4.0, 5.0}; + Dictionary dict = Dictionary.create(dictValues); + AMapToData data = MapToFactory.create(5, 5); + for(int i = 0; i < 5; i++) + data.set(i, i); + + ColGroupDDC ddc = (ColGroupDDC) ColGroupDDC.create(colIndexes, dict, data, null); + AColGroup result = ddc.convertToDeltaDDC(); + + assertNotNull(result); + assertTrue(result instanceof ColGroupDeltaDDC); + ColGroupDeltaDDC deltaDDC = (ColGroupDeltaDDC) result; + + MatrixBlock mb = new MatrixBlock(5, 1, false); + mb.allocateDenseBlock(); + deltaDDC.decompressToDenseBlock(mb.getDenseBlock(), 0, 5); + + assertEquals(1.0, mb.get(0, 0), 0.0); + assertEquals(2.0, mb.get(1, 0), 0.0); + assertEquals(3.0, mb.get(2, 0), 0.0); + assertEquals(4.0, mb.get(3, 0), 0.0); + assertEquals(5.0, mb.get(4, 0), 0.0); + } + + @Test + public void testConvertToDeltaDDCWithRepeatedValues() { + IColIndex colIndexes = ColIndexFactory.create(2); + double[] dictValues = new double[] {10.0, 20.0, 10.0, 20.0, 10.0, 20.0}; + Dictionary dict = Dictionary.create(dictValues); + AMapToData data = MapToFactory.create(3, 3); + data.set(0, 0); + data.set(1, 1); + data.set(2, 2); + + ColGroupDDC ddc = (ColGroupDDC) ColGroupDDC.create(colIndexes, dict, data, null); + AColGroup result = ddc.convertToDeltaDDC(); + + assertNotNull(result); + assertTrue(result instanceof ColGroupDeltaDDC); + ColGroupDeltaDDC deltaDDC = (ColGroupDeltaDDC) result; + + MatrixBlock mb = new MatrixBlock(3, 2, false); + mb.allocateDenseBlock(); + deltaDDC.decompressToDenseBlock(mb.getDenseBlock(), 0, 3); + + assertEquals(10.0, mb.get(0, 0), 0.0); + assertEquals(20.0, mb.get(0, 1), 0.0); + assertEquals(10.0, mb.get(1, 0), 0.0); + assertEquals(20.0, mb.get(1, 1), 0.0); + assertEquals(10.0, mb.get(2, 0), 0.0); + assertEquals(20.0, mb.get(2, 1), 0.0); + } + + @Test 
+ public void testConvertToDeltaDDCWithNegativeDeltas() { + IColIndex colIndexes = ColIndexFactory.create(2); + double[] dictValues = new double[] {10.0, 20.0, 8.0, 15.0, 12.0, 25.0}; + Dictionary dict = Dictionary.create(dictValues); + AMapToData data = MapToFactory.create(3, 3); + data.set(0, 0); + data.set(1, 1); + data.set(2, 2); + + ColGroupDDC ddc = (ColGroupDDC) ColGroupDDC.create(colIndexes, dict, data, null); + AColGroup result = ddc.convertToDeltaDDC(); + + assertNotNull(result); + assertTrue(result instanceof ColGroupDeltaDDC); + ColGroupDeltaDDC deltaDDC = (ColGroupDeltaDDC) result; + + MatrixBlock mb = new MatrixBlock(3, 2, false); + mb.allocateDenseBlock(); + deltaDDC.decompressToDenseBlock(mb.getDenseBlock(), 0, 3); + + assertEquals(10.0, mb.get(0, 0), 0.0); + assertEquals(20.0, mb.get(0, 1), 0.0); + assertEquals(8.0, mb.get(1, 0), 0.0); + assertEquals(15.0, mb.get(1, 1), 0.0); + assertEquals(12.0, mb.get(2, 0), 0.0); + assertEquals(25.0, mb.get(2, 1), 0.0); + } + + @Test + public void testConvertToDeltaDDCWithZeroDeltas() { + IColIndex colIndexes = ColIndexFactory.create(2); + double[] dictValues = new double[] {5.0, 0.0, 5.0, 0.0, 0.0, 5.0}; + Dictionary dict = Dictionary.create(dictValues); + AMapToData data = MapToFactory.create(3, 3); + data.set(0, 0); + data.set(1, 1); + data.set(2, 2); + + ColGroupDDC ddc = (ColGroupDDC) ColGroupDDC.create(colIndexes, dict, data, null); + AColGroup result = ddc.convertToDeltaDDC(); + + assertNotNull(result); + assertTrue(result instanceof ColGroupDeltaDDC); + ColGroupDeltaDDC deltaDDC = (ColGroupDeltaDDC) result; + + MatrixBlock mb = new MatrixBlock(3, 2, false); + mb.allocateDenseBlock(); + deltaDDC.decompressToDenseBlock(mb.getDenseBlock(), 0, 3); + + assertEquals(5.0, mb.get(0, 0), 0.0); + assertEquals(0.0, mb.get(0, 1), 0.0); + assertEquals(5.0, mb.get(1, 0), 0.0); + assertEquals(0.0, mb.get(1, 1), 0.0); + assertEquals(0.0, mb.get(2, 0), 0.0); + assertEquals(5.0, mb.get(2, 1), 0.0); + } + + @Test + public 
void testConvertToDeltaDDCMultipleUniqueDeltas() { + IColIndex colIndexes = ColIndexFactory.create(2); + double[] dictValues = new double[] {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}; + Dictionary dict = Dictionary.create(dictValues); + AMapToData data = MapToFactory.create(4, 4); + for(int i = 0; i < 4; i++) + data.set(i, i); + + ColGroupDDC ddc = (ColGroupDDC) ColGroupDDC.create(colIndexes, dict, data, null); + AColGroup result = ddc.convertToDeltaDDC(); + + assertNotNull(result); + assertTrue(result instanceof ColGroupDeltaDDC); + ColGroupDeltaDDC deltaDDC = (ColGroupDeltaDDC) result; + + MatrixBlock mb = new MatrixBlock(4, 2, false); + mb.allocateDenseBlock(); + deltaDDC.decompressToDenseBlock(mb.getDenseBlock(), 0, 4); + + assertEquals(1.0, mb.get(0, 0), 0.0); + assertEquals(2.0, mb.get(0, 1), 0.0); + assertEquals(3.0, mb.get(1, 0), 0.0); + assertEquals(4.0, mb.get(1, 1), 0.0); + assertEquals(5.0, mb.get(2, 0), 0.0); + assertEquals(6.0, mb.get(2, 1), 0.0); + assertEquals(7.0, mb.get(3, 0), 0.0); + assertEquals(8.0, mb.get(3, 1), 0.0); + } +} + diff --git a/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDeltaDDCTest.java b/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDeltaDDCTest.java index e11a98f4f6d..c953792a038 100644 --- a/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDeltaDDCTest.java +++ b/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDeltaDDCTest.java @@ -468,6 +468,287 @@ public void testSliceRows() { assertEquals(8.0, ret.get(2, 1), 0.0); } + @Test + public void testSliceRowsWithMatchingDictionaryEntry() { + double[][] data = {{1, 2}, {3, 4}, {1, 2}, {5, 6}, {7, 8}}; + AColGroup cg = compressForTest(data); + + AColGroup sliced = cg.sliceRows(2, 5); + assertTrue(sliced instanceof ColGroupDeltaDDC); + + MatrixBlock ret = new MatrixBlock(3, 2, false); + ret.allocateDenseBlock(); + sliced.decompressToDenseBlock(ret.getDenseBlock(), 0, 3); + + 
assertEquals(1.0, ret.get(0, 0), 0.0); + assertEquals(2.0, ret.get(0, 1), 0.0); + assertEquals(5.0, ret.get(1, 0), 0.0); + assertEquals(6.0, ret.get(1, 1), 0.0); + assertEquals(7.0, ret.get(2, 0), 0.0); + assertEquals(8.0, ret.get(2, 1), 0.0); + } + + @Test + public void testSliceRowsWithNoMatchingDictionaryEntry() { + double[][] data = {{1, 2}, {3, 4}, {5, 6}}; + AColGroup cg = compressForTest(data); + + AColGroup sliced = cg.sliceRows(1, 3); + assertTrue(sliced instanceof ColGroupDeltaDDC); + + MatrixBlock ret = new MatrixBlock(2, 2, false); + ret.allocateDenseBlock(); + sliced.decompressToDenseBlock(ret.getDenseBlock(), 0, 2); + + assertEquals(3.0, ret.get(0, 0), 0.0); + assertEquals(4.0, ret.get(0, 1), 0.0); + assertEquals(5.0, ret.get(1, 0), 0.0); + assertEquals(6.0, ret.get(1, 1), 0.0); + } + + @Test + public void testSliceRowsFromMiddleRow() { + double[][] data = {{1, 2}, {3, 4}, {5, 6}, {7, 8}}; + AColGroup cg = compressForTest(data); + + AColGroup sliced = cg.sliceRows(2, 4); + assertTrue(sliced instanceof ColGroupDeltaDDC); + + MatrixBlock ret = new MatrixBlock(2, 2, false); + ret.allocateDenseBlock(); + sliced.decompressToDenseBlock(ret.getDenseBlock(), 0, 2); + + assertEquals(5.0, ret.get(0, 0), 0.0); + assertEquals(6.0, ret.get(0, 1), 0.0); + assertEquals(7.0, ret.get(1, 0), 0.0); + assertEquals(8.0, ret.get(1, 1), 0.0); + } + + @Test + public void testDecompressToSparseBlock() { + double[][] data = {{1, 2}, {3, 4}, {5, 6}}; + AColGroup cg = compressForTest(data); + + MatrixBlock ret = new MatrixBlock(3, 2, true); + ret.allocateSparseRowsBlock(); + cg.decompressToSparseBlock(ret.getSparseBlock(), 0, 3); + + assertEquals(1.0, ret.get(0, 0), 0.0); + assertEquals(2.0, ret.get(0, 1), 0.0); + assertEquals(3.0, ret.get(1, 0), 0.0); + assertEquals(4.0, ret.get(1, 1), 0.0); + assertEquals(5.0, ret.get(2, 0), 0.0); + assertEquals(6.0, ret.get(2, 1), 0.0); + } + + @Test + public void testDecompressToSparseBlockWithRlGreaterThanZero() { + double[][] data = {{1, 
2}, {3, 4}, {5, 6}, {7, 8}}; + AColGroup cg = compressForTest(data); + + MatrixBlock ret = new MatrixBlock(4, 2, true); + ret.allocateSparseRowsBlock(); + cg.decompressToSparseBlock(ret.getSparseBlock(), 2, 4, 0, 0); + + assertEquals(5.0, ret.get(2, 0), 0.0); + assertEquals(6.0, ret.get(2, 1), 0.0); + assertEquals(7.0, ret.get(3, 0), 0.0); + assertEquals(8.0, ret.get(3, 1), 0.0); + } + + @Test + public void testDecompressToSparseBlockWithOffset() { + double[][] data = {{1, 2}, {3, 4}, {5, 6}}; + AColGroup cg = compressForTest(data); + + MatrixBlock ret = new MatrixBlock(5, 4, true); + ret.allocateSparseRowsBlock(); + cg.decompressToSparseBlock(ret.getSparseBlock(), 0, 3, 1, 1); + + assertEquals(1.0, ret.get(1, 1), 0.0); + assertEquals(2.0, ret.get(1, 2), 0.0); + assertEquals(3.0, ret.get(2, 1), 0.0); + assertEquals(4.0, ret.get(2, 2), 0.0); + assertEquals(5.0, ret.get(3, 1), 0.0); + assertEquals(6.0, ret.get(3, 2), 0.0); + } + + @Test + public void testGetNumberNonZeros() { + double[][] data = {{1, 0}, {2, 3}, {0, 4}, {5, 0}}; + AColGroup cg = compressForTest(data); + + long nnz = cg.getNumberNonZeros(4); + assertEquals(5L, nnz); + } + + @Test + public void testGetNumberNonZerosAllZeros() { + double[][] data = {{0, 0}, {0, 0}, {0, 0}}; + AColGroup cg = compressForTest(data); + + long nnz = cg.getNumberNonZeros(3); + assertEquals(0L, nnz); + } + + @Test + public void testGetNumberNonZerosAllNonZeros() { + double[][] data = {{1, 2}, {3, 4}, {5, 6}}; + AColGroup cg = compressForTest(data); + + long nnz = cg.getNumberNonZeros(3); + assertEquals(6L, nnz); + } + + @Test + public void testDecompressToDenseBlockNonContiguousPath() { + double[][] data = {{1, 2}, {3, 4}, {5, 6}}; + AColGroup cg = compressForTest(data); + + MatrixBlock ret = new MatrixBlock(3, 5, false); + ret.allocateDenseBlock(); + cg.decompressToDenseBlock(ret.getDenseBlock(), 0, 3, 0, 2); + + assertEquals(1.0, ret.get(0, 2), 0.0); + assertEquals(2.0, ret.get(0, 3), 0.0); + assertEquals(3.0, ret.get(1, 2), 
0.0); + assertEquals(4.0, ret.get(1, 3), 0.0); + assertEquals(5.0, ret.get(2, 2), 0.0); + assertEquals(6.0, ret.get(2, 3), 0.0); + } + + @Test + public void testDecompressToDenseBlockFirstRowPath() { + double[][] data = {{10, 20}, {11, 21}, {12, 22}}; + AColGroup cg = compressForTest(data); + + MatrixBlock ret = new MatrixBlock(3, 2, false); + ret.allocateDenseBlock(); + cg.decompressToDenseBlock(ret.getDenseBlock(), 0, 1); + + assertEquals(10.0, ret.get(0, 0), 0.0); + assertEquals(20.0, ret.get(0, 1), 0.0); + } + + @Test + public void testScalarOperationShiftWithExistingMatch() { + double[][] data = {{1}, {2}, {3}, {1}}; + AColGroup cg = compressForTest(data); + assertTrue(cg instanceof ColGroupDeltaDDC); + + ScalarOperator op = new RightScalarOperator(Plus.getPlusFnObject(), 1.0); + AColGroup res = cg.scalarOperation(op); + assertTrue("Should remain DeltaDDC after shift", res instanceof ColGroupDeltaDDC); + + MatrixBlock ret = new MatrixBlock(4, 1, false); + ret.allocateDenseBlock(); + res.decompressToDenseBlock(ret.getDenseBlock(), 0, 4); + + assertEquals(2.0, ret.get(0, 0), 0.0); + assertEquals(3.0, ret.get(1, 0), 0.0); + assertEquals(4.0, ret.get(2, 0), 0.0); + assertEquals(2.0, ret.get(3, 0), 0.0); + } + + @Test + public void testScalarOperationShiftWithCountsId0EqualsOne() { + double[][] data = {{1}, {2}, {3}}; + AColGroup cg = compressForTest(data); + assertTrue(cg instanceof ColGroupDeltaDDC); + + ScalarOperator op = new RightScalarOperator(Plus.getPlusFnObject(), 5.0); + AColGroup res = cg.scalarOperation(op); + assertTrue("Should remain DeltaDDC after shift", res instanceof ColGroupDeltaDDC); + + MatrixBlock ret = new MatrixBlock(3, 1, false); + ret.allocateDenseBlock(); + res.decompressToDenseBlock(ret.getDenseBlock(), 0, 3); + + assertEquals(6.0, ret.get(0, 0), 0.0); + assertEquals(7.0, ret.get(1, 0), 0.0); + assertEquals(8.0, ret.get(2, 0), 0.0); + } + + @Test + public void testScalarOperationShiftWithNoMatch() { + double[][] data = {{1}, {2}, {3}}; + 
AColGroup cg = compressForTest(data); + assertTrue(cg instanceof ColGroupDeltaDDC); + + ScalarOperator op = new RightScalarOperator(Plus.getPlusFnObject(), 10.0); + AColGroup res = cg.scalarOperation(op); + assertTrue("Should remain DeltaDDC after shift", res instanceof ColGroupDeltaDDC); + + MatrixBlock ret = new MatrixBlock(3, 1, false); + ret.allocateDenseBlock(); + res.decompressToDenseBlock(ret.getDenseBlock(), 0, 3); + + assertEquals(11.0, ret.get(0, 0), 0.0); + assertEquals(12.0, ret.get(1, 0), 0.0); + assertEquals(13.0, ret.get(2, 0), 0.0); + } + + @Test + public void testUnaryOperationTriggersConvertToDDC() { + double[][] data = {{1, 2}, {3, 4}, {5, 6}}; + AColGroup cg = compressForTest(data); + assertTrue(cg instanceof ColGroupDeltaDDC); + + UnaryOperator op = new UnaryOperator(Builtin.getBuiltinFnObject(Builtin.BuiltinCode.ABS)); + AColGroup res = cg.unaryOperation(op); + + MatrixBlock ret = new MatrixBlock(3, 2, false); + ret.allocateDenseBlock(); + res.decompressToDenseBlock(ret.getDenseBlock(), 0, 3); + + assertEquals(1.0, ret.get(0, 0), 0.01); + assertEquals(2.0, ret.get(0, 1), 0.01); + assertEquals(3.0, ret.get(1, 0), 0.01); + assertEquals(4.0, ret.get(1, 1), 0.01); + assertEquals(5.0, ret.get(2, 0), 0.01); + assertEquals(6.0, ret.get(2, 1), 0.01); + } + + @Test + public void testUnaryOperationWithConstantResultSingleColumn() { + double[][] data = {{5}, {5}, {5}, {5}}; + AColGroup cg = compressForTest(data); + assertTrue(cg instanceof ColGroupDeltaDDC); + + UnaryOperator op = new UnaryOperator(Builtin.getBuiltinFnObject(Builtin.BuiltinCode.ABS)); + AColGroup res = cg.unaryOperation(op); + + MatrixBlock ret = new MatrixBlock(4, 1, false); + ret.allocateDenseBlock(); + res.decompressToDenseBlock(ret.getDenseBlock(), 0, 4); + + assertEquals(5.0, ret.get(0, 0), 0.01); + assertEquals(5.0, ret.get(1, 0), 0.01); + assertEquals(5.0, ret.get(2, 0), 0.01); + assertEquals(5.0, ret.get(3, 0), 0.01); + } + + @Test + public void 
testUnaryOperationWithConstantResultMultiColumn() { + double[][] data = {{10, 20}, {10, 20}, {10, 20}}; + AColGroup cg = compressForTest(data); + assertTrue(cg instanceof ColGroupDeltaDDC); + + UnaryOperator op = new UnaryOperator(Builtin.getBuiltinFnObject(Builtin.BuiltinCode.ABS)); + AColGroup res = cg.unaryOperation(op); + + MatrixBlock ret = new MatrixBlock(3, 2, false); + ret.allocateDenseBlock(); + res.decompressToDenseBlock(ret.getDenseBlock(), 0, 3); + + assertEquals(10.0, ret.get(0, 0), 0.01); + assertEquals(20.0, ret.get(0, 1), 0.01); + assertEquals(10.0, ret.get(1, 0), 0.01); + assertEquals(20.0, ret.get(1, 1), 0.01); + assertEquals(10.0, ret.get(2, 0), 0.01); + assertEquals(20.0, ret.get(2, 1), 0.01); + } + private AColGroup compressForTest(double[][] data) { MatrixBlock mb = DataConverter.convertToMatrixBlock(data); IColIndex colIndexes = ColIndexFactory.create(data[0].length); diff --git a/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupFactoryDeltaDDCTest.java b/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupFactoryDeltaDDCTest.java new file mode 100644 index 00000000000..c7439652956 --- /dev/null +++ b/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupFactoryDeltaDDCTest.java @@ -0,0 +1,229 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysds.test.component.compress.colgroup; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.sysds.runtime.compress.CompressionSettings; +import org.apache.sysds.runtime.compress.CompressionSettingsBuilder; +import org.apache.sysds.runtime.compress.colgroup.AColGroup; +import org.apache.sysds.runtime.compress.colgroup.AColGroup.CompressionType; +import org.apache.sysds.runtime.compress.colgroup.ColGroupDeltaDDC; +import org.apache.sysds.runtime.compress.colgroup.ColGroupEmpty; +import org.apache.sysds.runtime.compress.colgroup.ColGroupFactory; +import org.apache.sysds.runtime.compress.colgroup.indexes.ColIndexFactory; +import org.apache.sysds.runtime.compress.colgroup.indexes.IColIndex; +import org.apache.sysds.runtime.compress.estim.CompressedSizeInfo; +import org.apache.sysds.runtime.compress.estim.CompressedSizeInfoColGroup; +import org.apache.sysds.runtime.compress.estim.EstimationFactors; +import org.apache.sysds.runtime.matrix.data.MatrixBlock; +import org.junit.Test; + +public class ColGroupFactoryDeltaDDCTest { + + @Test + public void testCompressDeltaDDCSingleColumnWithGaps() { + MatrixBlock mb = new MatrixBlock(10, 1, true); + mb.set(0, 0, 10); + mb.set(5, 0, 15); + mb.set(9, 0, 20); + + IColIndex cols = ColIndexFactory.create(1); + CompressionSettingsBuilder csb = new CompressionSettingsBuilder(); + CompressionSettings cs = csb.create(); + + final int 
nRow = mb.getNumRows(); + final int offs = 3; + final EstimationFactors f = new EstimationFactors(3, nRow, offs, 0.3); + final List es = new ArrayList<>(); + es.add(new CompressedSizeInfoColGroup(cols, f, 314152, CompressionType.DeltaDDC)); + final CompressedSizeInfo csi = new CompressedSizeInfo(es); + + List groups = ColGroupFactory.compressColGroups(mb, csi, cs); + assertNotNull("Compression should succeed", groups); + assertEquals("Should have one column group", 1, groups.size()); + assertTrue("Should be DeltaDDC", groups.get(0) instanceof ColGroupDeltaDDC); + } + + @Test + public void testCompressDeltaDDCSingleColumnEmpty() { + MatrixBlock mb = new MatrixBlock(10, 1, true); + + IColIndex cols = ColIndexFactory.create(1); + CompressionSettingsBuilder csb = new CompressionSettingsBuilder(); + CompressionSettings cs = csb.create(); + + final int nRow = mb.getNumRows(); + final int offs = 0; + final EstimationFactors f = new EstimationFactors(0, nRow, offs, 0.0); + final List es = new ArrayList<>(); + es.add(new CompressedSizeInfoColGroup(cols, f, 314152, CompressionType.DeltaDDC)); + final CompressedSizeInfo csi = new CompressedSizeInfo(es); + + List groups = ColGroupFactory.compressColGroups(mb, csi, cs); + assertNotNull("Compression should succeed", groups); + assertEquals("Should have one column group", 1, groups.size()); + assertTrue("Should be Empty", groups.get(0) instanceof ColGroupEmpty); + } + + @Test + public void testCompressDeltaDDCMultiColumnWithGaps() { + MatrixBlock mb = new MatrixBlock(20, 2, true); + mb.set(0, 0, 10); + mb.set(0, 1, 20); + mb.set(5, 0, 15); + mb.set(5, 1, 25); + mb.set(10, 0, 20); + mb.set(10, 1, 30); + mb.set(15, 0, 25); + mb.set(15, 1, 35); + + IColIndex cols = ColIndexFactory.create(2); + CompressionSettingsBuilder csb = new CompressionSettingsBuilder(); + CompressionSettings cs = csb.create(); + + final int nRow = mb.getNumRows(); + final int offs = 4; + final EstimationFactors f = new EstimationFactors(4, nRow, offs, 0.2); + 
final List es = new ArrayList<>(); + es.add(new CompressedSizeInfoColGroup(cols, f, 314152, CompressionType.DeltaDDC)); + final CompressedSizeInfo csi = new CompressedSizeInfo(es); + + List groups = ColGroupFactory.compressColGroups(mb, csi, cs); + assertNotNull("Compression should succeed", groups); + assertEquals("Should have one column group", 1, groups.size()); + assertTrue("Should be DeltaDDC", groups.get(0) instanceof ColGroupDeltaDDC); + } + + @Test + public void testCompressDeltaDDCMultiColumnEmpty() { + MatrixBlock mb = new MatrixBlock(10, 2, true); + + IColIndex cols = ColIndexFactory.create(2); + CompressionSettingsBuilder csb = new CompressionSettingsBuilder(); + CompressionSettings cs = csb.create(); + + final int nRow = mb.getNumRows(); + final int offs = 0; + final EstimationFactors f = new EstimationFactors(0, nRow, offs, 0.0); + final List es = new ArrayList<>(); + es.add(new CompressedSizeInfoColGroup(cols, f, 314152, CompressionType.DeltaDDC)); + final CompressedSizeInfo csi = new CompressedSizeInfo(es); + + List groups = ColGroupFactory.compressColGroups(mb, csi, cs); + assertNotNull("Compression should succeed", groups); + assertEquals("Should have one column group", 1, groups.size()); + assertTrue("Should be Empty", groups.get(0) instanceof ColGroupEmpty); + } + + @Test + public void testCompressDeltaDDCMultiColumnSparseWithGaps() { + MatrixBlock mb = new MatrixBlock(50, 3, true); + mb.set(0, 0, 1); + mb.set(0, 1, 2); + mb.set(0, 2, 3); + mb.set(10, 0, 11); + mb.set(10, 1, 12); + mb.set(10, 2, 13); + mb.set(20, 0, 21); + mb.set(20, 1, 22); + mb.set(20, 2, 23); + mb.set(30, 0, 31); + mb.set(30, 1, 32); + mb.set(30, 2, 33); + mb.set(40, 0, 41); + mb.set(40, 1, 42); + mb.set(40, 2, 43); + + IColIndex cols = ColIndexFactory.create(3); + CompressionSettingsBuilder csb = new CompressionSettingsBuilder(); + CompressionSettings cs = csb.create(); + + final int nRow = mb.getNumRows(); + final int offs = 5; + final EstimationFactors f = new 
EstimationFactors(5, nRow, offs, 0.1); + final List es = new ArrayList<>(); + es.add(new CompressedSizeInfoColGroup(cols, f, 314152, CompressionType.DeltaDDC)); + final CompressedSizeInfo csi = new CompressedSizeInfo(es); + + List groups = ColGroupFactory.compressColGroups(mb, csi, cs); + assertNotNull("Compression should succeed", groups); + assertEquals("Should have one column group", 1, groups.size()); + assertTrue("Should be DeltaDDC", groups.get(0) instanceof ColGroupDeltaDDC); + } + + @Test + public void testCompressDeltaDDCSingleColumnDense() { + MatrixBlock mb = new MatrixBlock(10, 1, false); + mb.allocateDenseBlock(); + for(int i = 0; i < 10; i++) { + mb.set(i, 0, i + 1); + } + + IColIndex cols = ColIndexFactory.create(1); + CompressionSettingsBuilder csb = new CompressionSettingsBuilder(); + CompressionSettings cs = csb.create(); + + final int nRow = mb.getNumRows(); + final int offs = 10; + final EstimationFactors f = new EstimationFactors(10, nRow, offs, 1.0); + final List es = new ArrayList<>(); + es.add(new CompressedSizeInfoColGroup(cols, f, 314152, CompressionType.DeltaDDC)); + final CompressedSizeInfo csi = new CompressedSizeInfo(es); + + List groups = ColGroupFactory.compressColGroups(mb, csi, cs); + assertNotNull("Compression should succeed", groups); + assertEquals("Should have one column group", 1, groups.size()); + assertTrue("Should be DeltaDDC", groups.get(0) instanceof ColGroupDeltaDDC); + } + + @Test + public void testCompressDeltaDDCMultiColumnDense() { + MatrixBlock mb = new MatrixBlock(10, 2, false); + mb.allocateDenseBlock(); + for(int i = 0; i < 10; i++) { + mb.set(i, 0, i + 1); + mb.set(i, 1, (i + 1) * 2); + } + + IColIndex cols = ColIndexFactory.create(2); + CompressionSettingsBuilder csb = new CompressionSettingsBuilder(); + CompressionSettings cs = csb.create(); + + final int nRow = mb.getNumRows(); + final int offs = 10; + final EstimationFactors f = new EstimationFactors(10, nRow, offs, 1.0); + final List es = new ArrayList<>(); 
+ es.add(new CompressedSizeInfoColGroup(cols, f, 314152, CompressionType.DeltaDDC)); + final CompressedSizeInfo csi = new CompressedSizeInfo(es); + + List groups = ColGroupFactory.compressColGroups(mb, csi, cs); + assertNotNull("Compression should succeed", groups); + assertEquals("Should have one column group", 1, groups.size()); + assertTrue("Should be DeltaDDC", groups.get(0) instanceof ColGroupDeltaDDC); + } + +} + diff --git a/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupFactoryTest.java b/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupFactoryTest.java index 0468de4dc04..c4da48a0232 100644 --- a/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupFactoryTest.java +++ b/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupFactoryTest.java @@ -323,9 +323,9 @@ public boolean isContiguous() { return false; } - @Override - public int numBlocks() { - return 2; - } + @Override + public int numBlocks() { + return 2; } } +} diff --git a/src/test/java/org/apache/sysds/test/component/compress/dictionary/DeltaDictionaryTest.java b/src/test/java/org/apache/sysds/test/component/compress/dictionary/DeltaDictionaryTest.java index 69ddb175628..52b88d83a53 100644 --- a/src/test/java/org/apache/sysds/test/component/compress/dictionary/DeltaDictionaryTest.java +++ b/src/test/java/org/apache/sysds/test/component/compress/dictionary/DeltaDictionaryTest.java @@ -24,15 +24,12 @@ import java.io.DataOutputStream; import java.io.IOException; -import org.apache.commons.lang3.NotImplementedException; import org.apache.sysds.runtime.compress.colgroup.dictionary.DeltaDictionary; import org.apache.sysds.runtime.compress.colgroup.dictionary.DictionaryFactory; import org.apache.sysds.runtime.compress.colgroup.dictionary.IDictionary; -import org.apache.sysds.runtime.functionobjects.And; +import org.apache.sysds.runtime.compress.colgroup.dictionary.IDictionary.DictType; import 
org.apache.sysds.runtime.functionobjects.Divide; -import org.apache.sysds.runtime.functionobjects.Minus; import org.apache.sysds.runtime.functionobjects.Multiply; -import org.apache.sysds.runtime.functionobjects.Plus; import org.apache.sysds.runtime.matrix.operators.LeftScalarOperator; import org.apache.sysds.runtime.matrix.operators.RightScalarOperator; import org.apache.sysds.runtime.matrix.operators.ScalarOperator; @@ -91,45 +88,6 @@ public void testScalarOpRightDivideTwoColumns() { Assert.assertArrayEquals(expected, d.getValues(), 0.01); } - @Test(expected = NotImplementedException.class) - public void testScalarOpRightPlusSingleColumn() { - double scalar = 2; - DeltaDictionary d = new DeltaDictionary(new double[] {1, 2}, 1); - ScalarOperator sop = new RightScalarOperator(Plus.getPlusFnObject(), scalar, 1); - d.applyScalarOp(sop); - } - - @Test(expected = NotImplementedException.class) - public void testScalarOpRightPlusTwoColumns() { - double scalar = 2; - DeltaDictionary d = new DeltaDictionary(new double[] {1, 2, 3, 4}, 2); - ScalarOperator sop = new RightScalarOperator(Plus.getPlusFnObject(), scalar, 1); - d.applyScalarOp(sop); - } - - @Test(expected = NotImplementedException.class) - public void testScalarOpRightMinusTwoColumns() { - double scalar = 2; - DeltaDictionary d = new DeltaDictionary(new double[] {1, 2, 3, 4}, 2); - ScalarOperator sop = new RightScalarOperator(Minus.getMinusFnObject(), scalar, 1); - d.applyScalarOp(sop); - } - - @Test(expected = NotImplementedException.class) - public void testScalarOpLeftPlusTwoColumns() { - double scalar = 2; - DeltaDictionary d = new DeltaDictionary(new double[] {1, 2, 3, 4}, 2); - ScalarOperator sop = new LeftScalarOperator(Plus.getPlusFnObject(), scalar, 1); - d.applyScalarOp(sop); - } - - @Test(expected = NotImplementedException.class) - public void testScalarOpAnd() { - double scalar = 2; - DeltaDictionary d = new DeltaDictionary(new double[] {1, 2, 3, 4}, 2); - ScalarOperator sop = new 
LeftScalarOperator(And.getAndFnObject(), scalar, 1); - d.applyScalarOp(sop); - } @Test public void testSerializationSingleColumn() throws IOException { @@ -166,4 +124,46 @@ public void testSerializationTwoColumns() throws IOException { DeltaDictionary deltaDict = (DeltaDictionary) deserialized; Assert.assertArrayEquals("Values should match after serialization", original.getValues(), deltaDict.getValues(), 0.01); } + + @Test + public void testGetValue() { + DeltaDictionary d = new DeltaDictionary(new double[] {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}, 2); + Assert.assertEquals(1.0, d.getValue(0, 0, 2), 0.01); + Assert.assertEquals(2.0, d.getValue(0, 1, 2), 0.01); + Assert.assertEquals(3.0, d.getValue(1, 0, 2), 0.01); + Assert.assertEquals(4.0, d.getValue(1, 1, 2), 0.01); + Assert.assertEquals(5.0, d.getValue(2, 0, 2), 0.01); + Assert.assertEquals(6.0, d.getValue(2, 1, 2), 0.01); + } + + @Test + public void testGetValueSingleColumn() { + DeltaDictionary d = new DeltaDictionary(new double[] {1.0, 2.0, 3.0}, 1); + Assert.assertEquals(1.0, d.getValue(0, 0, 1), 0.01); + Assert.assertEquals(2.0, d.getValue(1, 0, 1), 0.01); + Assert.assertEquals(3.0, d.getValue(2, 0, 1), 0.01); + } + + @Test + public void testGetDictType() { + DeltaDictionary d = new DeltaDictionary(new double[] {1, 2, 3, 4}, 2); + Assert.assertEquals(DictType.Delta, d.getDictType()); + } + + @Test + public void testGetString() { + DeltaDictionary d = new DeltaDictionary(new double[] {1.0, 2.0, 3.0, 4.0}, 2); + String result = d.getString(2); + String expected = "1.0, 2.0\n3.0, 4.0"; + Assert.assertEquals(expected, result); + } + + @Test + public void testGetStringSingleColumn() { + DeltaDictionary d = new DeltaDictionary(new double[] {1.0, 2.0, 3.0}, 1); + String result = d.getString(1); + String expected = "1.0\n2.0\n3.0"; + Assert.assertEquals(expected, result); + } + } diff --git a/src/test/java/org/apache/sysds/test/component/compress/estim/encoding/EncodeDeltaTest.java 
b/src/test/java/org/apache/sysds/test/component/compress/estim/encoding/EncodeDeltaTest.java index 1da2af43246..8cb3d93a58c 100644 --- a/src/test/java/org/apache/sysds/test/component/compress/estim/encoding/EncodeDeltaTest.java +++ b/src/test/java/org/apache/sysds/test/component/compress/estim/encoding/EncodeDeltaTest.java @@ -23,7 +23,6 @@ import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; -import org.apache.commons.lang3.NotImplementedException; import org.apache.sysds.runtime.compress.estim.encoding.EncodingFactory; import org.apache.sysds.runtime.compress.estim.encoding.EmptyEncoding; import org.apache.sysds.runtime.compress.estim.encoding.IEncode; @@ -83,7 +82,7 @@ public void testCreateFromMatrixBlockDeltaFirstRowAsIs() { IEncode encoding = EncodingFactory.createFromMatrixBlockDelta(mb, false, ColIndexFactory.create(2)); assertNotNull("Encoding should not be null", encoding); assertTrue("Encoding should be DenseEncoding", encoding instanceof DenseEncoding); - assertEquals("First row [5,10] stored as-is, delta [0,0] for row 1, so 2 unique: [5,10] and [0,0]", 2, encoding.getUnique()); + assertEquals("First row [5,10] stored as-is, delta [0,0] for row 1. Map has 2 unique: [5,10] and [0,0]. 
With zero=true, unique = 2 + 1 = 3", 3, encoding.getUnique()); } @Test @@ -195,15 +194,6 @@ public void testCreateFromMatrixBlockDeltaZeros() { assertTrue("Should have at least 1 unique value", encoding.getUnique() >= 1); } - @Test(expected = NotImplementedException.class) - public void testCreateFromMatrixBlockDeltaTransposed() { - MatrixBlock mb = new MatrixBlock(10, 10, false); - mb.allocateDenseBlock(); - mb.set(0, 0, 1); - mb.set(0, 1, 2); - mb.setNonZeros(2); - EncodingFactory.createFromMatrixBlockDelta(mb, true, ColIndexFactory.create(2)); - } @Test public void testCreateFromMatrixBlockDeltaLargeMatrix() { @@ -218,7 +208,7 @@ public void testCreateFromMatrixBlockDeltaLargeMatrix() { IEncode encoding = EncodingFactory.createFromMatrixBlockDelta(mb, false, ColIndexFactory.create(3)); assertNotNull("Encoding should not be null", encoding); assertTrue("Encoding should be DenseEncoding", encoding instanceof DenseEncoding); - assertEquals("First row [0,0,0] stored as-is, all deltas are [1,2,3], so 2 unique: [0,0,0] and [1,2,3]", 2, encoding.getUnique()); + assertEquals("First row [0,0,0] stored as-is, all deltas are [1,2,3]. Map has 2 unique: [0,0,0] and [1,2,3]. 
All rows have non-zero deltas, so offsets.size()=100=ru, zero=false, unique=2", 2, encoding.getUnique()); assertEquals("Should have 100 rows in mapping", 100, ((DenseEncoding) encoding).getMap().size()); } @@ -391,5 +381,88 @@ public void testCombineDenseDeltaEncodingsWithDifferentDeltas() { 4, ((DenseEncoding) combined).getMap().size()); } + @Test + public void testCreateFromMatrixBlockDeltaDensePath() { + MatrixBlock mb = new MatrixBlock(10, 2, true); + mb.set(0, 0, 10); + mb.set(0, 1, 20); + mb.set(1, 0, 11); + mb.set(1, 1, 21); + mb.set(2, 0, 12); + mb.set(2, 1, 22); + mb.set(3, 0, 13); + mb.set(3, 1, 23); + mb.set(4, 0, 14); + mb.set(4, 1, 24); + + IEncode encoding = EncodingFactory.createFromMatrixBlockDelta(mb, false, ColIndexFactory.create(2), 10); + assertNotNull("Encoding should not be null", encoding); + assertTrue("Should result in DenseEncoding (5 non-zero rows >= 10/4=2.5, so dense path)", + encoding instanceof DenseEncoding); + assertTrue("Should have at least 1 unique value", encoding.getUnique() >= 1); + } + + @Test + public void testCreateFromMatrixBlockDeltaEmptyEncoding() { + MatrixBlock mb = new MatrixBlock(10, 2, true); + + IEncode encoding = EncodingFactory.createFromMatrixBlockDelta(mb, false, ColIndexFactory.create(2), 10); + assertNotNull("Encoding should not be null", encoding); + assertTrue("Empty matrix should result in EmptyEncoding", encoding instanceof EmptyEncoding); + } + + @Test + public void testCreateFromMatrixBlockDeltaConstEncoding() { + MatrixBlock mb = new MatrixBlock(5, 2, false); + mb.allocateDenseBlock(); + for(int i = 0; i < 5; i++) { + mb.set(i, 0, 10); + mb.set(i, 1, 20); + } + + IEncode encoding = EncodingFactory.createFromMatrixBlockDelta(mb, false, ColIndexFactory.create(2), 5); + assertNotNull("Encoding should not be null", encoding); + assertTrue("Constant matrix with delta encoding: first row is absolute [10,20], rest are deltas [0,0], so map.size()=2, not ConstEncoding", + encoding instanceof DenseEncoding || 
encoding instanceof SparseEncoding); + assertTrue("Should have 2 unique values (first row absolute, rest are zero deltas)", encoding.getUnique() >= 2); + } + + + @Test + public void testCreateFromMatrixBlockDeltaSparseEncoding() { + MatrixBlock mb = new MatrixBlock(20, 2, true); + mb.set(0, 0, 10); + mb.set(0, 1, 20); + mb.set(1, 0, 11); + mb.set(1, 1, 21); + mb.set(2, 0, 12); + mb.set(2, 1, 22); + + IEncode encoding = EncodingFactory.createFromMatrixBlockDelta(mb, false, ColIndexFactory.create(2), 20); + assertNotNull("Encoding should not be null", encoding); + assertTrue("Sparse matrix with few non-zero rows (3 < 20/4=5) should result in SparseEncoding", + encoding instanceof SparseEncoding); + assertTrue("Should have at least 1 unique value", encoding.getUnique() >= 1); + } + + @Test + public void testCreateFromMatrixBlockDeltaDenseWithZero() { + MatrixBlock mb = new MatrixBlock(10, 2, true); + mb.set(0, 0, 10); + mb.set(0, 1, 20); + mb.set(1, 0, 11); + mb.set(1, 1, 21); + mb.set(2, 0, 12); + mb.set(2, 1, 22); + mb.set(3, 0, 13); + mb.set(3, 1, 23); + + IEncode encoding = EncodingFactory.createFromMatrixBlockDelta(mb, false, ColIndexFactory.create(2), 10); + assertNotNull("Encoding should not be null", encoding); + assertTrue("Sparse matrix with some non-zero rows (4 >= 10/4=2.5 but 4 < 10) should result in DenseEncoding with zero=true", + encoding instanceof DenseEncoding); + assertTrue("Should have at least 1 unique value", encoding.getUnique() >= 1); + } + } diff --git a/src/test/java/org/apache/sysds/test/component/compress/readers/ReaderColumnSelectionSparseDeltaTest.java b/src/test/java/org/apache/sysds/test/component/compress/readers/ReaderColumnSelectionSparseDeltaTest.java new file mode 100644 index 00000000000..37aeb8fb987 --- /dev/null +++ b/src/test/java/org/apache/sysds/test/component/compress/readers/ReaderColumnSelectionSparseDeltaTest.java @@ -0,0 +1,137 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor 
license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysds.test.component.compress.readers; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; + +import org.apache.sysds.runtime.compress.colgroup.indexes.ColIndexFactory; +import org.apache.sysds.runtime.compress.colgroup.indexes.IColIndex; +import org.apache.sysds.runtime.compress.readers.ReaderColumnSelection; +import org.apache.sysds.runtime.compress.utils.DblArray; +import org.apache.sysds.runtime.matrix.data.MatrixBlock; +import org.junit.Test; + +public class ReaderColumnSelectionSparseDeltaTest { + + @Test + public void testSparseDeltaReaderEmptyRowSkips() { + MatrixBlock mb = new MatrixBlock(4, 3, true); + mb.allocateSparseRowsBlock(); + + mb.appendValue(0, 0, 1.0); + mb.appendValue(2, 0, 5.0); + mb.appendValue(3, 2, 10.0); + + IColIndex colIndexes = ColIndexFactory.create(new int[] {0}); + ReaderColumnSelection reader = ReaderColumnSelection.createDeltaReader(mb, colIndexes, false); + + DblArray row0 = reader.nextRow(); + assertEquals(1.0, row0.getData()[0], 0.0); + + DblArray row1 = reader.nextRow(); + assertEquals(-1.0, row1.getData()[0], 0.0); + + DblArray row2 = reader.nextRow(); + assertEquals(5.0, row2.getData()[0], 0.0); + + DblArray row3 = reader.nextRow(); + 
assertEquals(-5.0, row3.getData()[0], 0.0); + } + + @Test + public void testSparseDeltaReaderTargetSmallerThanSparse() { + MatrixBlock mb = new MatrixBlock(2, 5, true); + mb.allocateSparseRowsBlock(); + + mb.appendValue(0, 1, 10.0); + mb.appendValue(0, 3, 20.0); + + mb.appendValue(1, 2, 30.0); + mb.appendValue(1, 4, 40.0); + + IColIndex colIndexes = ColIndexFactory.create(new int[] {0, 2}); + ReaderColumnSelection reader = ReaderColumnSelection.createDeltaReader(mb, colIndexes, false); + + DblArray row0 = reader.nextRow(); + assertNotNull(row0); + assertEquals(0.0, row0.getData()[0], 0.0); + assertEquals(0.0, row0.getData()[1], 0.0); + + DblArray row1 = reader.nextRow(); + assertNotNull(row1); + assertEquals(0.0, row1.getData()[0], 0.0); + assertEquals(30.0, row1.getData()[1], 0.0); + } + + @Test + public void testSparseDeltaReaderColumnIndexAheadOfSparse() { + MatrixBlock mb = new MatrixBlock(2, 10, true); + mb.allocateSparseRowsBlock(); + + mb.appendValue(0, 1, 10.0); + mb.appendValue(0, 2, 15.0); + + mb.appendValue(1, 1, 20.0); + mb.appendValue(1, 2, 25.0); + mb.appendValue(1, 3, 30.0); + + IColIndex colIndexes = ColIndexFactory.create(new int[] {3, 4}); + ReaderColumnSelection reader = ReaderColumnSelection.createDeltaReader(mb, colIndexes, false); + + DblArray row0 = reader.nextRow(); + assertNotNull(row0); + assertEquals(0.0, row0.getData()[0], 0.0); + assertEquals(0.0, row0.getData()[1], 0.0); + + DblArray row1 = reader.nextRow(); + assertNotNull(row1); + assertEquals(30.0, row1.getData()[0], 0.0); + assertEquals(0.0, row1.getData()[1], 0.0); + } + + @Test + public void testSparseDeltaReaderColumnIndexBehindSparse() { + MatrixBlock mb = new MatrixBlock(2, 10, true); + mb.allocateSparseRowsBlock(); + + mb.appendValue(0, 3, 10.0); + mb.appendValue(0, 5, 20.0); + + mb.appendValue(1, 1, 30.0); + mb.appendValue(1, 7, 40.0); + + IColIndex colIndexes = ColIndexFactory.create(new int[] {1, 3, 5}); + ReaderColumnSelection reader = 
ReaderColumnSelection.createDeltaReader(mb, colIndexes, false); + + DblArray row0 = reader.nextRow(); + assertNotNull(row0); + assertEquals(0.0, row0.getData()[0], 0.0); + assertEquals(10.0, row0.getData()[1], 0.0); + assertEquals(20.0, row0.getData()[2], 0.0); + + DblArray row1 = reader.nextRow(); + assertNotNull(row1); + assertEquals(30.0, row1.getData()[0], 0.0); + assertEquals(-10.0, row1.getData()[1], 0.0); + assertEquals(-20.0, row1.getData()[2], 0.0); + } +} + diff --git a/src/test/java/org/apache/sysds/test/component/compress/readers/ReadersDeltaTest.java b/src/test/java/org/apache/sysds/test/component/compress/readers/ReadersDeltaTest.java index eb4df2f47eb..cf6e3627141 100644 --- a/src/test/java/org/apache/sysds/test/component/compress/readers/ReadersDeltaTest.java +++ b/src/test/java/org/apache/sysds/test/component/compress/readers/ReadersDeltaTest.java @@ -25,17 +25,22 @@ import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; -import org.apache.commons.lang3.NotImplementedException; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.sysds.runtime.compress.DMLCompressionException; +import java.io.DataOutput; +import java.io.IOException; + import org.apache.sysds.runtime.compress.colgroup.indexes.ColIndexFactory; +import org.apache.sysds.runtime.compress.colgroup.indexes.IColIndex; +import org.apache.sysds.runtime.compress.colgroup.indexes.IIterate; import org.apache.sysds.runtime.compress.readers.ReaderColumnSelection; import org.apache.sysds.runtime.compress.readers.ReaderColumnSelectionDenseSingleBlockDelta; import org.apache.sysds.runtime.compress.readers.ReaderColumnSelectionDenseMultiBlockDelta; import org.apache.sysds.runtime.compress.readers.ReaderColumnSelectionSparseDelta; import org.apache.sysds.runtime.compress.readers.ReaderColumnSelectionEmpty; import org.apache.sysds.runtime.compress.utils.DblArray; +import org.apache.sysds.runtime.data.DenseBlockFP64; import 
org.apache.sysds.runtime.matrix.data.MatrixBlock; import org.junit.Test; @@ -73,20 +78,6 @@ public void testDeltaReaderDenseSingleBlockBasic() { assertNull(reader.nextRow()); } - @Test - public void testDeltaReaderFirstRowAsIs() { - MatrixBlock mb = new MatrixBlock(2, 2, false); - mb.allocateDenseBlock(); - mb.set(0, 0, 5); - mb.set(0, 1, 10); - mb.set(1, 0, 7); - mb.set(1, 1, 12); - - ReaderColumnSelection reader = ReaderColumnSelection.createDeltaReader(mb, ColIndexFactory.create(2), false); - DblArray row0 = reader.nextRow(); - assertArrayEquals(new double[] {5, 10}, row0.getData(), 0.0); - } - @Test public void testDeltaReaderNegativeValues() { MatrixBlock mb = new MatrixBlock(3, 2, false); @@ -257,13 +248,6 @@ public void testDeltaReaderInvalidRange() { ReaderColumnSelection.createDeltaReader(mb, ColIndexFactory.create(2), false, 10, 9); } - @Test(expected = NotImplementedException.class) - public void testDeltaReaderTransposed() { - MatrixBlock mb = new MatrixBlock(10, 10, false); - mb.allocateDenseBlock(); - mb.setNonZeros(100); - ReaderColumnSelection.createDeltaReader(mb, ColIndexFactory.create(2), true); - } @Test public void testDeltaReaderLargeMatrix() { @@ -319,5 +303,352 @@ public void testDeltaReaderEmptyMatrixSparse() { assertNull(reader.nextRow()); } + @Test + public void testDeltaReaderDenseMultiBlock() { + MatrixBlock mb = new MatrixBlock(3, 2, false); + mb.allocateDenseBlock(); + mb.set(0, 0, 10); + mb.set(0, 1, 20); + mb.set(1, 0, 11); + mb.set(1, 1, 21); + mb.set(2, 0, 12); + mb.set(2, 1, 22); + + MatrixBlock mbMultiBlock = new MatrixBlock(mb.getNumRows(), mb.getNumColumns(), + new DenseBlockFP64Mock(mb.getNumRows(), mb.getNumColumns(), mb.getDenseBlockValues())); + mbMultiBlock.setNonZeros(mb.getNonZeros()); + + ReaderColumnSelection reader = ReaderColumnSelection.createDeltaReader(mbMultiBlock, ColIndexFactory.create(2), false); + assertNotNull(reader); + assertEquals(ReaderColumnSelectionDenseMultiBlockDelta.class, reader.getClass()); + + 
DblArray row0 = reader.nextRow(); + assertNotNull(row0); + assertArrayEquals(new double[] {10, 20}, row0.getData(), 0.0); + + DblArray row1 = reader.nextRow(); + assertNotNull(row1); + assertArrayEquals(new double[] {1, 1}, row1.getData(), 0.0); + + DblArray row2 = reader.nextRow(); + assertNotNull(row2); + assertArrayEquals(new double[] {1, 1}, row2.getData(), 0.0); + + assertNull(reader.nextRow()); + } + + @Test + public void testDeltaReaderDenseMultiBlockSingleRow() { + MatrixBlock mb = new MatrixBlock(1, 2, false); + mb.allocateDenseBlock(); + mb.set(0, 0, 10); + mb.set(0, 1, 20); + + MatrixBlock mbMultiBlock = new MatrixBlock(mb.getNumRows(), mb.getNumColumns(), + new DenseBlockFP64Mock(mb.getNumRows(), mb.getNumColumns(), mb.getDenseBlockValues())); + mbMultiBlock.setNonZeros(mb.getNonZeros()); + + ReaderColumnSelection reader = ReaderColumnSelection.createDeltaReader(mbMultiBlock, ColIndexFactory.create(2), false); + assertNotNull(reader); + assertEquals(ReaderColumnSelectionDenseMultiBlockDelta.class, reader.getClass()); + + DblArray row0 = reader.nextRow(); + assertNotNull(row0); + assertArrayEquals(new double[] {10, 20}, row0.getData(), 0.0); + + assertNull(reader.nextRow()); + } + + @Test + public void testDeltaReaderDenseMultiBlockNegativeValues() { + MatrixBlock mb = new MatrixBlock(3, 2, false); + mb.allocateDenseBlock(); + mb.set(0, 0, 10); + mb.set(0, 1, 20); + mb.set(1, 0, 8); + mb.set(1, 1, 15); + mb.set(2, 0, 12); + mb.set(2, 1, 25); + + MatrixBlock mbMultiBlock = new MatrixBlock(mb.getNumRows(), mb.getNumColumns(), + new DenseBlockFP64Mock(mb.getNumRows(), mb.getNumColumns(), mb.getDenseBlockValues())); + mbMultiBlock.setNonZeros(mb.getNonZeros()); + + ReaderColumnSelection reader = ReaderColumnSelection.createDeltaReader(mbMultiBlock, ColIndexFactory.create(2), false); + assertEquals(ReaderColumnSelectionDenseMultiBlockDelta.class, reader.getClass()); + + DblArray row0 = reader.nextRow(); + assertArrayEquals(new double[] {10, 20}, 
row0.getData(), 0.0); + + DblArray row1 = reader.nextRow(); + assertArrayEquals(new double[] {-2, -5}, row1.getData(), 0.0); + + DblArray row2 = reader.nextRow(); + assertArrayEquals(new double[] {4, 10}, row2.getData(), 0.0); + } + + @Test + public void testDeltaReaderDenseMultiBlockColumnSelection() { + MatrixBlock mb = new MatrixBlock(3, 4, false); + mb.allocateDenseBlock(); + mb.set(0, 0, 10); + mb.set(0, 1, 20); + mb.set(0, 2, 30); + mb.set(0, 3, 40); + mb.set(1, 0, 11); + mb.set(1, 1, 21); + mb.set(1, 2, 31); + mb.set(1, 3, 41); + mb.set(2, 0, 12); + mb.set(2, 1, 22); + mb.set(2, 2, 32); + mb.set(2, 3, 42); + + MatrixBlock mbMultiBlock = new MatrixBlock(mb.getNumRows(), mb.getNumColumns(), + new DenseBlockFP64Mock(mb.getNumRows(), mb.getNumColumns(), mb.getDenseBlockValues())); + mbMultiBlock.setNonZeros(mb.getNonZeros()); + + ReaderColumnSelection reader = ReaderColumnSelection.createDeltaReader(mbMultiBlock, ColIndexFactory.createI(0, 2), false); + assertEquals(ReaderColumnSelectionDenseMultiBlockDelta.class, reader.getClass()); + + DblArray row0 = reader.nextRow(); + assertArrayEquals(new double[] {10, 30}, row0.getData(), 0.0); + + DblArray row1 = reader.nextRow(); + assertArrayEquals(new double[] {1, 1}, row1.getData(), 0.0); + + DblArray row2 = reader.nextRow(); + assertArrayEquals(new double[] {1, 1}, row2.getData(), 0.0); + } + + @Test + public void testDeltaReaderDenseMultiBlockWithRange() { + MatrixBlock mb = new MatrixBlock(5, 2, false); + mb.allocateDenseBlock(); + for(int i = 0; i < 5; i++) { + mb.set(i, 0, 10 + i); + mb.set(i, 1, 20 + i); + } + + MatrixBlock mbMultiBlock = new MatrixBlock(mb.getNumRows(), mb.getNumColumns(), + new DenseBlockFP64Mock(mb.getNumRows(), mb.getNumColumns(), mb.getDenseBlockValues())); + mbMultiBlock.setNonZeros(mb.getNonZeros()); + + ReaderColumnSelection reader = ReaderColumnSelection.createDeltaReader(mbMultiBlock, ColIndexFactory.create(2), false, 1, 4); + 
assertEquals(ReaderColumnSelectionDenseMultiBlockDelta.class, reader.getClass()); + + DblArray row1 = reader.nextRow(); + assertArrayEquals(new double[] {11, 21}, row1.getData(), 0.0); + + DblArray row2 = reader.nextRow(); + assertArrayEquals(new double[] {1, 1}, row2.getData(), 0.0); + + DblArray row3 = reader.nextRow(); + assertArrayEquals(new double[] {1, 1}, row3.getData(), 0.0); + + assertNull(reader.nextRow()); + } + + @Test + public void testDeltaReaderDenseMultiBlockZeros() { + MatrixBlock mb = new MatrixBlock(3, 2, false); + mb.allocateDenseBlock(); + mb.set(0, 0, 5); + mb.set(0, 1, 0); + mb.set(1, 0, 5); + mb.set(1, 1, 0); + mb.set(2, 0, 0); + mb.set(2, 1, 5); + + MatrixBlock mbMultiBlock = new MatrixBlock(mb.getNumRows(), mb.getNumColumns(), + new DenseBlockFP64Mock(mb.getNumRows(), mb.getNumColumns(), mb.getDenseBlockValues())); + mbMultiBlock.setNonZeros(mb.getNonZeros()); + + ReaderColumnSelection reader = ReaderColumnSelection.createDeltaReader(mbMultiBlock, ColIndexFactory.create(2), false); + assertEquals(ReaderColumnSelectionDenseMultiBlockDelta.class, reader.getClass()); + + DblArray row0 = reader.nextRow(); + assertArrayEquals(new double[] {5, 0}, row0.getData(), 0.0); + + DblArray row1 = reader.nextRow(); + assertArrayEquals(new double[] {0, 0}, row1.getData(), 0.0); + + DblArray row2 = reader.nextRow(); + assertArrayEquals(new double[] {-5, 5}, row2.getData(), 0.0); + } + + @Test(expected = DMLCompressionException.class) + public void testDeltaReaderEmptyColumnIndices() { + MatrixBlock mb = new MatrixBlock(3, 2, false); + mb.allocateDenseBlock(); + IColIndex emptyColIndex = new EmptyColIndexMock(); + ReaderColumnSelection.createDeltaReader(mb, emptyColIndex, false); + } + + private static class DenseBlockFP64Mock extends DenseBlockFP64 { + private static final long serialVersionUID = -3601232958390554672L; + + public DenseBlockFP64Mock(int nRow, int nCol, double[] data) { + super(new int[] {nRow, nCol}, data); + } + + @Override + public 
boolean isContiguous() { + return false; + } + + @Override + public int numBlocks() { + return 2; + } + } + + private static class EmptyColIndexMock implements IColIndex { + @Override + public int size() { + return 0; + } + + @Override + public int get(int i) { + throw new IndexOutOfBoundsException(); + } + + @Override + public IColIndex combine(IColIndex other) { + throw new UnsupportedOperationException(); + } + + @Override + public IColIndex shift(int i) { + throw new UnsupportedOperationException(); + } + + @Override + public IColIndex.SliceResult slice(int l, int u) { + throw new UnsupportedOperationException(); + } + + @Override + public boolean contains(int i) { + return false; + } + + @Override + public boolean contains(IColIndex a, IColIndex b) { + return false; + } + + @Override + public boolean containsStrict(IColIndex a, IColIndex b) { + return false; + } + + @Override + public boolean containsAny(IColIndex idx) { + return false; + } + + @Override + public int findIndex(int i) { + return -1; + } + + @Override + public boolean equals(Object other) { + return other instanceof IColIndex && equals((IColIndex) other); + } + + @Override + public boolean equals(IColIndex other) { + return other != null && other.size() == 0; + } + + @Override + public int hashCode() { + return 0; + } + + @Override + public IIterate iterator() { + return new IIterate() { + @Override + public boolean hasNext() { + return false; + } + + @Override + public int next() { + throw new java.util.NoSuchElementException(); + } + + @Override + public int v() { + throw new java.util.NoSuchElementException(); + } + + @Override + public int i() { + return -1; + } + }; + } + + @Override + public void write(DataOutput out) throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public long getExactSizeOnDisk() { + return 0; + } + + @Override + public long estimateInMemorySize() { + return 0; + } + + @Override + public boolean isContiguous() { + return false; + } + 
+ @Override + public int[] getReorderingIndex() { + return new int[0]; + } + + @Override + public boolean isSorted() { + return true; + } + + @Override + public IColIndex sort() { + return this; + } + + @Override + public double avgOfIndex() { + return 0; + } + + @Override + public void decompressToDenseFromSparse(org.apache.sysds.runtime.data.SparseBlock sb, int vr, int off, double[] c) { + throw new UnsupportedOperationException(); + } + + @Override + public void decompressVec(int nCol, double[] c, int off, double[] values, int rowIdx) { + throw new UnsupportedOperationException(); + } + + @Override + public String toString() { + return "EmptyColIndexMock[]"; + } + } + }