diff --git a/src/main/java/org/apache/sysds/runtime/compress/CompressionSettings.java b/src/main/java/org/apache/sysds/runtime/compress/CompressionSettings.java index f6321bc1b6d..af944fce750 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/CompressionSettings.java +++ b/src/main/java/org/apache/sysds/runtime/compress/CompressionSettings.java @@ -133,11 +133,14 @@ public class CompressionSettings { public final double[] scaleFactors; + public final boolean preferDeltaEncoding; + protected CompressionSettings(double samplingRatio, double samplePower, boolean allowSharedDictionary, String transposeInput, int seed, boolean lossy, EnumSet validCompressions, boolean sortValuesByLength, PartitionerType columnPartitioner, int maxColGroupCoCode, double coCodePercentage, int minimumSampleSize, int maxSampleSize, EstimationType estimationType, CostType costComputationType, - double minimumCompressionRatio, boolean isInSparkInstruction, SORT_TYPE sdcSortType, double[] scaleFactors) { + double minimumCompressionRatio, boolean isInSparkInstruction, SORT_TYPE sdcSortType, double[] scaleFactors, + boolean preferDeltaEncoding) { this.samplingRatio = samplingRatio; this.samplePower = samplePower; this.allowSharedDictionary = allowSharedDictionary; @@ -157,6 +160,7 @@ protected CompressionSettings(double samplingRatio, double samplePower, boolean this.isInSparkInstruction = isInSparkInstruction; this.sdcSortType = sdcSortType; this.scaleFactors = scaleFactors; + this.preferDeltaEncoding = preferDeltaEncoding; if(!printedStatus && LOG.isDebugEnabled()) { printedStatus = true; diff --git a/src/main/java/org/apache/sysds/runtime/compress/CompressionSettingsBuilder.java b/src/main/java/org/apache/sysds/runtime/compress/CompressionSettingsBuilder.java index ae6a0b2d231..02c9f97498d 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/CompressionSettingsBuilder.java +++ b/src/main/java/org/apache/sysds/runtime/compress/CompressionSettingsBuilder.java @@ -53,6 +53,7 @@ public class CompressionSettingsBuilder { private boolean isInSparkInstruction = false; private SORT_TYPE sdcSortType = SORT_TYPE.MATERIALIZE; private double[] scaleFactors = null; + private boolean preferDeltaEncoding = false; public CompressionSettingsBuilder() { @@ -101,6 +102,7 @@ public CompressionSettingsBuilder copySettings(CompressionSettings that) { this.maxColGroupCoCode = that.maxColGroupCoCode; this.coCodePercentage = that.coCodePercentage; this.minimumSampleSize = that.minimumSampleSize; + this.preferDeltaEncoding = that.preferDeltaEncoding; return this; } @@ -336,6 +338,19 @@ public CompressionSettingsBuilder setSDCSortType(SORT_TYPE sdcSortType) { return this; } + /** + * Set whether to prefer delta encoding during compression estimation. + * When enabled, the compression estimator will use delta encoding statistics + * instead of regular encoding statistics. + * + * @param preferDeltaEncoding Whether to prefer delta encoding + * @return The CompressionSettingsBuilder + */ + public CompressionSettingsBuilder setPreferDeltaEncoding(boolean preferDeltaEncoding) { + this.preferDeltaEncoding = preferDeltaEncoding; + return this; + } + /** * Create the CompressionSettings object to use in the compression. * @@ -345,6 +360,6 @@ public CompressionSettings create() { return new CompressionSettings(samplingRatio, samplePower, allowSharedDictionary, transposeInput, seed, lossy, validCompressions, sortValuesByLength, columnPartitioner, maxColGroupCoCode, coCodePercentage, minimumSampleSize, maxSampleSize, estimationType, costType, minimumCompressionRatio, isInSparkInstruction, - sdcSortType, scaleFactors); + sdcSortType, scaleFactors, preferDeltaEncoding); } } diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDC.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDC.java index fc82c58e16b..ac4defcabd5 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDC.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDC.java @@ -33,6 +33,7 @@ import org.apache.sysds.runtime.compress.CompressedMatrixBlock; import org.apache.sysds.runtime.compress.DMLCompressionException; import org.apache.sysds.runtime.compress.colgroup.ColGroupUtils.P; +import org.apache.sysds.runtime.compress.colgroup.dictionary.DeltaDictionary; import org.apache.sysds.runtime.compress.colgroup.dictionary.Dictionary; import org.apache.sysds.runtime.compress.colgroup.dictionary.DictionaryFactory; import org.apache.sysds.runtime.compress.colgroup.dictionary.IDictionary; @@ -43,6 +44,9 @@ import org.apache.sysds.runtime.compress.colgroup.indexes.RangeIndex; import org.apache.sysds.runtime.compress.colgroup.mapping.AMapToData; import org.apache.sysds.runtime.compress.colgroup.mapping.MapToFactory; +import org.apache.sysds.runtime.compress.utils.ACount; +import org.apache.sysds.runtime.compress.utils.DblArray; +import org.apache.sysds.runtime.compress.utils.DblArrayCountHashMap; import org.apache.sysds.runtime.compress.colgroup.offset.AOffsetIterator; import org.apache.sysds.runtime.compress.colgroup.offset.OffsetFactory; import org.apache.sysds.runtime.compress.colgroup.scheme.DDCScheme; @@ -77,7 +81,7 @@ public class ColGroupDDC extends APreAgg implements IMapToDataGroup { static final VectorSpecies SPECIES = DoubleVector.SPECIES_PREFERRED; - private ColGroupDDC(IColIndex colIndexes, IDictionary dict, AMapToData data, int[] cachedCounts) { + protected ColGroupDDC(IColIndex colIndexes, IDictionary dict, AMapToData data, int[] cachedCounts) { super(colIndexes, dict, cachedCounts); _data = data; @@ -1105,4 +1109,57 @@ protected boolean allowShallowIdentityRightMult() { return true; } + public AColGroup convertToDeltaDDC() { + int numCols = _colIndexes.size(); + int numRows = _data.size(); + + DblArrayCountHashMap map = new DblArrayCountHashMap(Math.max(numRows, 64)); + double[] rowDelta = new double[numCols]; + double[] prevRow = new double[numCols]; + DblArray dblArray = new DblArray(rowDelta); + int[] rowToDictId = new int[numRows]; + + double[] dictVals = _dict.getValues(); + + for(int i = 0; i < numRows; i++) { + int dictIdx = _data.getIndex(i); + int off = dictIdx * numCols; + for(int j = 0; j < numCols; j++) { + double val = dictVals[off + j]; + if(i == 0) { + rowDelta[j] = val; + prevRow[j] = val; + } else { + rowDelta[j] = val - prevRow[j]; + prevRow[j] = val; + } + } + + rowToDictId[i] = map.increment(dblArray); + } + + if(map.size() == 0) + return new ColGroupEmpty(_colIndexes); + + ACount[] vals = map.extractValues(); + final int nVals = vals.length; + final double[] dictValues = new double[nVals * numCols]; + final int[] oldIdToNewId = new int[map.size()]; + int idx = 0; + for(int i = 0; i < nVals; i++) { + final ACount dac = vals[i]; + final double[] arrData = dac.key().getData(); + System.arraycopy(arrData, 0, dictValues, idx, numCols); + oldIdToNewId[dac.id] = i; + idx += numCols; + } + + DeltaDictionary deltaDict = new DeltaDictionary(dictValues, numCols); + AMapToData newData = MapToFactory.create(numRows, nVals); + for(int i = 0; i < numRows; i++) { + newData.set(i, oldIdToNewId[rowToDictId[i]]); + } + return ColGroupDeltaDDC.create(_colIndexes, deltaDict, newData, null); + } + } diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDeltaDDC.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDeltaDDC.java index 2666860ca68..08bdfd1e1d8 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDeltaDDC.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDeltaDDC.java @@ -19,62 +19,559 @@ package org.apache.sysds.runtime.compress.colgroup; +import java.io.DataInput; +import java.io.IOException; +import java.util.Arrays; +import java.util.Comparator; + +import org.apache.commons.lang3.NotImplementedException; +import org.apache.sysds.runtime.compress.CompressedMatrixBlock; +import org.apache.sysds.runtime.compress.DMLCompressionException; +import org.apache.sysds.runtime.compress.colgroup.dictionary.DeltaDictionary; +import org.apache.sysds.runtime.compress.colgroup.dictionary.Dictionary; +import org.apache.sysds.runtime.compress.colgroup.dictionary.DictionaryFactory; +import org.apache.sysds.runtime.compress.colgroup.dictionary.IDictionary; +import org.apache.sysds.runtime.compress.colgroup.indexes.ColIndexFactory; +import org.apache.sysds.runtime.compress.colgroup.indexes.IColIndex; +import org.apache.sysds.runtime.compress.colgroup.mapping.AMapToData; +import org.apache.sysds.runtime.compress.colgroup.mapping.MapToFactory; +import org.apache.sysds.runtime.compress.utils.ACount; +import org.apache.sysds.runtime.compress.utils.DblArray; +import org.apache.sysds.runtime.compress.utils.DblArrayCountHashMap; +import org.apache.sysds.runtime.compress.utils.DoubleCountHashMap; +import org.apache.sysds.runtime.compress.utils.Util; +import org.apache.sysds.runtime.data.DenseBlock; +import org.apache.sysds.runtime.data.SparseBlock; +import org.apache.sysds.runtime.data.SparseBlockMCSR; +import org.apache.sysds.runtime.functionobjects.Builtin; +import org.apache.sysds.runtime.functionobjects.Divide; +import org.apache.sysds.runtime.functionobjects.Minus; +import org.apache.sysds.runtime.functionobjects.Multiply; +import org.apache.sysds.runtime.functionobjects.Plus; +import org.apache.sysds.runtime.matrix.data.MatrixBlock; +import org.apache.sysds.runtime.matrix.operators.ScalarOperator; +import org.apache.sysds.runtime.matrix.operators.UnaryOperator; + /** * Class to encapsulate information about a column group that is first delta encoded then encoded with dense dictionary * encoding (DeltaDDC). */ -public class ColGroupDeltaDDC { // extends ColGroupDDC - -// private static final long serialVersionUID = -1045556313148564147L; - -// /** Constructor for serialization */ -// protected ColGroupDeltaDDC() { -// } - -// private ColGroupDeltaDDC(int[] colIndexes, ADictionary dict, AMapToData data, int[] cachedCounts) { -// super(); -// LOG.info("Carefully use of DeltaDDC since implementation is not finished."); -// _colIndexes = colIndexes; -// _dict = dict; -// _data = data; -// } - -// public static AColGroup create(int[] colIndices, ADictionary dict, AMapToData data, int[] cachedCounts) { -// if(dict == null) -// throw new NotImplementedException("Not implemented constant delta group"); -// else -// return new ColGroupDeltaDDC(colIndices, dict, data, cachedCounts); -// } - -// public CompressionType getCompType() { -// return CompressionType.DeltaDDC; -// } - -// @Override -// protected void decompressToDenseBlockDenseDictionary(DenseBlock db, int rl, int ru, int offR, int offC, -// double[] values) { -// final int nCol = _colIndexes.length; -// for(int i = rl, offT = rl + offR; i < ru; i++, offT++) { -// final double[] c = db.values(offT); -// final int off = db.pos(offT) + offC; -// final int rowIndex = _data.getIndex(i) * nCol; -// final int prevOff = (off == 0) ? off : off - nCol; -// for(int j = 0; j < nCol; j++) { -// // Here we use the values in the previous row to compute current values along with the delta -// double newValue = c[prevOff + j] + values[rowIndex + j]; -// c[off + _colIndexes[j]] += newValue; -// } -// } -// } - -// @Override -// protected void decompressToSparseBlockDenseDictionary(SparseBlock ret, int rl, int ru, int offR, int offC, -// double[] values) { -// throw new NotImplementedException(); -// } - -// @Override -// public AColGroup scalarOperation(ScalarOperator op) { -// return new ColGroupDeltaDDC(_colIndexes, _dict.applyScalarOp(op), _data, getCachedCounts()); -// } +public class ColGroupDeltaDDC extends ColGroupDDC { + private static final long serialVersionUID = -1045556313148564147L; + + private ColGroupDeltaDDC(IColIndex colIndexes, IDictionary dict, AMapToData data, int[] cachedCounts) { + super(colIndexes, dict, data, cachedCounts); + if(CompressedMatrixBlock.debug) { + if(!(dict instanceof DeltaDictionary)) + throw new DMLCompressionException("DeltaDDC must use DeltaDictionary"); + } + } + + public static AColGroup create(IColIndex colIndexes, IDictionary dict, AMapToData data, int[] cachedCounts) { + if(dict == null) + return new ColGroupEmpty(colIndexes); + + if(!(dict instanceof DeltaDictionary)) + throw new DMLCompressionException("ColGroupDeltaDDC must use DeltaDictionary"); + + if(data.getUnique() == 1) { + DeltaDictionary deltaDict = (DeltaDictionary) dict; + double[] values = deltaDict.getValues(); + final int nCol = colIndexes.size(); + boolean allZeros = true; + for(int i = 0; i < nCol; i++) { + if(!Util.eq(values[i], 0.0)) { + allZeros = false; + break; + } + } + if(allZeros) { + double[] constValues = new double[nCol]; + System.arraycopy(values, 0, constValues, 0, nCol); + return ColGroupConst.create(colIndexes, Dictionary.create(constValues)); + } + } + + return new ColGroupDeltaDDC(colIndexes, dict, data, cachedCounts); + } + + @Override + public CompressionType getCompType() { + return CompressionType.DeltaDDC; + } + + @Override + public ColGroupType getColGroupType() { + return ColGroupType.DeltaDDC; + } + + public static ColGroupDeltaDDC read(DataInput in) throws IOException { + IColIndex cols = ColIndexFactory.read(in); + IDictionary dict = DictionaryFactory.read(in); + AMapToData data = MapToFactory.readIn(in); + return new ColGroupDeltaDDC(cols, dict, data, null); + } + + @Override + protected void decompressToDenseBlockDenseDictionary(DenseBlock db, int rl, int ru, int offR, int offC, + double[] values) { + final int nCol = _colIndexes.size(); + final double[] prevRow = new double[nCol]; + + if(rl > 0) { + final int dictIdx0 = _data.getIndex(0); + final int rowIndex0 = dictIdx0 * nCol; + for(int j = 0; j < nCol; j++) { + prevRow[j] = values[rowIndex0 + j]; + } + for(int i = 1; i < rl; i++) { + final int dictIdx = _data.getIndex(i); + final int rowIndex = dictIdx * nCol; + for(int j = 0; j < nCol; j++) { + prevRow[j] += values[rowIndex + j]; + } + } + } + + if(db.isContiguous() && nCol == db.getDim(1) && offC == 0) { + final int nColOut = db.getDim(1); + final double[] c = db.values(0); + for(int i = rl; i < ru; i++) { + final int dictIdx = _data.getIndex(i); + final int rowIndex = dictIdx * nCol; + final int rowBaseOff = (i + offR) * nColOut; + + if(i == 0 && rl == 0) { + for(int j = 0; j < nCol; j++) { + final double value = values[rowIndex + j]; + c[rowBaseOff + j] = value; + prevRow[j] = value; + } + } + else { + for(int j = 0; j < nCol; j++) { + final double delta = values[rowIndex + j]; + final double newValue = prevRow[j] + delta; + c[rowBaseOff + j] = newValue; + prevRow[j] = newValue; + } + } + } + } + else { + for(int i = rl, offT = rl + offR; i < ru; i++, offT++) { + final double[] c = db.values(offT); + final int off = db.pos(offT) + offC; + final int dictIdx = _data.getIndex(i); + final int rowIndex = dictIdx * nCol; + + if(i == 0 && rl == 0) { + for(int j = 0; j < nCol; j++) { + final double value = values[rowIndex + j]; + final int colIdx = _colIndexes.get(j); + c[off + colIdx] = value; + prevRow[j] = value; + } + } + else { + for(int j = 0; j < nCol; j++) { + final double delta = values[rowIndex + j]; + final double newValue = prevRow[j] + delta; + final int colIdx = _colIndexes.get(j); + c[off + colIdx] = newValue; + prevRow[j] = newValue; + } + } + } + } + } + + @Override + protected void decompressToSparseBlockDenseDictionary(SparseBlock ret, int rl, int ru, int offR, int offC, + double[] values) { + final int nCol = _colIndexes.size(); + final double[] prevRow = new double[nCol]; + + if(rl > 0) { + final int dictIdx0 = _data.getIndex(0); + final int rowIndex0 = dictIdx0 * nCol; + for(int j = 0; j < nCol; j++) { + prevRow[j] = values[rowIndex0 + j]; + } + for(int i = 1; i < rl; i++) { + final int dictIdx = _data.getIndex(i); + final int rowIndex = dictIdx * nCol; + for(int j = 0; j < nCol; j++) { + prevRow[j] += values[rowIndex + j]; + } + } + } + + for(int i = rl, offT = rl + offR; i < ru; i++, offT++) { + final int dictIdx = _data.getIndex(i); + final int rowIndex = dictIdx * nCol; + + if(i == 0 && rl == 0) { + for(int j = 0; j < nCol; j++) { + final double value = values[rowIndex + j]; + final int colIdx = _colIndexes.get(j); + ret.append(offT, colIdx + offC, value); + prevRow[j] = value; + } + } + else { + for(int j = 0; j < nCol; j++) { + final double delta = values[rowIndex + j]; + final double newValue = prevRow[j] + delta; + final int colIdx = _colIndexes.get(j); + ret.append(offT, colIdx + offC, newValue); + prevRow[j] = newValue; + } + } + } + } + + @Override + protected void decompressToDenseBlockSparseDictionary(DenseBlock db, int rl, int ru, int offR, int offC, + SparseBlock sb) { + throw new NotImplementedException("Dense block decompression from sparse dictionary for DeltaDDC not yet implemented"); + } + + @Override + protected void decompressToSparseBlockSparseDictionary(SparseBlock ret, int rl, int ru, int offR, int offC, + SparseBlock sb) { + throw new NotImplementedException("Sparse block decompression from sparse dictionary for DeltaDDC not yet implemented"); + } + + @Override + protected void decompressToDenseBlockTransposedSparseDictionary(DenseBlock db, int rl, int ru, SparseBlock sb) { + throw new NotImplementedException("Transposed dense block decompression from sparse dictionary for DeltaDDC not yet implemented"); + } + + @Override + protected void decompressToDenseBlockTransposedDenseDictionary(DenseBlock db, int rl, int ru, double[] dict) { + throw new NotImplementedException("Transposed dense block decompression from dense dictionary for DeltaDDC not yet implemented"); + } + + @Override + protected void decompressToSparseBlockTransposedSparseDictionary(SparseBlockMCSR sbr, SparseBlock sb, int nColOut) { + throw new NotImplementedException("Transposed sparse block decompression from sparse dictionary for DeltaDDC not yet implemented"); + } + + @Override + protected void decompressToSparseBlockTransposedDenseDictionary(SparseBlockMCSR sbr, double[] dict, int nColOut) { + throw new NotImplementedException("Transposed sparse block decompression from dense dictionary for DeltaDDC not yet implemented"); + } + + @Override + public AColGroup scalarOperation(ScalarOperator op) { + if(op.fn instanceof Multiply || op.fn instanceof Divide) { + double[] val = _dict.getValues(); + double[] newVal = new double[val.length]; + for(int i = 0; i < val.length; i++) + newVal[i] = op.executeScalar(val[i]); + return create(_colIndexes, new DeltaDictionary(newVal, _colIndexes.size()), _data, getCounts()); + } + else if(op.fn instanceof Plus || op.fn instanceof Minus) { + return scalarOperationShift(op); + } + else { + AColGroup ddc = convertToDDC(); + return ddc.scalarOperation(op); + } + } + + private AColGroup scalarOperationShift(ScalarOperator op) { + final int nCol = _colIndexes.size(); + final int id0 = _data.getIndex(0); + final double[] vals = _dict.getValues(); + final double[] tuple0 = new double[nCol]; + for(int j = 0; j < nCol; j++) + tuple0[j] = vals[id0 * nCol + j]; + + final double[] tupleNew = new double[nCol]; + for(int j = 0; j < nCol; j++) + tupleNew[j] = op.executeScalar(tuple0[j]); + + int[] counts = getCounts(); + if(counts[id0] == 1) { + double[] newVals = vals.clone(); + for(int j = 0; j < nCol; j++) + newVals[id0 * nCol + j] = tupleNew[j]; + return create(_colIndexes, new DeltaDictionary(newVals, nCol), _data, counts); + } + else { + int idNew = -1; + int nEntries = vals.length / nCol; + for(int k = 0; k < nEntries; k++) { + boolean match = true; + for(int j = 0; j < nCol; j++) { + if(vals[k * nCol + j] != tupleNew[j]) { + match = false; + break; + } + } + if(match) { + idNew = k; + break; + } + } + + IDictionary newDict = _dict; + if(idNew == -1) { + double[] newVals = Arrays.copyOf(vals, vals.length + nCol); + System.arraycopy(tupleNew, 0, newVals, vals.length, nCol); + newDict = new DeltaDictionary(newVals, nCol); + idNew = nEntries; + } + + AMapToData newData = MapToFactory.create(_data.size(), Math.max(_data.getUpperBoundValue(), idNew) + 1); + for(int i = 0; i < _data.size(); i++) + newData.set(i, _data.getIndex(i)); + newData.set(0, idNew); + + return create(_colIndexes, newDict, newData, null); + } + } + + @Override + public AColGroup unaryOperation(UnaryOperator op) { + AColGroup ddc = convertToDDC(); + return ddc.unaryOperation(op); + } + + @Override + public void leftMultByMatrixNoPreAgg(MatrixBlock matrix, MatrixBlock result, int rl, int ru, int cl, int cu) { + throw new NotImplementedException("Left matrix multiplication not supported for DeltaDDC"); + } + + @Override + public void rightDecompressingMult(MatrixBlock right, MatrixBlock ret, int rl, int ru, int nRows, int crl, int cru) { + throw new NotImplementedException("Right matrix multiplication not supported for DeltaDDC"); + } + + @Override + public void preAggregateDense(MatrixBlock m, double[] preAgg, int rl, int ru, int cl, int cu) { + throw new NotImplementedException("Pre-aggregate dense not supported for DeltaDDC"); + } + + @Override + public void preAggregateSparse(SparseBlock sb, double[] preAgg, int rl, int ru, int cl, int cu) { + throw new NotImplementedException("Pre-aggregate sparse not supported for DeltaDDC"); + } + + @Override + public void preAggregateThatDDCStructure(ColGroupDDC that, Dictionary ret) { + throw new NotImplementedException("Pre-aggregate DDC structure not supported for DeltaDDC"); + } + + @Override + public void preAggregateThatSDCZerosStructure(ColGroupSDCZeros that, Dictionary ret) { + throw new NotImplementedException("Pre-aggregate SDCZeros structure not supported for DeltaDDC"); + } + + @Override + public void preAggregateThatSDCSingleZerosStructure(ColGroupSDCSingleZeros that, Dictionary ret) { + throw new NotImplementedException("Pre-aggregate SDCSingleZeros structure not supported for DeltaDDC"); + } + + @Override + protected void preAggregateThatRLEStructure(ColGroupRLE that, Dictionary ret) { + throw new NotImplementedException("Pre-aggregate RLE structure not supported for DeltaDDC"); + } + + @Override + protected double computeMxx(double c, Builtin builtin) { + throw new NotImplementedException("Compute Min/Max not supported for DeltaDDC"); + } + + @Override + protected void computeColMxx(double[] c, Builtin builtin) { + throw new NotImplementedException("Compute Column Min/Max not supported for DeltaDDC"); + } + + @Override + protected void computeRowMxx(double[] c, Builtin builtin, int rl, int ru, double[] preAgg) { + throw new NotImplementedException("Compute Row Min/Max not supported for DeltaDDC"); + } + + @Override + protected void computeRowSums(double[] c, int rl, int ru, double[] preAgg) { + throw new NotImplementedException("Compute Row Sums not supported for DeltaDDC"); + } + + @Override + protected void computeRowProduct(double[] c, int rl, int ru, double[] preAgg) { + throw new NotImplementedException("Compute Row Product not supported for DeltaDDC"); + } + + @Override + public boolean containsValue(double pattern) { + throw new NotImplementedException("Contains value not supported for DeltaDDC"); + } + + @Override + public AColGroup append(AColGroup g) { + throw new NotImplementedException("Append not supported for DeltaDDC"); + } + + @Override + public AColGroup appendNInternal(AColGroup[] g, int blen, int rlen) { + throw new NotImplementedException("AppendN not supported for DeltaDDC"); + } + + @Override + public long getNumberNonZeros(int nRows) { + long nnz = 0; + final int nCol = _colIndexes.size(); + final double[] prevRow = new double[nCol]; + + for(int i = 0; i < nRows; i++) { + final int dictIdx = _data.getIndex(i); + final double[] vals = _dict.getValues(); + final int rowIndex = dictIdx * nCol; + + if(i == 0) { + for(int j = 0; j < nCol; j++) { + double val = vals[rowIndex + j]; + prevRow[j] = val; + if(val != 0) + nnz++; + } + } + else { + for(int j = 0; j < nCol; j++) { + double val = prevRow[j] + vals[rowIndex + j]; + prevRow[j] = val; + if(val != 0) + nnz++; + } + } + } + return nnz; + } + + @Override + public AColGroup sliceRows(int rl, int ru) { + final int nCol = _colIndexes.size(); + double[] firstRowValues = new double[nCol]; + double[] dictVals = ((DeltaDictionary)_dict).getValues(); + + for(int i = 0; i <= rl; i++) { + int dictIdx = _data.getIndex(i); + int dictOffset = dictIdx * nCol; + if(i == 0) { + for(int j = 0; j < nCol; j++) firstRowValues[j] = dictVals[dictOffset + j]; + } else { + for(int j = 0; j < nCol; j++) firstRowValues[j] += dictVals[dictOffset + j]; + } + } + + int nEntries = dictVals.length / nCol; + int newId = -1; + for(int k = 0; k < nEntries; k++) { + boolean match = true; + for(int j = 0; j < nCol; j++) { + if(dictVals[k * nCol + j] != firstRowValues[j]) { + match = false; + break; + } + } + if(match) { + newId = k; + break; + } + } + + IDictionary newDict = _dict; + if(newId == -1) { + double[] newDictVals = Arrays.copyOf(dictVals, dictVals.length + nCol); + System.arraycopy(firstRowValues, 0, newDictVals, dictVals.length, nCol); + newDict = new DeltaDictionary(newDictVals, nCol); + newId = nEntries; + } + + int numRows = ru - rl; + AMapToData slicedData = MapToFactory.create(numRows, Math.max(_data.getUpperBoundValue(), newId) + 1); + for(int i = 0; i < numRows; i++) + slicedData.set(i, _data.getIndex(rl + i)); + + slicedData.set(0, newId); + return ColGroupDeltaDDC.create(_colIndexes, newDict, slicedData, null); + } + + private AColGroup convertToDDC() { + final int nCol = _colIndexes.size(); + final int nRow = _data.size(); + double[] values = new double[nRow * nCol]; + + double[] prevRow = new double[nCol]; + for(int i = 0; i < nRow; i++) { + final int dictIdx = _data.getIndex(i); + final double[] dictVals = _dict.getValues(); + final int rowIndex = dictIdx * nCol; + + for(int j = 0; j < nCol; j++) { + if(i == 0) { + prevRow[j] = dictVals[rowIndex + j]; + } + else { + prevRow[j] = prevRow[j] + dictVals[rowIndex + j]; + } + values[i * nCol + j] = prevRow[j]; + } + } + + return compress(values, _colIndexes); + } + + private static AColGroup compress(double[] values, IColIndex colIndexes) { + int nRow = values.length / colIndexes.size(); + int nCol = colIndexes.size(); + + if(nCol == 1) { + DoubleCountHashMap map = new DoubleCountHashMap(16); + AMapToData mapData = MapToFactory.create(nRow, 256); + for(int i = 0; i < nRow; i++) { + int id = map.increment(values[i]); + if(id >= mapData.getUpperBoundValue()) { + mapData = mapData.resize(Math.max(mapData.getUpperBoundValue() * 2, id + 1)); + } + mapData.set(i, id); + } + if(map.size() == 1) + return ColGroupConst.create(colIndexes, Dictionary.create(new double[] {map.getMostFrequent()})); + + IDictionary dict = Dictionary.create(map.getDictionary()); + return ColGroupDDC.create(colIndexes, dict, mapData.resize(map.size()), null); + } + else { + DblArrayCountHashMap map = new DblArrayCountHashMap(16); + AMapToData mapData = MapToFactory.create(nRow, 256); + DblArray dblArray = new DblArray(new double[nCol]); + for(int i = 0; i < nRow; i++) { + System.arraycopy(values, i * nCol, dblArray.getData(), 0, nCol); + int id = map.increment(dblArray); + if(id >= mapData.getUpperBoundValue()) { + mapData = mapData.resize(Math.max(mapData.getUpperBoundValue() * 2, id + 1)); + } + mapData.set(i, id); + } + if(map.size() == 1) { + ACount[] counts = map.extractValues(); + return ColGroupConst.create(colIndexes, Dictionary.create(counts[0].key().getData())); + } + + ACount[] counts = map.extractValues(); + Arrays.sort(counts, Comparator.comparingInt(x -> x.id)); + + double[] dictValues = new double[counts.length * nCol]; + for(int i = 0; i < counts.length; i++) { + System.arraycopy(counts[i].key().getData(), 0, dictValues, i * nCol, nCol); + } + + IDictionary dict = Dictionary.create(dictValues); + return ColGroupDDC.create(colIndexes, dict, mapData.resize(map.size()), null); + } + } } diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupFactory.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupFactory.java index c6a098f5c32..de333b9e05d 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupFactory.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupFactory.java @@ -38,6 +38,7 @@ import org.apache.sysds.runtime.compress.bitmap.ABitmap; import org.apache.sysds.runtime.compress.bitmap.BitmapEncoder; import org.apache.sysds.runtime.compress.colgroup.AColGroup.CompressionType; +import org.apache.sysds.runtime.compress.colgroup.dictionary.DeltaDictionary; import org.apache.sysds.runtime.compress.colgroup.dictionary.Dictionary; import org.apache.sysds.runtime.compress.colgroup.dictionary.DictionaryFactory; import org.apache.sysds.runtime.compress.colgroup.dictionary.IDictionary; @@ -288,6 +289,12 @@ else if((ct == CompressionType.SDC || ct == CompressionType.CONST) // else if(ct == CompressionType.DDC) { return directCompressDDC(colIndexes, cg); } + else if(ct == CompressionType.DeltaDDC) { + return directCompressDeltaDDC(colIndexes, cg); + } + else if(ct == CompressionType.CONST && cs.preferDeltaEncoding) { + return directCompressDeltaDDC(colIndexes, cg); + } else if(ct == CompressionType.LinearFunctional) { if(cs.scaleFactors != null) { throw new NotImplementedException(); // quantization-fused compression NOT allowed @@ -684,6 +691,130 @@ private AColGroup directCompressDDCMultiCol(IColIndex colIndexes, CompressedSize return ColGroupDDC.create(colIndexes, dict, resData, null); } + private AColGroup directCompressDeltaDDC(IColIndex colIndexes, CompressedSizeInfoColGroup cg) throws Exception { + if(cs.transposed) { + throw new NotImplementedException("Delta encoding for transposed matrices not yet implemented"); + } + if(cs.scaleFactors != null) { + throw new NotImplementedException("Delta encoding with quantization not yet implemented"); + } + + if(colIndexes.size() > 1) { + return directCompressDeltaDDCMultiCol(colIndexes, cg); + } + else { + return directCompressDeltaDDCSingleCol(colIndexes, cg); + } + } + + private AColGroup directCompressDeltaDDCSingleCol(IColIndex colIndexes, CompressedSizeInfoColGroup cg) { + final int col = colIndexes.get(0); + final AMapToData d = MapToFactory.create(nRow, Math.max(Math.min(cg.getNumOffs() + 1, nRow), 126)); + final DoubleCountHashMap map = new DoubleCountHashMap(cg.getNumVals()); + + ReaderColumnSelection reader = ReaderColumnSelection.createDeltaReader(in, colIndexes, cs.transposed, 0, nRow); + DblArray cellVals = reader.nextRow(); + int r = 0; + while(r < nRow && cellVals != null) { + final int row = reader.getCurrentRowIndex(); + if(row == r) { + final double val = cellVals.getData()[0]; + final int id = map.increment(val); + d.set(row, id); + cellVals = reader.nextRow(); + r++; + } + else { + r = row; + } + } + + if(map.size() == 0) + return new ColGroupEmpty(colIndexes); + + final double[] dictValues = map.getDictionary(); + IDictionary dict = new DeltaDictionary(dictValues, 1); + + final int nUnique = map.size(); + final AMapToData resData = d.resize(nUnique); + return ColGroupDeltaDDC.create(colIndexes, dict, resData, null); + } + + private AColGroup directCompressDeltaDDCMultiCol(IColIndex colIndexes, CompressedSizeInfoColGroup cg) throws Exception { + final AMapToData d = MapToFactory.create(nRow, Math.max(Math.min(cg.getNumOffs() + 1, nRow), 126)); + final int fill = d.getUpperBoundValue(); + d.fill(fill); + + final DblArrayCountHashMap map = new DblArrayCountHashMap(Math.max(cg.getNumVals(), 64)); + boolean extra; + if(nRow < CompressionSettings.PAR_DDC_THRESHOLD || k < csi.getNumberColGroups() || pool == null) { + extra = readToMapDeltaDDC(colIndexes, map, d, 0, nRow, fill); + } + else { + throw new NotImplementedException("Parallel delta DDC compression not yet implemented"); + } + + if(map.size() == 0) + return new ColGroupEmpty(colIndexes); + + final ACount[] vals = map.extractValues(); + final int nVals = vals.length; + final int nTuplesOut = nVals + (extra ? 1 : 0); + final double[] dictValues = new double[nTuplesOut * colIndexes.size()]; + final int[] oldIdToNewId = new int[map.size()]; + int idx = 0; + for(int i = 0; i < nVals; i++) { + final ACount dac = vals[i]; + final double[] arrData = dac.key().getData(); + System.arraycopy(arrData, 0, dictValues, idx, colIndexes.size()); + oldIdToNewId[dac.id] = i; + idx += colIndexes.size(); + } + IDictionary dict = new DeltaDictionary(dictValues, colIndexes.size()); + + if(extra) + d.replace(fill, map.size()); + final int nUnique = map.size() + (extra ? 1 : 0); + final AMapToData resData = d.resize(nUnique); + for(int i = 0; i < nRow; i++) { + final int oldId = resData.getIndex(i); + if(extra && oldId == map.size()) { + resData.set(i, nVals); + } + else if(oldId < oldIdToNewId.length) { + resData.set(i, oldIdToNewId[oldId]); + } + } + return ColGroupDeltaDDC.create(colIndexes, dict, resData, null); + } + + private boolean readToMapDeltaDDC(IColIndex colIndexes, DblArrayCountHashMap map, AMapToData data, int rl, int ru, + int fill) { + ReaderColumnSelection reader = ReaderColumnSelection.createDeltaReader(in, colIndexes, cs.transposed, rl, ru); + + DblArray cellVals = reader.nextRow(); + boolean extra = false; + int r = rl; + while(r < ru && cellVals != null) { + final int row = reader.getCurrentRowIndex(); + if(row == r) { + final int id = map.increment(cellVals); + data.set(row, id); + cellVals = reader.nextRow(); + r++; + } + else { + r = row; + extra = true; + } + } + + if(r < ru) + extra = true; + + return extra; + } + private boolean readToMapDDC(IColIndex colIndexes, DblArrayCountHashMap map, AMapToData data, int rl, int ru, int fill) { diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupIO.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupIO.java index 91442281317..b47100d4e64 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupIO.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupIO.java @@ -105,10 +105,12 @@ public static AColGroup readColGroup(DataInput in, int nRows) throws IOException switch(ctype) { case DDC: return ColGroupDDC.read(in); - case DDCFOR: - return ColGroupDDCFOR.read(in); - case OLE: - return ColGroupOLE.read(in, nRows); + case DDCFOR: + return ColGroupDDCFOR.read(in); + case DeltaDDC: + return ColGroupDeltaDDC.read(in); + case OLE: + return ColGroupOLE.read(in, nRows); case RLE: return ColGroupRLE.read(in, nRows); case CONST: diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/DeltaDictionary.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/DeltaDictionary.java index d67ab95f824..d667e76ed5e 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/DeltaDictionary.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/DeltaDictionary.java @@ -19,14 +19,13 @@ package org.apache.sysds.runtime.compress.colgroup.dictionary; +import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; import org.apache.commons.lang3.NotImplementedException; import org.apache.sysds.runtime.functionobjects.Divide; -import org.apache.sysds.runtime.functionobjects.Minus; import org.apache.sysds.runtime.functionobjects.Multiply; -import org.apache.sysds.runtime.functionobjects.Plus; import org.apache.sysds.runtime.matrix.operators.ScalarOperator; /** @@ -50,26 +49,22 @@ public double[] getValues(){ return _values; } + @Override + public double getValue(int i, int col, int nCol) { + return _values[i * nCol + col]; + } + @Override public DeltaDictionary applyScalarOp(ScalarOperator op) { - final double[] retV = new double[_values.length]; if(op.fn instanceof Multiply || op.fn instanceof Divide) { + final double[] retV = new double[_values.length]; for(int i = 0; i < _values.length; i++) retV[i] = op.executeScalar(_values[i]); + return new DeltaDictionary(retV, _numCols); } - else if(op.fn instanceof Plus || op.fn instanceof Minus) { - // With Plus and Minus only the first row needs to be updated when delta encoded - for(int i = 0; i < _values.length; i++) { - if(i < _numCols) - retV[i] = op.executeScalar(_values[i]); - else - retV[i] = _values[i]; - } + else { + throw new NotImplementedException("Scalar op " + op.fn.getClass().getSimpleName() + " not supported in DeltaDictionary"); } - else - throw new NotImplementedException(); - - return new DeltaDictionary(retV, _numCols); } @Override @@ -79,17 +74,30 @@ public long getInMemorySize() { @Override public void write(DataOutput out) throws IOException { - throw new NotImplementedException(); + out.writeByte(DictionaryFactory.Type.DELTA_DICT.ordinal()); + out.writeInt(_numCols); + out.writeInt(_values.length); + for(int i = 0; i < _values.length; i++) + out.writeDouble(_values[i]); + } + + public static DeltaDictionary read(DataInput in) throws IOException { + int numCols = in.readInt(); + int numValues = in.readInt(); + double[] values = new double[numValues]; + for(int i = 0; i < numValues; i++) + values[i] = in.readDouble(); + return new DeltaDictionary(values, numCols); } @Override public long getExactSizeOnDisk() { - throw new NotImplementedException(); + return 1 + 4 + 4 + 8L * _values.length; } @Override public DictType getDictType() { - throw new NotImplementedException(); + return DictType.Delta; } @Override @@ -104,12 +112,19 @@ public int getNumberOfColumns(int nrow){ @Override public String getString(int colIndexes) { - throw new NotImplementedException(); + StringBuilder sb = new StringBuilder(); + for(int i = 0; i < _values.length; i++) { + sb.append(_values[i]); + if(i != _values.length - 1) { + sb.append((i + 1) % colIndexes == 0 ? "\n" : ", "); + } + } + return sb.toString(); } @Override public long getNumberNonZeros(int[] counts, int nCol) { - throw new NotImplementedException(); + throw new NotImplementedException("Cannot calculate non-zeros from DeltaDictionary alone"); } @Override diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/DictionaryFactory.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/DictionaryFactory.java index f88ac99b87b..005d14f9ce1 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/DictionaryFactory.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/DictionaryFactory.java @@ -52,7 +52,7 @@ public interface DictionaryFactory { static final Log LOG = LogFactory.getLog(DictionaryFactory.class.getName()); public enum Type { - FP64_DICT, MATRIX_BLOCK_DICT, INT8_DICT, IDENTITY, IDENTITY_SLICE, PLACE_HOLDER + FP64_DICT, MATRIX_BLOCK_DICT, INT8_DICT, IDENTITY, IDENTITY_SLICE, PLACE_HOLDER, DELTA_DICT } public static IDictionary read(DataInput in) throws IOException { @@ -68,6 +68,8 @@ public static IDictionary read(DataInput in) throws IOException { return IdentityDictionary.read(in); case IDENTITY_SLICE: return IdentityDictionarySlice.read(in); + case DELTA_DICT: + return DeltaDictionary.read(in); case MATRIX_BLOCK_DICT: default: return MatrixBlockDictionary.read(in); diff --git a/src/main/java/org/apache/sysds/runtime/compress/estim/AComEst.java b/src/main/java/org/apache/sysds/runtime/compress/estim/AComEst.java index 2dce0bafe4e..ef7981e941b 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/estim/AComEst.java +++ b/src/main/java/org/apache/sysds/runtime/compress/estim/AComEst.java @@ -197,7 +197,10 @@ public final CompressedSizeInfoColGroup combine(IColIndex combinedColumns, Compr return null; // This combination is clearly not a good idea return null to indicate that. else if(g1.getMap() == null || g2.getMap() == null) // the previous information did not contain maps, therefore fall back to extract from sample - return getColGroupInfo(combinedColumns, Math.max(g1V, g2V), (int) max); + if(_cs.preferDeltaEncoding) + return getDeltaColGroupInfo(combinedColumns, Math.max(g1V, g2V), (int) max); + else + return getColGroupInfo(combinedColumns, Math.max(g1V, g2V), (int) max); else // Default combine the previous subject to max value calculated. return combine(combinedColumns, g1, g2, (int) max); } @@ -254,8 +257,12 @@ private List CompressedSizeInfoColGroupSingleThread( List ret = new ArrayList<>(clen); if(!_cs.transposed && !_data.isEmpty() && _data.isInSparseFormat()) nnzCols = LibMatrixReorg.countNnzPerColumn(_data); - for(int col = 0; col < clen; col++) - ret.add(getColGroupInfo(new SingleIndex(col))); + for(int col = 0; col < clen; col++) { + if(_cs.preferDeltaEncoding) + ret.add(getDeltaColGroupInfo(new SingleIndex(col))); + else + ret.add(getColGroupInfo(new SingleIndex(col))); + } return ret; } @@ -286,9 +293,14 @@ private List CompressedSizeInfoColGroupParallel(int for(int col = 0; col < clen; col += blkz) { final int start = col; final int end = Math.min(clen, col + blkz); + final boolean useDelta = _cs.preferDeltaEncoding; tasks.add(pool.submit(() -> { - for(int c = start; c < end; c++) - res[c] = getColGroupInfo(new SingleIndex(c)); + for(int c = start; c < end; c++) { + if(useDelta) + res[c] = getDeltaColGroupInfo(new SingleIndex(c)); + else + res[c] = getColGroupInfo(new SingleIndex(c)); + } return null; })); } diff --git a/src/main/java/org/apache/sysds/runtime/compress/estim/CompressedSizeInfoColGroup.java b/src/main/java/org/apache/sysds/runtime/compress/estim/CompressedSizeInfoColGroup.java index 963a044d14f..df353931c0b 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/estim/CompressedSizeInfoColGroup.java +++ b/src/main/java/org/apache/sysds/runtime/compress/estim/CompressedSizeInfoColGroup.java @@ -255,7 +255,11 @@ private static double getCompressionSize(IColIndex cols, CompressionType ct, Est case LinearFunctional: return ColGroupSizes.estimateInMemorySizeLinearFunctional(numCols, contiguousColumns); case DeltaDDC: - throw new NotImplementedException(); + // DeltaDDC has the same size estimation as DDC since it uses the same structure + // The delta encoding is just a different way of interpreting the data + nv = fact.numVals + (fact.numOffs < fact.numRows ? 1 : 0); + return ColGroupSizes.estimateInMemorySizeDDC(numCols, contiguousColumns, nv, fact.numRows, + fact.tupleSparsity, fact.lossy); case DDC: nv = fact.numVals + (fact.numOffs < fact.numRows ? 1 : 0); return ColGroupSizes.estimateInMemorySizeDDC(numCols, contiguousColumns, nv, fact.numRows, diff --git a/src/main/java/org/apache/sysds/runtime/compress/estim/encoding/EncodingFactory.java b/src/main/java/org/apache/sysds/runtime/compress/estim/encoding/EncodingFactory.java index b196da658c3..068b79f9864 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/estim/encoding/EncodingFactory.java +++ b/src/main/java/org/apache/sysds/runtime/compress/estim/encoding/EncodingFactory.java @@ -88,9 +88,8 @@ else if(rowCols.size() == 1) { * @return A delta encoded encoding. */ public static IEncode createFromMatrixBlockDelta(MatrixBlock m, boolean transposed, IColIndex rowCols) { - throw new NotImplementedException(); - // final int sampleSize = transposed ? m.getNumColumns() : m.getNumRows(); - // return createFromMatrixBlockDelta(m, transposed, rowCols, sampleSize); + final int sampleSize = transposed ? m.getNumColumns() : m.getNumRows(); + return createFromMatrixBlockDelta(m, transposed, rowCols, sampleSize); } /** @@ -107,7 +106,7 @@ public static IEncode createFromMatrixBlockDelta(MatrixBlock m, boolean transpos */ public static IEncode createFromMatrixBlockDelta(MatrixBlock m, boolean transposed, IColIndex rowCols, int sampleSize) { - throw new NotImplementedException(); + return createWithDeltaReader(m, rowCols, transposed, sampleSize); } /** @@ -691,4 +690,69 @@ private static IEncode createWithReaderSparse(MatrixBlock m, DblArrayCountHashMa public static SparseEncoding createSparse(AMapToData map, AOffset off, int nRows) { return new SparseEncoding(map, off, nRows); } + + private static IEncode createWithDeltaReader(MatrixBlock m, IColIndex rowCols, boolean transposed, int sampleSize) { + final int rl = 0; + final int ru = Math.min(sampleSize, transposed ? m.getNumColumns() : m.getNumRows()); + final ReaderColumnSelection reader1 = ReaderColumnSelection.createDeltaReader(m, rowCols, transposed, rl, ru); + final int nRows = transposed ? m.getNumColumns() : m.getNumRows(); + final DblArrayCountHashMap map = new DblArrayCountHashMap(); + final IntArrayList offsets = new IntArrayList(); + DblArray cellVals = reader1.nextRow(); + boolean isFirstRow = true; + + while(cellVals != null) { + map.increment(cellVals); + if(isFirstRow || !cellVals.isEmpty()) + offsets.appendValue(reader1.getCurrentRowIndex()); + isFirstRow = false; + cellVals = reader1.nextRow(); + } + + if(offsets.size() == 0) + return new EmptyEncoding(); + else if(map.size() == 1 && offsets.size() == ru) + return new ConstEncoding(ru); + + final ReaderColumnSelection reader2 = ReaderColumnSelection.createDeltaReader(m, rowCols, transposed, rl, ru); + if(offsets.size() < ru / 4) + return createWithDeltaReaderSparse(m, map, rowCols, offsets, ru, reader2); + else + return createWithDeltaReaderDense(m, map, rowCols, ru, offsets.size() < ru, reader2); + } + + private static IEncode createWithDeltaReaderDense(MatrixBlock m, DblArrayCountHashMap map, IColIndex rowCols, + int nRows, boolean zero, ReaderColumnSelection reader2) { + final int unique = map.size() + (zero ? 1 : 0); + final AMapToData d = MapToFactory.create(nRows, unique); + + DblArray cellVals; + if(zero) + while((cellVals = reader2.nextRow()) != null) + d.set(reader2.getCurrentRowIndex(), map.getId(cellVals) + 1); + else + while((cellVals = reader2.nextRow()) != null) + d.set(reader2.getCurrentRowIndex(), map.getId(cellVals)); + + return new DenseEncoding(d); + } + + private static IEncode createWithDeltaReaderSparse(MatrixBlock m, DblArrayCountHashMap map, IColIndex rowCols, + IntArrayList offsets, int nRows, ReaderColumnSelection reader2) { + DblArray cellVals = reader2.nextRow(); + final AMapToData d = MapToFactory.create(offsets.size(), map.size()); + + int i = 0; + boolean isFirstRow = true; + while(cellVals != null) { + if(isFirstRow || !cellVals.isEmpty()) { + d.set(i++, map.getId(cellVals)); + } + isFirstRow = false; + cellVals = reader2.nextRow(); + } + + final AOffset o = OffsetFactory.createOffset(offsets); + return new SparseEncoding(d, o, nRows); + } } diff --git a/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibUnary.java b/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibUnary.java index f858f15b746..cc0ff901df4 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibUnary.java +++ b/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibUnary.java @@ -21,10 +21,15 @@ import java.util.ArrayList; import java.util.List; +import org.apache.commons.lang3.tuple.Pair; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.sysds.runtime.compress.CompressedMatrixBlock; +import org.apache.sysds.runtime.compress.CompressedMatrixBlockFactory; +import org.apache.sysds.runtime.compress.CompressionSettingsBuilder; +import org.apache.sysds.runtime.compress.CompressionStatistics; import org.apache.sysds.runtime.compress.colgroup.AColGroup; +import org.apache.sysds.runtime.compress.colgroup.AColGroup.CompressionType; import org.apache.sysds.runtime.functionobjects.Builtin; import org.apache.sysds.runtime.functionobjects.Builtin.BuiltinCode; import org.apache.sysds.runtime.matrix.data.LibMatrixAgg; @@ -43,10 +48,40 @@ public static MatrixBlock unaryOperations(CompressedMatrixBlock m, UnaryOperator final boolean overlapping = m.isOverlapping(); final int r = m.getNumRows(); final int c = m.getNumColumns(); + // early aborts: if(m.isEmpty()) return new MatrixBlock(r, c, 0).unaryOperations(op, result); - else if(overlapping) { + + if(Builtin.isBuiltinCode(op.fn, BuiltinCode.CUMSUM)) { + MatrixBlock uncompressed = m.getUncompressed("CUMSUM requires uncompression", op.getNumThreads()); + MatrixBlock opResult = uncompressed.unaryOperations(op, null); + + CompressionSettingsBuilder csb = new CompressionSettingsBuilder(); + csb.clearValidCompression(); + csb.setPreferDeltaEncoding(true); + csb.addValidCompression(CompressionType.DeltaDDC); + csb.addValidCompression(CompressionType.UNCOMPRESSED); + csb.setTransposeInput("false"); + Pair compressedPair = CompressedMatrixBlockFactory.compress(opResult, op.getNumThreads(), csb); + MatrixBlock compressedResult = compressedPair.getLeft(); + + if(compressedResult == null) { + compressedResult = opResult; + } + + CompressedMatrixBlock finalResult; + if(compressedResult instanceof CompressedMatrixBlock) { + finalResult = (CompressedMatrixBlock) compressedResult; + } + else { + finalResult = CompressedMatrixBlockFactory.genUncompressedCompressedMatrixBlock(compressedResult); + } + + return finalResult; + } + + if(overlapping) { // when in overlapping state it is guaranteed that there is no infinites, NA, or NANs. if(Builtin.isBuiltinCode(op.fn, BuiltinCode.ISINF, BuiltinCode.ISNA, BuiltinCode.ISNAN)) return new MatrixBlock(r, c, 0); @@ -64,8 +99,9 @@ else if(Builtin.isBuiltinCode(op.fn, BuiltinCode.ISINF, BuiltinCode.ISNAN, Built return new MatrixBlock(r, c, 0); // avoid unnecessary allocation else if(LibMatrixAgg.isSupportedUnaryOperator(op)) { String message = "Unary Op not supported: " + op.fn.getClass().getSimpleName(); - // e.g., cumsum/cumprod/cummin/cumax/cumsumprod - return m.getUncompressed(message, op.getNumThreads()).unaryOperations(op, null); + MatrixBlock uncompressed = m.getUncompressed(message, op.getNumThreads()); + MatrixBlock opResult = uncompressed.unaryOperations(op, null); + return opResult; } else { diff --git a/src/main/java/org/apache/sysds/runtime/compress/readers/ReaderColumnSelection.java b/src/main/java/org/apache/sysds/runtime/compress/readers/ReaderColumnSelection.java index d6ec60336f0..1734d39f4ce 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/readers/ReaderColumnSelection.java +++ b/src/main/java/org/apache/sysds/runtime/compress/readers/ReaderColumnSelection.java @@ -193,13 +193,63 @@ else if(rawBlock.isInSparseFormat()) { else { return new ReaderColumnSelectionDenseSingleBlockQuantized(rawBlock, colIndices, rl, ru, scaleFactors); } - } + } + + /** + * Create a reader of the matrix block that computes delta values (current row - previous row) on-the-fly. + * + * Note the reader reuse the return, therefore if needed for something please copy the returned rows. + * The first row is returned as-is (no delta computation). + * + * @param rawBlock The block to iterate though + * @param colIndices The column indexes to extract and insert into the double array + * @param transposed If the raw block should be treated as transposed + * @return A delta reader of the columns specified + */ + public static ReaderColumnSelection createDeltaReader(MatrixBlock rawBlock, IColIndex colIndices, boolean transposed) { + final int rl = 0; + final int ru = transposed ? rawBlock.getNumColumns() : rawBlock.getNumRows(); + return createDeltaReader(rawBlock, colIndices, transposed, rl, ru); + } + + /** + * Create a reader of the matrix block that computes delta values (current row - previous row) on-the-fly. + * + * Note the reader reuse the return, therefore if needed for something please copy the returned rows. + * The first row is returned as-is (no delta computation). + * + * @param rawBlock The block to iterate though + * @param colIndices The column indexes to extract and insert into the double array + * @param transposed If the raw block should be treated as transposed + * @param rl The row to start at + * @param ru The row to end at (not inclusive) + * @return A delta reader of the columns specified + */ + public static ReaderColumnSelection createDeltaReader(MatrixBlock rawBlock, IColIndex colIndices, boolean transposed, + int rl, int ru) { + checkInput(rawBlock, colIndices, rl, ru, transposed); + rl = rl - 1; + if(rawBlock.isEmpty()) { + LOG.warn("It is likely an error occurred when reading an empty block, but we do support it!"); + return new ReaderColumnSelectionEmpty(rawBlock, colIndices, rl, ru, transposed); + } + + if(transposed) { + throw new NotImplementedException("Delta encoding for transposed matrices not yet implemented"); + } + + if(rawBlock.isInSparseFormat()) + return new ReaderColumnSelectionSparseDelta(rawBlock, colIndices, rl, ru); + else if(rawBlock.getDenseBlock().numBlocks() > 1) + return new ReaderColumnSelectionDenseMultiBlockDelta(rawBlock, colIndices, rl, ru); + return new ReaderColumnSelectionDenseSingleBlockDelta(rawBlock, colIndices, rl, ru); + } private static void checkInput(final MatrixBlock rawBlock, final IColIndex colIndices, final int rl, final int ru, final boolean transposed) { - if(colIndices.size() <= 1) + if(colIndices.size() < 1) throw new DMLCompressionException( - "Column selection reader should not be done on single column groups: " + colIndices); + "Column selection reader should not be done on empty column groups: " + colIndices); else if(rl >= ru) throw new DMLCompressionException("Invalid inverse range for reader " + rl + " to " + ru); diff --git a/src/main/java/org/apache/sysds/runtime/compress/readers/ReaderColumnSelectionDenseMultiBlockDelta.java b/src/main/java/org/apache/sysds/runtime/compress/readers/ReaderColumnSelectionDenseMultiBlockDelta.java new file mode 100644 index 00000000000..f700ebd94b7 --- /dev/null +++ b/src/main/java/org/apache/sysds/runtime/compress/readers/ReaderColumnSelectionDenseMultiBlockDelta.java @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysds.runtime.compress.readers; + +import org.apache.sysds.runtime.compress.colgroup.indexes.IColIndex; +import org.apache.sysds.runtime.compress.utils.DblArray; +import org.apache.sysds.runtime.data.DenseBlock; +import org.apache.sysds.runtime.matrix.data.MatrixBlock; + +public class ReaderColumnSelectionDenseMultiBlockDelta extends ReaderColumnSelection { + private final DenseBlock _data; + private final double[] _previousRow; + private boolean _isFirstRow; + + protected ReaderColumnSelectionDenseMultiBlockDelta(MatrixBlock data, IColIndex colIndices, int rl, int ru) { + super(colIndices, rl, Math.min(ru, data.getNumRows()) - 1); + _data = data.getDenseBlock(); + _previousRow = new double[colIndices.size()]; + _isFirstRow = true; + } + + protected DblArray getNextRow() { + _rl++; + + if(_isFirstRow) { + for(int i = 0; i < _colIndexes.size(); i++) { + final double val = _data.get(_rl, _colIndexes.get(i)); + _previousRow[i] = val; + reusableArr[i] = val; + } + _isFirstRow = false; + } + else { + for(int i = 0; i < _colIndexes.size(); i++) { + final double currentVal = _data.get(_rl, _colIndexes.get(i)); + reusableArr[i] = currentVal - _previousRow[i]; + _previousRow[i] = currentVal; + } + } + + return reusableReturn; + } +} + + + diff --git a/src/main/java/org/apache/sysds/runtime/compress/readers/ReaderColumnSelectionDenseSingleBlockDelta.java b/src/main/java/org/apache/sysds/runtime/compress/readers/ReaderColumnSelectionDenseSingleBlockDelta.java new file mode 100644 index 00000000000..65f13343201 --- /dev/null +++ b/src/main/java/org/apache/sysds/runtime/compress/readers/ReaderColumnSelectionDenseSingleBlockDelta.java @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysds.runtime.compress.readers; + +import org.apache.sysds.runtime.compress.colgroup.indexes.IColIndex; +import org.apache.sysds.runtime.compress.utils.DblArray; +import org.apache.sysds.runtime.matrix.data.MatrixBlock; + +public class ReaderColumnSelectionDenseSingleBlockDelta extends ReaderColumnSelection { + private final double[] _data; + private final int _numCols; + private final double[] _previousRow; + private boolean _isFirstRow; + + protected ReaderColumnSelectionDenseSingleBlockDelta(MatrixBlock data, IColIndex colIndices, int rl, int ru) { + super(colIndices, rl, Math.min(ru, data.getNumRows()) - 1); + _data = data.getDenseBlockValues(); + _numCols = data.getNumColumns(); + _previousRow = new double[colIndices.size()]; + _isFirstRow = true; + } + + protected DblArray getNextRow() { + _rl++; + final int indexOff = _rl * _numCols; + + if(_isFirstRow) { + for(int i = 0; i < _colIndexes.size(); i++) { + final double val = _data[indexOff + _colIndexes.get(i)]; + _previousRow[i] = val; + reusableArr[i] = val; + } + _isFirstRow = false; + } + else { + for(int i = 0; i < _colIndexes.size(); i++) { + final double currentVal = _data[indexOff + _colIndexes.get(i)]; + reusableArr[i] = currentVal - _previousRow[i]; + _previousRow[i] = currentVal; + } + } + + return reusableReturn; + } +} + + + diff --git a/src/main/java/org/apache/sysds/runtime/compress/readers/ReaderColumnSelectionSparseDelta.java b/src/main/java/org/apache/sysds/runtime/compress/readers/ReaderColumnSelectionSparseDelta.java new file mode 100644 index 00000000000..8ea1fff3396 --- /dev/null +++ b/src/main/java/org/apache/sysds/runtime/compress/readers/ReaderColumnSelectionSparseDelta.java @@ -0,0 +1,91 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysds.runtime.compress.readers; + +import java.util.Arrays; + +import org.apache.sysds.runtime.compress.colgroup.indexes.IColIndex; +import org.apache.sysds.runtime.compress.utils.DblArray; +import org.apache.sysds.runtime.data.SparseBlock; +import org.apache.sysds.runtime.matrix.data.MatrixBlock; + +public class ReaderColumnSelectionSparseDelta extends ReaderColumnSelection { + + private final SparseBlock _a; + private final double[] _previousRow; + private boolean _isFirstRow; + + protected ReaderColumnSelectionSparseDelta(MatrixBlock data, IColIndex colIndexes, int rl, int ru) { + super(colIndexes, rl, Math.min(ru, data.getNumRows()) - 1); + _a = data.getSparseBlock(); + _previousRow = new double[colIndexes.size()]; + _isFirstRow = true; + } + + protected final DblArray getNextRow() { + _rl++; + for(int i = 0; i < _colIndexes.size(); i++) + reusableArr[i] = 0.0; + + if(!_a.isEmpty(_rl)) + processInRange(_rl); + + if(_isFirstRow) { + for(int i = 0; i < _colIndexes.size(); i++) + _previousRow[i] = reusableArr[i]; + _isFirstRow = false; + } + else { + for(int i = 0; i < _colIndexes.size(); i++) { + final double currentVal = reusableArr[i]; + reusableArr[i] = currentVal - _previousRow[i]; + _previousRow[i] = currentVal; + } + } + + return reusableReturn; + } + + final void processInRange(final int r) { + final int apos = _a.pos(r); + final int alen = _a.size(r) + apos; + final int[] aix = _a.indexes(r); + final double[] avals = _a.values(r); + int skip = 0; + int j = Arrays.binarySearch(aix, apos, alen, _colIndexes.get(0)); + if(j < 0) + j = Math.abs(j + 1); + + while(skip < _colIndexes.size() && j < alen) { + if(_colIndexes.get(skip) == aix[j]) { + reusableArr[skip] = avals[j]; + skip++; + j++; + } + else if(_colIndexes.get(skip) > aix[j]) + j++; + else + skip++; + } + } +} + + + diff --git a/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCTest.java b/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCTest.java new file mode 100644 index 00000000000..0f04cfc9c27 --- /dev/null +++ b/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDDCTest.java @@ -0,0 +1,216 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysds.test.component.compress.colgroup; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.sysds.runtime.compress.colgroup.AColGroup; +import org.apache.sysds.runtime.compress.colgroup.ColGroupDDC; +import org.apache.sysds.runtime.compress.colgroup.ColGroupDeltaDDC; +import org.apache.sysds.runtime.compress.colgroup.dictionary.Dictionary; +import org.apache.sysds.runtime.compress.colgroup.indexes.ColIndexFactory; +import org.apache.sysds.runtime.compress.colgroup.indexes.IColIndex; +import org.apache.sysds.runtime.compress.colgroup.mapping.AMapToData; +import org.apache.sysds.runtime.compress.colgroup.mapping.MapToFactory; +import org.apache.sysds.runtime.matrix.data.MatrixBlock; +import org.junit.Test; + +public class ColGroupDDCTest { + + protected static final Log LOG = LogFactory.getLog(ColGroupDDCTest.class.getName()); + + @Test + public void testConvertToDeltaDDCBasic() { + IColIndex colIndexes = ColIndexFactory.create(2); + double[] dictValues = new double[] {10.0, 20.0, 11.0, 21.0, 12.0, 22.0}; + Dictionary dict = Dictionary.create(dictValues); + AMapToData data = MapToFactory.create(3, 3); + data.set(0, 0); + data.set(1, 1); + data.set(2, 2); + + ColGroupDDC ddc = (ColGroupDDC) ColGroupDDC.create(colIndexes, dict, data, null); + AColGroup result = ddc.convertToDeltaDDC(); + + assertNotNull(result); + assertTrue(result instanceof ColGroupDeltaDDC); + ColGroupDeltaDDC deltaDDC = (ColGroupDeltaDDC) result; + + MatrixBlock mb = new MatrixBlock(3, 2, false); + mb.allocateDenseBlock(); + deltaDDC.decompressToDenseBlock(mb.getDenseBlock(), 0, 3); + + assertEquals(10.0, mb.get(0, 0), 0.0); + assertEquals(20.0, mb.get(0, 1), 0.0); + assertEquals(11.0, mb.get(1, 0), 0.0); + assertEquals(21.0, mb.get(1, 1), 0.0); + assertEquals(12.0, mb.get(2, 0), 0.0); + assertEquals(22.0, mb.get(2, 1), 0.0); + } + + @Test + public void testConvertToDeltaDDCSingleColumn() { + IColIndex colIndexes = ColIndexFactory.create(1); + double[] dictValues = new double[] {1.0, 2.0, 3.0, 4.0, 5.0}; + Dictionary dict = Dictionary.create(dictValues); + AMapToData data = MapToFactory.create(5, 5); + for(int i = 0; i < 5; i++) + data.set(i, i); + + ColGroupDDC ddc = (ColGroupDDC) ColGroupDDC.create(colIndexes, dict, data, null); + AColGroup result = ddc.convertToDeltaDDC(); + + assertNotNull(result); + assertTrue(result instanceof ColGroupDeltaDDC); + ColGroupDeltaDDC deltaDDC = (ColGroupDeltaDDC) result; + + MatrixBlock mb = new MatrixBlock(5, 1, false); + mb.allocateDenseBlock(); + deltaDDC.decompressToDenseBlock(mb.getDenseBlock(), 0, 5); + + assertEquals(1.0, mb.get(0, 0), 0.0); + assertEquals(2.0, mb.get(1, 0), 0.0); + assertEquals(3.0, mb.get(2, 0), 0.0); + assertEquals(4.0, mb.get(3, 0), 0.0); + assertEquals(5.0, mb.get(4, 0), 0.0); + } + + @Test + public void testConvertToDeltaDDCWithRepeatedValues() { + IColIndex colIndexes = ColIndexFactory.create(2); + double[] dictValues = new double[] {10.0, 20.0, 10.0, 20.0, 10.0, 20.0}; + Dictionary dict = Dictionary.create(dictValues); + AMapToData data = MapToFactory.create(3, 3); + data.set(0, 0); + data.set(1, 1); + data.set(2, 2); + + ColGroupDDC ddc = (ColGroupDDC) ColGroupDDC.create(colIndexes, dict, data, null); + AColGroup result = ddc.convertToDeltaDDC(); + + assertNotNull(result); + assertTrue(result instanceof ColGroupDeltaDDC); + ColGroupDeltaDDC deltaDDC = (ColGroupDeltaDDC) result; + + MatrixBlock mb = new MatrixBlock(3, 2, false); + mb.allocateDenseBlock(); + deltaDDC.decompressToDenseBlock(mb.getDenseBlock(), 0, 3); + + assertEquals(10.0, mb.get(0, 0), 0.0); + assertEquals(20.0, mb.get(0, 1), 0.0); + assertEquals(10.0, mb.get(1, 0), 0.0); + assertEquals(20.0, mb.get(1, 1), 0.0); + assertEquals(10.0, mb.get(2, 0), 0.0); + assertEquals(20.0, mb.get(2, 1), 0.0); + } + + @Test + public void testConvertToDeltaDDCWithNegativeDeltas() { + IColIndex colIndexes = ColIndexFactory.create(2); + double[] dictValues = new double[] {10.0, 20.0, 8.0, 15.0, 12.0, 25.0}; + Dictionary dict = Dictionary.create(dictValues); + AMapToData data = MapToFactory.create(3, 3); + data.set(0, 0); + data.set(1, 1); + data.set(2, 2); + + ColGroupDDC ddc = (ColGroupDDC) ColGroupDDC.create(colIndexes, dict, data, null); + AColGroup result = ddc.convertToDeltaDDC(); + + assertNotNull(result); + assertTrue(result instanceof ColGroupDeltaDDC); + ColGroupDeltaDDC deltaDDC = (ColGroupDeltaDDC) result; + + MatrixBlock mb = new MatrixBlock(3, 2, false); + mb.allocateDenseBlock(); + deltaDDC.decompressToDenseBlock(mb.getDenseBlock(), 0, 3); + + assertEquals(10.0, mb.get(0, 0), 0.0); + assertEquals(20.0, mb.get(0, 1), 0.0); + assertEquals(8.0, mb.get(1, 0), 0.0); + assertEquals(15.0, mb.get(1, 1), 0.0); + assertEquals(12.0, mb.get(2, 0), 0.0); + assertEquals(25.0, mb.get(2, 1), 0.0); + } + + @Test + public void testConvertToDeltaDDCWithZeroDeltas() { + IColIndex colIndexes = ColIndexFactory.create(2); + double[] dictValues = new double[] {5.0, 0.0, 5.0, 0.0, 0.0, 5.0}; + Dictionary dict = Dictionary.create(dictValues); + AMapToData data = MapToFactory.create(3, 3); + data.set(0, 0); + data.set(1, 1); + data.set(2, 2); + + ColGroupDDC ddc = (ColGroupDDC) ColGroupDDC.create(colIndexes, dict, data, null); + AColGroup result = ddc.convertToDeltaDDC(); + + assertNotNull(result); + assertTrue(result instanceof ColGroupDeltaDDC); + ColGroupDeltaDDC deltaDDC = (ColGroupDeltaDDC) result; + + MatrixBlock mb = new MatrixBlock(3, 2, false); + mb.allocateDenseBlock(); + deltaDDC.decompressToDenseBlock(mb.getDenseBlock(), 0, 3); + + assertEquals(5.0, mb.get(0, 0), 0.0); + assertEquals(0.0, mb.get(0, 1), 0.0); + assertEquals(5.0, mb.get(1, 0), 0.0); + assertEquals(0.0, mb.get(1, 1), 0.0); + assertEquals(0.0, mb.get(2, 0), 0.0); + assertEquals(5.0, mb.get(2, 1), 0.0); + } + + @Test + public void testConvertToDeltaDDCMultipleUniqueDeltas() { + IColIndex colIndexes = ColIndexFactory.create(2); + double[] dictValues = new double[] {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}; + Dictionary dict = Dictionary.create(dictValues); + AMapToData data = MapToFactory.create(4, 4); + for(int i = 0; i < 4; i++) + data.set(i, i); + + ColGroupDDC ddc = (ColGroupDDC) ColGroupDDC.create(colIndexes, dict, data, null); + AColGroup result = ddc.convertToDeltaDDC(); + + assertNotNull(result); + assertTrue(result instanceof ColGroupDeltaDDC); + ColGroupDeltaDDC deltaDDC = (ColGroupDeltaDDC) result; + + MatrixBlock mb = new MatrixBlock(4, 2, false); + mb.allocateDenseBlock(); + deltaDDC.decompressToDenseBlock(mb.getDenseBlock(), 0, 4); + + assertEquals(1.0, mb.get(0, 0), 0.0); + assertEquals(2.0, mb.get(0, 1), 0.0); + assertEquals(3.0, mb.get(1, 0), 0.0); + assertEquals(4.0, mb.get(1, 1), 0.0); + assertEquals(5.0, mb.get(2, 0), 0.0); + assertEquals(6.0, mb.get(2, 1), 0.0); + assertEquals(7.0, mb.get(3, 0), 0.0); + assertEquals(8.0, mb.get(3, 1), 0.0); + } +} + diff --git a/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDeltaDDCTest.java b/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDeltaDDCTest.java index 0f2d965bce8..c953792a038 100644 --- a/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDeltaDDCTest.java +++ b/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupDeltaDDCTest.java @@ -19,64 +19,747 @@ package org.apache.sysds.test.component.compress.colgroup; +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.util.Collections; +import java.util.EnumSet; + +import org.apache.commons.lang3.NotImplementedException; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.sysds.runtime.DMLRuntimeException; +import org.apache.sysds.runtime.compress.CompressionSettings; +import org.apache.sysds.runtime.compress.CompressionSettingsBuilder; +import org.apache.sysds.runtime.compress.colgroup.AColGroup; +import org.apache.sysds.runtime.compress.colgroup.ColGroupDeltaDDC; +import org.apache.sysds.runtime.compress.colgroup.ColGroupFactory; +import org.apache.sysds.runtime.compress.colgroup.ColGroupIO; +import org.apache.sysds.runtime.compress.colgroup.indexes.ColIndexFactory; +import org.apache.sysds.runtime.compress.colgroup.indexes.IColIndex; +import org.apache.sysds.runtime.compress.estim.ComEstExact; +import org.apache.sysds.runtime.compress.estim.CompressedSizeInfo; +import org.apache.sysds.runtime.compress.estim.CompressedSizeInfoColGroup; +import org.apache.sysds.runtime.functionobjects.Builtin; +import org.apache.sysds.runtime.functionobjects.Divide; +import org.apache.sysds.runtime.functionobjects.Equals; +import org.apache.sysds.runtime.functionobjects.Multiply; +import org.apache.sysds.runtime.functionobjects.GreaterThan; +import org.apache.sysds.runtime.functionobjects.Minus; +import org.apache.sysds.runtime.functionobjects.Plus; +import org.apache.sysds.runtime.matrix.data.MatrixBlock; +import org.apache.sysds.runtime.matrix.operators.RightScalarOperator; +import org.apache.sysds.runtime.matrix.operators.ScalarOperator; +import org.apache.sysds.runtime.matrix.operators.UnaryOperator; +import org.apache.sysds.runtime.util.DataConverter; +import org.junit.Test; + public class ColGroupDeltaDDCTest { - // protected static final Log LOG = LogFactory.getLog(JolEstimateTest.class.getName()); - - // @Test - // public void testDecompressToDenseBlockSingleColumn() { - // testDecompressToDenseBlock(new double[][] {{1, 2, 3, 4, 5}}, true); - // } - - // @Test - // public void testDecompressToDenseBlockSingleColumnTransposed() { - // testDecompressToDenseBlock(new double[][] {{1}, {2}, {3}, {4}, {5}}, false); - // } - - // @Test - // public void testDecompressToDenseBlockTwoColumns() { - // testDecompressToDenseBlock(new double[][] {{1, 1}, {2, 1}, {3, 1}, {4, 1}, {5, 1}}, false); - // } - - // @Test - // public void testDecompressToDenseBlockTwoColumnsTransposed() { - // testDecompressToDenseBlock(new double[][] {{1, 2, 3, 4, 5}, {1, 1, 1, 1, 1}}, true); - // } - - // public void testDecompressToDenseBlock(double[][] data, boolean isTransposed) { - // MatrixBlock mbt = DataConverter.convertToMatrixBlock(data); - - // final int numCols = isTransposed ? mbt.getNumRows() : mbt.getNumColumns(); - // final int numRows = isTransposed ? mbt.getNumColumns() : mbt.getNumRows(); - // int[] colIndexes = new int[numCols]; - // for(int x = 0; x < numCols; x++) - // colIndexes[x] = x; - - // try { - // CompressionSettings cs = new CompressionSettingsBuilder().setSamplingRatio(1.0) - // .setValidCompressions(EnumSet.of(AColGroup.CompressionType.DeltaDDC)).create(); - // cs.transposed = isTransposed; - - // final CompressedSizeInfoColGroup cgi = new CompressedSizeEstimatorExact(mbt, cs) - // .getColGroupInfo(colIndexes); - // CompressedSizeInfo csi = new CompressedSizeInfo(cgi); - // AColGroup cg = ColGroupFactory.compressColGroups(mbt, csi, cs, 1).get(0); - - // // Decompress to dense block - // MatrixBlock ret = new MatrixBlock(numRows, numCols, false); - // ret.allocateDenseBlock(); - // cg.decompressToDenseBlock(ret.getDenseBlock(), 0, numRows); - - // MatrixBlock expected = DataConverter.convertToMatrixBlock(data); - // if(isTransposed) - // LibMatrixReorg.transposeInPlace(expected, 1); - // Assert.assertArrayEquals(expected.getDenseBlockValues(), ret.getDenseBlockValues(), 0.01); - - // } - // catch(Exception e) { - // e.printStackTrace(); - // throw new DMLRuntimeException("Failed construction : " + this.getClass().getSimpleName()); - // } - // } + protected static final Log LOG = LogFactory.getLog(ColGroupDeltaDDCTest.class.getName()); + + @Test + public void testDecompressToDenseBlockSingleColumn() { + testDecompressToDenseBlock(new double[][] {{1, 2, 3, 4, 5}}, false); + } + + @Test(expected = NotImplementedException.class) + public void testDecompressToDenseBlockSingleColumnTransposed() { + testDecompressToDenseBlock(new double[][] {{1}, {2}, {3}, {4}, {5}}, true); + } + + @Test + public void testDecompressToDenseBlockTwoColumns() { + testDecompressToDenseBlock(new double[][] {{1, 2}, {2, 3}, {3, 4}, {4, 5}, {5, 6}}, false); + } + + @Test(expected = NotImplementedException.class) + public void testDecompressToDenseBlockTwoColumnsTransposed() { + testDecompressToDenseBlock(new double[][] {{1, 2, 3, 4, 5}, {1, 1, 1, 1, 1}}, true); + } + + @Test + public void testDecompressToDenseBlockPartialRangeSingleColumn() { + testDecompressToDenseBlockPartialRange(new double[][] {{1}, {2}, {3}, {4}, {5}}, false, 2, 5); + } + + @Test + public void testDecompressToDenseBlockPartialRangeTwoColumns() { + testDecompressToDenseBlockPartialRange(new double[][] {{1, 2}, {2, 3}, {3, 4}, {4, 5}, {5, 6}}, false, 1, 4); + } + + @Test + public void testDecompressToDenseBlockPartialRangeFromMiddle() { + testDecompressToDenseBlockPartialRange(new double[][] {{1, 2}, {2, 3}, {3, 4}, {4, 5}, {5, 6}, {6, 7}}, false, 3, 6); + } + + public void testDecompressToDenseBlock(double[][] data, boolean isTransposed) { + if(isTransposed) { + throw new NotImplementedException("Delta encoding for transposed matrices not yet implemented"); + } + + MatrixBlock mbt = DataConverter.convertToMatrixBlock(data); + + final int numCols = mbt.getNumColumns(); + final int numRows = mbt.getNumRows(); + IColIndex colIndexes = ColIndexFactory.create(numCols); + + try { + CompressionSettingsBuilder csb = new CompressionSettingsBuilder().setSamplingRatio(1.0) + .setValidCompressions(EnumSet.of(AColGroup.CompressionType.DeltaDDC)) + .setPreferDeltaEncoding(true) + .setTransposeInput("false"); + CompressionSettings cs = csb.create(); + + final CompressedSizeInfoColGroup cgi = new ComEstExact(mbt, cs).getColGroupInfo(colIndexes); + CompressedSizeInfo csi = new CompressedSizeInfo(cgi); + AColGroup cg = ColGroupFactory.compressColGroups(mbt, csi, cs, 1).get(0); + + MatrixBlock ret = new MatrixBlock(numRows, numCols, false); + ret.allocateDenseBlock(); + cg.decompressToDenseBlock(ret.getDenseBlock(), 0, numRows); + + MatrixBlock expected = DataConverter.convertToMatrixBlock(data); + assertArrayEquals(expected.getDenseBlockValues(), ret.getDenseBlockValues(), 0.01); + + } + catch(NotImplementedException e) { + throw e; + } + catch(Exception e) { + e.printStackTrace(); + throw new DMLRuntimeException("Failed construction : " + this.getClass().getSimpleName(), e); + } + } + + public void testDecompressToDenseBlockPartialRange(double[][] data, boolean isTransposed, int rl, int ru) { + if(isTransposed) { + throw new NotImplementedException("Delta encoding for transposed matrices not yet implemented"); + } + + MatrixBlock mbt = DataConverter.convertToMatrixBlock(data); + + final int numCols = mbt.getNumColumns(); + final int numRows = mbt.getNumRows(); + IColIndex colIndexes = ColIndexFactory.create(numCols); + + try { + CompressionSettingsBuilder csb = new CompressionSettingsBuilder().setSamplingRatio(1.0) + .setValidCompressions(EnumSet.of(AColGroup.CompressionType.DeltaDDC)) + .setPreferDeltaEncoding(true) + .setTransposeInput("false"); + CompressionSettings cs = csb.create(); + + final CompressedSizeInfoColGroup cgi = new ComEstExact(mbt, cs).getColGroupInfo(colIndexes); + CompressedSizeInfo csi = new CompressedSizeInfo(cgi); + AColGroup cg = ColGroupFactory.compressColGroups(mbt, csi, cs, 1).get(0); + + assertTrue("Column group should be DeltaDDC, not Const", cg instanceof ColGroupDeltaDDC); + + MatrixBlock ret = new MatrixBlock(numRows, numCols, false); + ret.allocateDenseBlock(); + cg.decompressToDenseBlock(ret.getDenseBlock(), rl, ru); + + MatrixBlock expected = DataConverter.convertToMatrixBlock(data); + for(int i = rl; i < ru; i++) { + for(int j = 0; j < numCols; j++) { + double expectedValue = expected.get(i, j); + double actualValue = ret.get(i, j); + assertArrayEquals(new double[] {expectedValue}, new double[] {actualValue}, 0.01); + } + } + + } + catch(NotImplementedException e) { + throw e; + } + catch(Exception e) { + e.printStackTrace(); + throw new DMLRuntimeException("Failed partial range decompression : " + this.getClass().getSimpleName(), e); + } + } + + @Test + public void testSerializationSingleColumn() throws IOException { + double[][] data = {{1}, {2}, {3}, {4}, {5}}; + MatrixBlock mbt = DataConverter.convertToMatrixBlock(data); + final int numCols = mbt.getNumColumns(); + final int numRows = mbt.getNumRows(); + IColIndex colIndexes = ColIndexFactory.create(numCols); + + CompressionSettingsBuilder csb = new CompressionSettingsBuilder().setSamplingRatio(1.0) + .setValidCompressions(EnumSet.of(AColGroup.CompressionType.DeltaDDC)) + .setPreferDeltaEncoding(true) + .setTransposeInput("false"); + CompressionSettings cs = csb.create(); + + final CompressedSizeInfoColGroup cgi = new ComEstExact(mbt, cs).getDeltaColGroupInfo(colIndexes); + CompressedSizeInfo csi = new CompressedSizeInfo(cgi); + AColGroup original = ColGroupFactory.compressColGroups(mbt, csi, cs, 1).get(0); + + assertTrue("Original should be ColGroupDeltaDDC", original instanceof ColGroupDeltaDDC); + + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + DataOutputStream dos = new DataOutputStream(bos); + ColGroupIO.writeGroups(dos, Collections.singletonList(original)); + assertEquals(original.getExactSizeOnDisk() + 4, bos.size()); + + ByteArrayInputStream bis = new ByteArrayInputStream(bos.toByteArray()); + DataInputStream dis = new DataInputStream(bis); + AColGroup deserialized = ColGroupIO.readGroups(dis, numRows).get(0); + + assertTrue("Deserialized should be ColGroupDeltaDDC", deserialized instanceof ColGroupDeltaDDC); + assertEquals("Compression type should match", original.getCompType(), deserialized.getCompType()); + assertEquals("Exact size on disk should match", original.getExactSizeOnDisk(), deserialized.getExactSizeOnDisk()); + + MatrixBlock originalDecompressed = new MatrixBlock(numRows, numCols, false); + originalDecompressed.allocateDenseBlock(); + original.decompressToDenseBlock(originalDecompressed.getDenseBlock(), 0, numRows); + + MatrixBlock deserializedDecompressed = new MatrixBlock(numRows, numCols, false); + deserializedDecompressed.allocateDenseBlock(); + deserialized.decompressToDenseBlock(deserializedDecompressed.getDenseBlock(), 0, numRows); + + for(int i = 0; i < numRows; i++) { + for(int j = 0; j < numCols; j++) { + assertArrayEquals(new double[] {originalDecompressed.get(i, j)}, new double[] {deserializedDecompressed.get(i, j)}, 0.01); + } + } + } + + @Test + public void testSerializationTwoColumns() throws IOException { + double[][] data = {{1, 2}, {2, 3}, {3, 4}, {4, 5}, {5, 6}}; + MatrixBlock mbt = DataConverter.convertToMatrixBlock(data); + final int numCols = mbt.getNumColumns(); + final int numRows = mbt.getNumRows(); + IColIndex colIndexes = ColIndexFactory.create(numCols); + + CompressionSettingsBuilder csb = new CompressionSettingsBuilder().setSamplingRatio(1.0) + .setValidCompressions(EnumSet.of(AColGroup.CompressionType.DeltaDDC)) + .setPreferDeltaEncoding(true) + .setTransposeInput("false"); + CompressionSettings cs = csb.create(); + + final CompressedSizeInfoColGroup cgi = new ComEstExact(mbt, cs).getDeltaColGroupInfo(colIndexes); + CompressedSizeInfo csi = new CompressedSizeInfo(cgi); + AColGroup original = ColGroupFactory.compressColGroups(mbt, csi, cs, 1).get(0); + + assertTrue("Original should be ColGroupDeltaDDC", original instanceof ColGroupDeltaDDC); + + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + DataOutputStream dos = new DataOutputStream(bos); + ColGroupIO.writeGroups(dos, Collections.singletonList(original)); + assertEquals(original.getExactSizeOnDisk() + 4, bos.size()); + + ByteArrayInputStream bis = new ByteArrayInputStream(bos.toByteArray()); + DataInputStream dis = new DataInputStream(bis); + AColGroup deserialized = ColGroupIO.readGroups(dis, numRows).get(0); + + assertTrue("Deserialized should be ColGroupDeltaDDC", deserialized instanceof ColGroupDeltaDDC); + assertEquals("Compression type should match", original.getCompType(), deserialized.getCompType()); + assertEquals("Exact size on disk should match", original.getExactSizeOnDisk(), deserialized.getExactSizeOnDisk()); + + MatrixBlock originalDecompressed = new MatrixBlock(numRows, numCols, false); + originalDecompressed.allocateDenseBlock(); + original.decompressToDenseBlock(originalDecompressed.getDenseBlock(), 0, numRows); + + MatrixBlock deserializedDecompressed = new MatrixBlock(numRows, numCols, false); + deserializedDecompressed.allocateDenseBlock(); + deserialized.decompressToDenseBlock(deserializedDecompressed.getDenseBlock(), 0, numRows); + + for(int i = 0; i < numRows; i++) { + for(int j = 0; j < numCols; j++) { + assertArrayEquals(new double[] {originalDecompressed.get(i, j)}, new double[] {deserializedDecompressed.get(i, j)}, 0.01); + } + } + } + + @Test + public void testScalarEquals() { + double[][] data = {{0}, {1}, {2}, {3}, {0}}; + AColGroup cg = compressForTest(data); + assertTrue(cg instanceof ColGroupDeltaDDC); + + ScalarOperator op = new RightScalarOperator(Equals.getEqualsFnObject(), 0.0); + AColGroup res = cg.scalarOperation(op); + + MatrixBlock ret = new MatrixBlock(5, 1, false); + ret.allocateDenseBlock(); + res.decompressToDenseBlock(ret.getDenseBlock(), 0, 5); + + assertEquals(1.0, ret.get(0, 0), 0.0); + assertEquals(0.0, ret.get(1, 0), 0.0); + assertEquals(0.0, ret.get(2, 0), 0.0); + assertEquals(0.0, ret.get(3, 0), 0.0); + assertEquals(1.0, ret.get(4, 0), 0.0); + } + + @Test + public void testScalarGreaterThan() { + double[][] data = {{0}, {1}, {2}, {3}, {0}}; + AColGroup cg = compressForTest(data); + assertTrue(cg instanceof ColGroupDeltaDDC); + + ScalarOperator op = new RightScalarOperator(GreaterThan.getGreaterThanFnObject(), 1.5); + AColGroup res = cg.scalarOperation(op); + + MatrixBlock ret = new MatrixBlock(5, 1, false); + ret.allocateDenseBlock(); + res.decompressToDenseBlock(ret.getDenseBlock(), 0, 5); + + assertEquals(0.0, ret.get(0, 0), 0.0); + assertEquals(0.0, ret.get(1, 0), 0.0); + assertEquals(1.0, ret.get(2, 0), 0.0); + assertEquals(1.0, ret.get(3, 0), 0.0); + assertEquals(0.0, ret.get(4, 0), 0.0); + } + + @Test + public void testScalarPlus() { + double[][] data = {{1}, {2}, {3}, {4}, {5}}; + AColGroup cg = compressForTest(data); + assertTrue(cg instanceof ColGroupDeltaDDC); + + ScalarOperator op = new RightScalarOperator(Plus.getPlusFnObject(), 10.0); + AColGroup res = cg.scalarOperation(op); + assertTrue("Should remain DeltaDDC after shift", res instanceof ColGroupDeltaDDC); + + MatrixBlock ret = new MatrixBlock(5, 1, false); + ret.allocateDenseBlock(); + res.decompressToDenseBlock(ret.getDenseBlock(), 0, 5); + + assertEquals(11.0, ret.get(0, 0), 0.0); + assertEquals(12.0, ret.get(1, 0), 0.0); + assertEquals(13.0, ret.get(2, 0), 0.0); + assertEquals(14.0, ret.get(3, 0), 0.0); + assertEquals(15.0, ret.get(4, 0), 0.0); + } + + @Test + public void testScalarMinus() { + double[][] data = {{11}, {12}, {13}, {14}, {15}}; + AColGroup cg = compressForTest(data); + assertTrue(cg instanceof ColGroupDeltaDDC); + + ScalarOperator op = new RightScalarOperator(Minus.getMinusFnObject(), 10.0); + AColGroup res = cg.scalarOperation(op); + assertTrue("Should remain DeltaDDC after shift", res instanceof ColGroupDeltaDDC); + + MatrixBlock ret = new MatrixBlock(5, 1, false); + ret.allocateDenseBlock(); + res.decompressToDenseBlock(ret.getDenseBlock(), 0, 5); + + assertEquals(1.0, ret.get(0, 0), 0.0); + assertEquals(2.0, ret.get(1, 0), 0.0); + assertEquals(3.0, ret.get(2, 0), 0.0); + assertEquals(4.0, ret.get(3, 0), 0.0); + assertEquals(5.0, ret.get(4, 0), 0.0); + } + + @Test + public void testUnaryOperationSqrt() { + double[][] data = {{1}, {4}, {9}, {16}, {25}}; + AColGroup cg = compressForTest(data); + assertTrue(cg instanceof ColGroupDeltaDDC); + + UnaryOperator op = new UnaryOperator(Builtin.getBuiltinFnObject(Builtin.BuiltinCode.SQRT)); + AColGroup res = cg.unaryOperation(op); + + MatrixBlock ret = new MatrixBlock(5, 1, false); + ret.allocateDenseBlock(); + res.decompressToDenseBlock(ret.getDenseBlock(), 0, 5); + + assertEquals(1.0, ret.get(0, 0), 0.01); + assertEquals(2.0, ret.get(1, 0), 0.01); + assertEquals(3.0, ret.get(2, 0), 0.01); + assertEquals(4.0, ret.get(3, 0), 0.01); + assertEquals(5.0, ret.get(4, 0), 0.01); + } + + @Test + public void testScalarEqualsMultiColumn() { + double[][] data = {{0, 1}, {1, 2}, {2, 3}, {3, 4}, {0, 1}}; + AColGroup cg = compressForTest(data); + assertTrue(cg instanceof ColGroupDeltaDDC); + + ScalarOperator op = new RightScalarOperator(Equals.getEqualsFnObject(), 0.0); + AColGroup res = cg.scalarOperation(op); + + MatrixBlock ret = new MatrixBlock(5, 2, false); + ret.allocateDenseBlock(); + res.decompressToDenseBlock(ret.getDenseBlock(), 0, 5); + + assertEquals(1.0, ret.get(0, 0), 0.0); + assertEquals(0.0, ret.get(0, 1), 0.0); + assertEquals(0.0, ret.get(1, 0), 0.0); + assertEquals(0.0, ret.get(1, 1), 0.0); + assertEquals(0.0, ret.get(2, 0), 0.0); + assertEquals(0.0, ret.get(2, 1), 0.0); + assertEquals(0.0, ret.get(3, 0), 0.0); + assertEquals(0.0, ret.get(3, 1), 0.0); + assertEquals(1.0, ret.get(4, 0), 0.0); + assertEquals(0.0, ret.get(4, 1), 0.0); + } + + @Test + public void testScalarMultiply() { + double[][] data = {{1}, {2}, {3}, {4}, {5}}; + AColGroup cg = compressForTest(data); + assertTrue(cg instanceof ColGroupDeltaDDC); + + ScalarOperator op = new RightScalarOperator(Multiply.getMultiplyFnObject(), 2.0); + AColGroup res = cg.scalarOperation(op); + + MatrixBlock ret = new MatrixBlock(5, 1, false); + ret.allocateDenseBlock(); + res.decompressToDenseBlock(ret.getDenseBlock(), 0, 5); + + assertEquals(2.0, ret.get(0, 0), 0.0); + assertEquals(4.0, ret.get(1, 0), 0.0); + assertEquals(6.0, ret.get(2, 0), 0.0); + assertEquals(8.0, ret.get(3, 0), 0.0); + assertEquals(10.0, ret.get(4, 0), 0.0); + } + + @Test + public void testScalarDivide() { + double[][] data = {{2}, {4}, {6}, {8}, {10}}; + AColGroup cg = compressForTest(data); + assertTrue(cg instanceof ColGroupDeltaDDC); + + ScalarOperator op = new RightScalarOperator(Divide.getDivideFnObject(), 2.0); + AColGroup res = cg.scalarOperation(op); + + MatrixBlock ret = new MatrixBlock(5, 1, false); + ret.allocateDenseBlock(); + res.decompressToDenseBlock(ret.getDenseBlock(), 0, 5); + + assertEquals(1.0, ret.get(0, 0), 0.0); + assertEquals(2.0, ret.get(1, 0), 0.0); + assertEquals(3.0, ret.get(2, 0), 0.0); + assertEquals(4.0, ret.get(3, 0), 0.0); + assertEquals(5.0, ret.get(4, 0), 0.0); + } + + @Test + public void testSliceRows() { + double[][] data = {{1, 2}, {3, 4}, {5, 6}, {7, 8}, {9, 10}}; + AColGroup cg = compressForTest(data); + + AColGroup sliced = cg.sliceRows(1, 4); + assertTrue(sliced instanceof ColGroupDeltaDDC); + + MatrixBlock ret = new MatrixBlock(3, 2, false); + ret.allocateDenseBlock(); + sliced.decompressToDenseBlock(ret.getDenseBlock(), 0, 3); + + assertEquals(3.0, ret.get(0, 0), 0.0); + assertEquals(4.0, ret.get(0, 1), 0.0); + assertEquals(5.0, ret.get(1, 0), 0.0); + assertEquals(6.0, ret.get(1, 1), 0.0); + assertEquals(7.0, ret.get(2, 0), 0.0); + assertEquals(8.0, ret.get(2, 1), 0.0); + } + + @Test + public void testSliceRowsWithMatchingDictionaryEntry() { + double[][] data = {{1, 2}, {3, 4}, {1, 2}, {5, 6}, {7, 8}}; + AColGroup cg = compressForTest(data); + + AColGroup sliced = cg.sliceRows(2, 5); + assertTrue(sliced instanceof ColGroupDeltaDDC); + + MatrixBlock ret = new MatrixBlock(3, 2, false); + ret.allocateDenseBlock(); + sliced.decompressToDenseBlock(ret.getDenseBlock(), 0, 3); + + assertEquals(1.0, ret.get(0, 0), 0.0); + assertEquals(2.0, ret.get(0, 1), 0.0); + assertEquals(5.0, ret.get(1, 0), 0.0); + assertEquals(6.0, ret.get(1, 1), 0.0); + assertEquals(7.0, ret.get(2, 0), 0.0); + assertEquals(8.0, ret.get(2, 1), 0.0); + } + + @Test + public void testSliceRowsWithNoMatchingDictionaryEntry() { + double[][] data = {{1, 2}, {3, 4}, {5, 6}}; + AColGroup cg = compressForTest(data); + + AColGroup sliced = cg.sliceRows(1, 3); + assertTrue(sliced instanceof ColGroupDeltaDDC); + + MatrixBlock ret = new MatrixBlock(2, 2, false); + ret.allocateDenseBlock(); + sliced.decompressToDenseBlock(ret.getDenseBlock(), 0, 2); + + assertEquals(3.0, ret.get(0, 0), 0.0); + assertEquals(4.0, ret.get(0, 1), 0.0); + assertEquals(5.0, ret.get(1, 0), 0.0); + assertEquals(6.0, ret.get(1, 1), 0.0); + } + + @Test + public void testSliceRowsFromMiddleRow() { + double[][] data = {{1, 2}, {3, 4}, {5, 6}, {7, 8}}; + AColGroup cg = compressForTest(data); + + AColGroup sliced = cg.sliceRows(2, 4); + assertTrue(sliced instanceof ColGroupDeltaDDC); + + MatrixBlock ret = new MatrixBlock(2, 2, false); + ret.allocateDenseBlock(); + sliced.decompressToDenseBlock(ret.getDenseBlock(), 0, 2); + + assertEquals(5.0, ret.get(0, 0), 0.0); + assertEquals(6.0, ret.get(0, 1), 0.0); + assertEquals(7.0, ret.get(1, 0), 0.0); + assertEquals(8.0, ret.get(1, 1), 0.0); + } + + @Test + public void testDecompressToSparseBlock() { + double[][] data = {{1, 2}, {3, 4}, {5, 6}}; + AColGroup cg = compressForTest(data); + + MatrixBlock ret = new MatrixBlock(3, 2, true); + ret.allocateSparseRowsBlock(); + cg.decompressToSparseBlock(ret.getSparseBlock(), 0, 3); + + assertEquals(1.0, ret.get(0, 0), 0.0); + assertEquals(2.0, ret.get(0, 1), 0.0); + assertEquals(3.0, ret.get(1, 0), 0.0); + assertEquals(4.0, ret.get(1, 1), 0.0); + assertEquals(5.0, ret.get(2, 0), 0.0); + assertEquals(6.0, ret.get(2, 1), 0.0); + } + + @Test + public void testDecompressToSparseBlockWithRlGreaterThanZero() { + double[][] data = {{1, 2}, {3, 4}, {5, 6}, {7, 8}}; + AColGroup cg = compressForTest(data); + + MatrixBlock ret = new MatrixBlock(4, 2, true); + ret.allocateSparseRowsBlock(); + cg.decompressToSparseBlock(ret.getSparseBlock(), 2, 4, 0, 0); + + assertEquals(5.0, ret.get(2, 0), 0.0); + assertEquals(6.0, ret.get(2, 1), 0.0); + assertEquals(7.0, ret.get(3, 0), 0.0); + assertEquals(8.0, ret.get(3, 1), 0.0); + } + + @Test + public void testDecompressToSparseBlockWithOffset() { + double[][] data = {{1, 2}, {3, 4}, {5, 6}}; + AColGroup cg = compressForTest(data); + + MatrixBlock ret = new MatrixBlock(5, 4, true); + ret.allocateSparseRowsBlock(); + cg.decompressToSparseBlock(ret.getSparseBlock(), 0, 3, 1, 1); + + assertEquals(1.0, ret.get(1, 1), 0.0); + assertEquals(2.0, ret.get(1, 2), 0.0); + assertEquals(3.0, ret.get(2, 1), 0.0); + assertEquals(4.0, ret.get(2, 2), 0.0); + assertEquals(5.0, ret.get(3, 1), 0.0); + assertEquals(6.0, ret.get(3, 2), 0.0); + } + + @Test + public void testGetNumberNonZeros() { + double[][] data = {{1, 0}, {2, 3}, {0, 4}, {5, 0}}; + AColGroup cg = compressForTest(data); + + long nnz = cg.getNumberNonZeros(4); + assertEquals(5L, nnz); + } + + @Test + public void testGetNumberNonZerosAllZeros() { + double[][] data = {{0, 0}, {0, 0}, {0, 0}}; + AColGroup cg = compressForTest(data); + + long nnz = cg.getNumberNonZeros(3); + assertEquals(0L, nnz); + } + + @Test + public void testGetNumberNonZerosAllNonZeros() { + double[][] data = {{1, 2}, {3, 4}, {5, 6}}; + AColGroup cg = compressForTest(data); + + long nnz = cg.getNumberNonZeros(3); + assertEquals(6L, nnz); + } + + @Test + public void testDecompressToDenseBlockNonContiguousPath() { + double[][] data = {{1, 2}, {3, 4}, {5, 6}}; + AColGroup cg = compressForTest(data); + + MatrixBlock ret = new MatrixBlock(3, 5, false); + ret.allocateDenseBlock(); + cg.decompressToDenseBlock(ret.getDenseBlock(), 0, 3, 0, 2); + + assertEquals(1.0, ret.get(0, 2), 0.0); + assertEquals(2.0, ret.get(0, 3), 0.0); + assertEquals(3.0, ret.get(1, 2), 0.0); + assertEquals(4.0, ret.get(1, 3), 0.0); + assertEquals(5.0, ret.get(2, 2), 0.0); + assertEquals(6.0, ret.get(2, 3), 0.0); + } + + @Test + public void testDecompressToDenseBlockFirstRowPath() { + double[][] data = {{10, 20}, {11, 21}, {12, 22}}; + AColGroup cg = compressForTest(data); + + MatrixBlock ret = new MatrixBlock(3, 2, false); + ret.allocateDenseBlock(); + cg.decompressToDenseBlock(ret.getDenseBlock(), 0, 1); + + assertEquals(10.0, ret.get(0, 0), 0.0); + assertEquals(20.0, ret.get(0, 1), 0.0); + } + + @Test + public void testScalarOperationShiftWithExistingMatch() { + double[][] data = {{1}, {2}, {3}, {1}}; + AColGroup cg = compressForTest(data); + assertTrue(cg instanceof ColGroupDeltaDDC); + + ScalarOperator op = new RightScalarOperator(Plus.getPlusFnObject(), 1.0); + AColGroup res = cg.scalarOperation(op); + assertTrue("Should remain DeltaDDC after shift", res instanceof ColGroupDeltaDDC); + + MatrixBlock ret = new MatrixBlock(4, 1, false); + ret.allocateDenseBlock(); + res.decompressToDenseBlock(ret.getDenseBlock(), 0, 4); + + assertEquals(2.0, ret.get(0, 0), 0.0); + assertEquals(3.0, ret.get(1, 0), 0.0); + assertEquals(4.0, ret.get(2, 0), 0.0); + assertEquals(2.0, ret.get(3, 0), 0.0); + } + + @Test + public void testScalarOperationShiftWithCountsId0EqualsOne() { + double[][] data = {{1}, {2}, {3}}; + AColGroup cg = compressForTest(data); + assertTrue(cg instanceof ColGroupDeltaDDC); + + ScalarOperator op = new RightScalarOperator(Plus.getPlusFnObject(), 5.0); + AColGroup res = cg.scalarOperation(op); + assertTrue("Should remain DeltaDDC after shift", res instanceof ColGroupDeltaDDC); + + MatrixBlock ret = new MatrixBlock(3, 1, false); + ret.allocateDenseBlock(); + res.decompressToDenseBlock(ret.getDenseBlock(), 0, 3); + + assertEquals(6.0, ret.get(0, 0), 0.0); + assertEquals(7.0, ret.get(1, 0), 0.0); + assertEquals(8.0, ret.get(2, 0), 0.0); + } + + @Test + public void testScalarOperationShiftWithNoMatch() { + double[][] data = {{1}, {2}, {3}}; + AColGroup cg = compressForTest(data); + assertTrue(cg instanceof ColGroupDeltaDDC); + + ScalarOperator op = new RightScalarOperator(Plus.getPlusFnObject(), 10.0); + AColGroup res = cg.scalarOperation(op); + assertTrue("Should remain DeltaDDC after shift", res instanceof ColGroupDeltaDDC); + + MatrixBlock ret = new MatrixBlock(3, 1, false); + ret.allocateDenseBlock(); + res.decompressToDenseBlock(ret.getDenseBlock(), 0, 3); + + assertEquals(11.0, ret.get(0, 0), 0.0); + assertEquals(12.0, ret.get(1, 0), 0.0); + assertEquals(13.0, ret.get(2, 0), 0.0); + } + + @Test + public void testUnaryOperationTriggersConvertToDDC() { + double[][] data = {{1, 2}, {3, 4}, {5, 6}}; + AColGroup cg = compressForTest(data); + assertTrue(cg instanceof ColGroupDeltaDDC); + + UnaryOperator op = new UnaryOperator(Builtin.getBuiltinFnObject(Builtin.BuiltinCode.ABS)); + AColGroup res = cg.unaryOperation(op); + + MatrixBlock ret = new MatrixBlock(3, 2, false); + ret.allocateDenseBlock(); + res.decompressToDenseBlock(ret.getDenseBlock(), 0, 3); + + assertEquals(1.0, ret.get(0, 0), 0.01); + assertEquals(2.0, ret.get(0, 1), 0.01); + assertEquals(3.0, ret.get(1, 0), 0.01); + assertEquals(4.0, ret.get(1, 1), 0.01); + assertEquals(5.0, ret.get(2, 0), 0.01); + assertEquals(6.0, ret.get(2, 1), 0.01); + } + + @Test + public void testUnaryOperationWithConstantResultSingleColumn() { + double[][] data = {{5}, {5}, {5}, {5}}; + AColGroup cg = compressForTest(data); + assertTrue(cg instanceof ColGroupDeltaDDC); + + UnaryOperator op = new UnaryOperator(Builtin.getBuiltinFnObject(Builtin.BuiltinCode.ABS)); + AColGroup res = cg.unaryOperation(op); + + MatrixBlock ret = new MatrixBlock(4, 1, false); + ret.allocateDenseBlock(); + res.decompressToDenseBlock(ret.getDenseBlock(), 0, 4); + + assertEquals(5.0, ret.get(0, 0), 0.01); + assertEquals(5.0, ret.get(1, 0), 0.01); + assertEquals(5.0, ret.get(2, 0), 0.01); + assertEquals(5.0, ret.get(3, 0), 0.01); + } + + @Test + public void testUnaryOperationWithConstantResultMultiColumn() { + double[][] data = {{10, 20}, {10, 20}, {10, 20}}; + AColGroup cg = compressForTest(data); + assertTrue(cg instanceof ColGroupDeltaDDC); + + UnaryOperator op = new UnaryOperator(Builtin.getBuiltinFnObject(Builtin.BuiltinCode.ABS)); + AColGroup res = cg.unaryOperation(op); + + MatrixBlock ret = new MatrixBlock(3, 2, false); + ret.allocateDenseBlock(); + res.decompressToDenseBlock(ret.getDenseBlock(), 0, 3); + + assertEquals(10.0, ret.get(0, 0), 0.01); + assertEquals(20.0, ret.get(0, 1), 0.01); + assertEquals(10.0, ret.get(1, 0), 0.01); + assertEquals(20.0, ret.get(1, 1), 0.01); + assertEquals(10.0, ret.get(2, 0), 0.01); + assertEquals(20.0, ret.get(2, 1), 0.01); + } + + private AColGroup compressForTest(double[][] data) { + MatrixBlock mb = DataConverter.convertToMatrixBlock(data); + IColIndex colIndexes = ColIndexFactory.create(data[0].length); + CompressionSettings cs = new CompressionSettingsBuilder() + .setValidCompressions(EnumSet.of(AColGroup.CompressionType.DeltaDDC)) + .setPreferDeltaEncoding(true) + .create(); + + final CompressedSizeInfoColGroup cgi = new ComEstExact(mb, cs).getDeltaColGroupInfo(colIndexes); + CompressedSizeInfo csi = new CompressedSizeInfo(cgi); + return ColGroupFactory.compressColGroups(mb, csi, cs, 1).get(0); + } } diff --git a/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupFactoryDeltaDDCTest.java b/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupFactoryDeltaDDCTest.java new file mode 100644 index 00000000000..c7439652956 --- /dev/null +++ b/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupFactoryDeltaDDCTest.java @@ -0,0 +1,229 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysds.test.component.compress.colgroup; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.sysds.runtime.compress.CompressionSettings; +import org.apache.sysds.runtime.compress.CompressionSettingsBuilder; +import org.apache.sysds.runtime.compress.colgroup.AColGroup; +import org.apache.sysds.runtime.compress.colgroup.AColGroup.CompressionType; +import org.apache.sysds.runtime.compress.colgroup.ColGroupDeltaDDC; +import org.apache.sysds.runtime.compress.colgroup.ColGroupEmpty; +import org.apache.sysds.runtime.compress.colgroup.ColGroupFactory; +import org.apache.sysds.runtime.compress.colgroup.indexes.ColIndexFactory; +import org.apache.sysds.runtime.compress.colgroup.indexes.IColIndex; +import org.apache.sysds.runtime.compress.estim.CompressedSizeInfo; +import org.apache.sysds.runtime.compress.estim.CompressedSizeInfoColGroup; +import org.apache.sysds.runtime.compress.estim.EstimationFactors; +import org.apache.sysds.runtime.matrix.data.MatrixBlock; +import org.junit.Test; + +public class ColGroupFactoryDeltaDDCTest { + + @Test + public void testCompressDeltaDDCSingleColumnWithGaps() { + MatrixBlock mb = new MatrixBlock(10, 1, true); + mb.set(0, 0, 10); + mb.set(5, 0, 15); + mb.set(9, 0, 20); + + IColIndex cols = ColIndexFactory.create(1); + CompressionSettingsBuilder csb = new CompressionSettingsBuilder(); + CompressionSettings cs = csb.create(); + + final int nRow = mb.getNumRows(); + final int offs = 3; + final EstimationFactors f = new EstimationFactors(3, nRow, offs, 0.3); + final List es = new ArrayList<>(); + es.add(new CompressedSizeInfoColGroup(cols, f, 314152, CompressionType.DeltaDDC)); + final CompressedSizeInfo csi = new CompressedSizeInfo(es); + + List groups = ColGroupFactory.compressColGroups(mb, csi, cs); + assertNotNull("Compression should succeed", groups); + assertEquals("Should have one column group", 1, groups.size()); + assertTrue("Should be DeltaDDC", groups.get(0) instanceof ColGroupDeltaDDC); + } + + @Test + public void testCompressDeltaDDCSingleColumnEmpty() { + MatrixBlock mb = new MatrixBlock(10, 1, true); + + IColIndex cols = ColIndexFactory.create(1); + CompressionSettingsBuilder csb = new CompressionSettingsBuilder(); + CompressionSettings cs = csb.create(); + + final int nRow = mb.getNumRows(); + final int offs = 0; + final EstimationFactors f = new EstimationFactors(0, nRow, offs, 0.0); + final List es = new ArrayList<>(); + es.add(new CompressedSizeInfoColGroup(cols, f, 314152, CompressionType.DeltaDDC)); + final CompressedSizeInfo csi = new CompressedSizeInfo(es); + + List groups = ColGroupFactory.compressColGroups(mb, csi, cs); + assertNotNull("Compression should succeed", groups); + assertEquals("Should have one column group", 1, groups.size()); + assertTrue("Should be Empty", groups.get(0) instanceof ColGroupEmpty); + } + + @Test + public void testCompressDeltaDDCMultiColumnWithGaps() { + MatrixBlock mb = new MatrixBlock(20, 2, true); + mb.set(0, 0, 10); + mb.set(0, 1, 20); + mb.set(5, 0, 15); + mb.set(5, 1, 25); + mb.set(10, 0, 20); + mb.set(10, 1, 30); + mb.set(15, 0, 25); + mb.set(15, 1, 35); + + IColIndex cols = ColIndexFactory.create(2); + CompressionSettingsBuilder csb = new CompressionSettingsBuilder(); + CompressionSettings cs = csb.create(); + + final int nRow = mb.getNumRows(); + final int offs = 4; + final EstimationFactors f = new EstimationFactors(4, nRow, offs, 0.2); + final List es = new ArrayList<>(); + es.add(new CompressedSizeInfoColGroup(cols, f, 314152, CompressionType.DeltaDDC)); + final CompressedSizeInfo csi = new CompressedSizeInfo(es); + + List groups = ColGroupFactory.compressColGroups(mb, csi, cs); + assertNotNull("Compression should succeed", groups); + assertEquals("Should have one column group", 1, groups.size()); + assertTrue("Should be DeltaDDC", groups.get(0) instanceof ColGroupDeltaDDC); + } + + @Test + public void testCompressDeltaDDCMultiColumnEmpty() { + MatrixBlock mb = new MatrixBlock(10, 2, true); + + IColIndex cols = ColIndexFactory.create(2); + CompressionSettingsBuilder csb = new CompressionSettingsBuilder(); + CompressionSettings cs = csb.create(); + + final int nRow = mb.getNumRows(); + final int offs = 0; + final EstimationFactors f = new EstimationFactors(0, nRow, offs, 0.0); + final List es = new ArrayList<>(); + es.add(new CompressedSizeInfoColGroup(cols, f, 314152, CompressionType.DeltaDDC)); + final CompressedSizeInfo csi = new CompressedSizeInfo(es); + + List groups = ColGroupFactory.compressColGroups(mb, csi, cs); + assertNotNull("Compression should succeed", groups); + assertEquals("Should have one column group", 1, groups.size()); + assertTrue("Should be Empty", groups.get(0) instanceof ColGroupEmpty); + } + + @Test + public void testCompressDeltaDDCMultiColumnSparseWithGaps() { + MatrixBlock mb = new MatrixBlock(50, 3, true); + mb.set(0, 0, 1); + mb.set(0, 1, 2); + mb.set(0, 2, 3); + mb.set(10, 0, 11); + mb.set(10, 1, 12); + mb.set(10, 2, 13); + mb.set(20, 0, 21); + mb.set(20, 1, 22); + mb.set(20, 2, 23); + mb.set(30, 0, 31); + mb.set(30, 1, 32); + mb.set(30, 2, 33); + mb.set(40, 0, 41); + mb.set(40, 1, 42); + mb.set(40, 2, 43); + + IColIndex cols = ColIndexFactory.create(3); + CompressionSettingsBuilder csb = new CompressionSettingsBuilder(); + CompressionSettings cs = csb.create(); + + final int nRow = mb.getNumRows(); + final int offs = 5; + final EstimationFactors f = new EstimationFactors(5, nRow, offs, 0.1); + final List es = new ArrayList<>(); + es.add(new CompressedSizeInfoColGroup(cols, f, 314152, CompressionType.DeltaDDC)); + final CompressedSizeInfo csi = new CompressedSizeInfo(es); + + List groups = ColGroupFactory.compressColGroups(mb, csi, cs); + assertNotNull("Compression should succeed", groups); + assertEquals("Should have one column group", 1, groups.size()); + assertTrue("Should be DeltaDDC", groups.get(0) instanceof ColGroupDeltaDDC); + } + + @Test + public void testCompressDeltaDDCSingleColumnDense() { + MatrixBlock mb = new MatrixBlock(10, 1, false); + mb.allocateDenseBlock(); + for(int i = 0; i < 10; i++) { + mb.set(i, 0, i + 1); + } + + IColIndex cols = ColIndexFactory.create(1); + CompressionSettingsBuilder csb = new CompressionSettingsBuilder(); + CompressionSettings cs = csb.create(); + + final int nRow = mb.getNumRows(); + final int offs = 10; + final EstimationFactors f = new EstimationFactors(10, nRow, offs, 1.0); + final List es = new ArrayList<>(); + es.add(new CompressedSizeInfoColGroup(cols, f, 314152, CompressionType.DeltaDDC)); + final CompressedSizeInfo csi = new CompressedSizeInfo(es); + + List groups = ColGroupFactory.compressColGroups(mb, csi, cs); + assertNotNull("Compression should succeed", groups); + assertEquals("Should have one column group", 1, groups.size()); + assertTrue("Should be DeltaDDC", groups.get(0) instanceof ColGroupDeltaDDC); + } + + @Test + public void testCompressDeltaDDCMultiColumnDense() { + MatrixBlock mb = new MatrixBlock(10, 2, false); + mb.allocateDenseBlock(); + for(int i = 0; i < 10; i++) { + mb.set(i, 0, i + 1); + mb.set(i, 1, (i + 1) * 2); + } + + IColIndex cols = ColIndexFactory.create(2); + CompressionSettingsBuilder csb = new CompressionSettingsBuilder(); + CompressionSettings cs = csb.create(); + + final int nRow = mb.getNumRows(); + final int offs = 10; + final EstimationFactors f = new EstimationFactors(10, nRow, offs, 1.0); + final List es = new ArrayList<>(); + es.add(new CompressedSizeInfoColGroup(cols, f, 314152, CompressionType.DeltaDDC)); + final CompressedSizeInfo csi = new CompressedSizeInfo(es); + + List groups = ColGroupFactory.compressColGroups(mb, csi, cs); + assertNotNull("Compression should succeed", groups); + assertEquals("Should have one column group", 1, groups.size()); + assertTrue("Should be DeltaDDC", groups.get(0) instanceof ColGroupDeltaDDC); + } + +} + diff --git a/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupFactoryTest.java b/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupFactoryTest.java index 0468de4dc04..c4da48a0232 100644 --- a/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupFactoryTest.java +++ b/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupFactoryTest.java @@ -323,9 +323,9 @@ public boolean isContiguous() { return false; } - @Override - public int numBlocks() { - return 2; - } + @Override + public int numBlocks() { + return 2; } } +} diff --git a/src/test/java/org/apache/sysds/test/component/compress/colgroup/JolEstimateDeltaDDCTest.java b/src/test/java/org/apache/sysds/test/component/compress/colgroup/JolEstimateDeltaDDCTest.java index de2d310acce..f0a3dda1c1c 100644 --- a/src/test/java/org/apache/sysds/test/component/compress/colgroup/JolEstimateDeltaDDCTest.java +++ b/src/test/java/org/apache/sysds/test/component/compress/colgroup/JolEstimateDeltaDDCTest.java @@ -24,6 +24,8 @@ import org.apache.sysds.runtime.compress.colgroup.AColGroup; import org.apache.sysds.runtime.matrix.data.MatrixBlock; +import org.apache.sysds.runtime.util.DataConverter; +import org.apache.sysds.test.TestUtils; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; @@ -34,28 +36,25 @@ public class JolEstimateDeltaDDCTest extends JolEstimateTest { public static Collection data() { ArrayList tests = new ArrayList<>(); - // MatrixBlock mb; + MatrixBlock mb; - // mb = DataConverter.convertToMatrixBlock(new double[][] {{0}}); - // tests.add(new Object[] {mb}); + mb = DataConverter.convertToMatrixBlock(new double[][] {{0}}); + tests.add(new Object[] {mb}); - // mb = DataConverter.convertToMatrixBlock(new double[][] {{1}}); - // tests.add(new Object[] {mb}); + mb = DataConverter.convertToMatrixBlock(new double[][] {{1}}); + tests.add(new Object[] {mb}); - // TODO add reader that reads as if Delta encoded. - // then afterwards use this test. + mb = DataConverter.convertToMatrixBlock(new double[][] {{1, 2, 3, 4, 5}}); + tests.add(new Object[] {mb}); - // mb = DataConverter.convertToMatrixBlock(new double[][] {{1, 2, 3, 4, 5}}); - // tests.add(new Object[] {mb}); + mb = DataConverter.convertToMatrixBlock(new double[][] {{1,2,3},{1,1,1}}); + tests.add(new Object[] {mb}); - // mb = DataConverter.convertToMatrixBlock(new double[][] {{1,2,3},{1,1,1}}); - // tests.add(new Object[] {mb}); + mb = DataConverter.convertToMatrixBlock(new double[][] {{1, 1}, {2, 1}, {3, 1}, {4, 1}, {5, 1}}); + tests.add(new Object[] {mb}); - // mb = DataConverter.convertToMatrixBlock(new double[][] {{1, 1}, {2, 1}, {3, 1}, {4, 1}, {5, 1}}); - // tests.add(new Object[] {mb}); - - // mb = TestUtils.generateTestMatrixBlock(2, 5, 0, 20, 1.0, 7); - // tests.add(new Object[] {mb}); + mb = TestUtils.generateTestMatrixBlock(2, 5, 0, 20, 1.0, 7); + tests.add(new Object[] {mb}); return tests; } @@ -68,4 +67,9 @@ public JolEstimateDeltaDDCTest(MatrixBlock mb) { public AColGroup.CompressionType getCT() { return delta; } + + @Override + protected boolean shouldTranspose() { + return false; + } } diff --git a/src/test/java/org/apache/sysds/test/component/compress/colgroup/JolEstimateTest.java b/src/test/java/org/apache/sysds/test/component/compress/colgroup/JolEstimateTest.java index 8c30b398b7c..f4ffe92eb60 100644 --- a/src/test/java/org/apache/sysds/test/component/compress/colgroup/JolEstimateTest.java +++ b/src/test/java/org/apache/sysds/test/component/compress/colgroup/JolEstimateTest.java @@ -70,6 +70,10 @@ public abstract class JolEstimateTest { public abstract CompressionType getCT(); + protected boolean shouldTranspose() { + return true; + } + private final long actualSize; private final int actualNumberUnique; private final AColGroup cg; @@ -77,16 +81,21 @@ public abstract class JolEstimateTest { public JolEstimateTest(MatrixBlock mbt) { CompressedMatrixBlock.debug = true; this.mbt = mbt; - colIndexes = ColIndexFactory.create(mbt.getNumRows()); + colIndexes = ColIndexFactory.create(shouldTranspose() ? mbt.getNumRows() : mbt.getNumColumns()); mbt.recomputeNonZeros(); mbt.examSparsity(); try { - CompressionSettings cs = new CompressionSettingsBuilder().setSamplingRatio(1.0) - .setValidCompressions(EnumSet.of(getCT())).create(); - cs.transposed = true; + CompressionSettingsBuilder csb = new CompressionSettingsBuilder().setSamplingRatio(1.0) + .setValidCompressions(EnumSet.of(getCT())); + boolean useDelta = getCT() == CompressionType.DeltaDDC; + if(useDelta) + csb.setPreferDeltaEncoding(true); + CompressionSettings cs = csb.create(); + cs.transposed = shouldTranspose(); - final CompressedSizeInfoColGroup cgi = new ComEstExact(mbt, cs).getColGroupInfo(colIndexes); + final ComEstExact est = new ComEstExact(mbt, cs); + final CompressedSizeInfoColGroup cgi = useDelta ? est.getDeltaColGroupInfo(colIndexes) : est.getColGroupInfo(colIndexes); final CompressedSizeInfo csi = new CompressedSizeInfo(cgi); final List groups = ColGroupFactory.compressColGroups(mbt, csi, cs, 1); @@ -158,13 +167,17 @@ public void compressedSizeInfoEstimatorSample(double ratio, double tolerance) { if(mbt.getNumColumns() > 10000) tolerance *= 0.95; - final CompressionSettings cs = csb.setSamplingRatio(ratio).setMinimumSampleSize(10) - .setValidCompressions(EnumSet.of(getCT())).create(); - cs.transposed = true; + CompressionSettingsBuilder testCsb = csb.setSamplingRatio(ratio).setMinimumSampleSize(10) + .setValidCompressions(EnumSet.of(getCT())); + boolean useDelta = getCT() == CompressionType.DeltaDDC; + if(useDelta) + testCsb.setPreferDeltaEncoding(true); + final CompressionSettings cs = testCsb.create(); + cs.transposed = shouldTranspose(); final int sampleSize = Math.max(10, (int) (mbt.getNumColumns() * ratio)); final AComEst est = ComEstFactory.createEstimator(mbt, cs, sampleSize, 1); - final CompressedSizeInfoColGroup cInfo = est.getColGroupInfo(colIndexes); + final CompressedSizeInfoColGroup cInfo = useDelta ? est.getDeltaColGroupInfo(colIndexes) : est.getColGroupInfo(colIndexes); final int estimateNUniques = cInfo.getNumVals(); final double estimateCSI = (cg.getCompType() == CompressionType.CONST) ? ColGroupSizes diff --git a/src/test/java/org/apache/sysds/test/component/compress/dictionary/DeltaDictionaryTest.java b/src/test/java/org/apache/sysds/test/component/compress/dictionary/DeltaDictionaryTest.java index 5ba6b88d251..52b88d83a53 100644 --- a/src/test/java/org/apache/sysds/test/component/compress/dictionary/DeltaDictionaryTest.java +++ b/src/test/java/org/apache/sysds/test/component/compress/dictionary/DeltaDictionaryTest.java @@ -18,13 +18,18 @@ */ package org.apache.sysds.test.component.compress.dictionary; -import org.apache.commons.lang3.NotImplementedException; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; + import org.apache.sysds.runtime.compress.colgroup.dictionary.DeltaDictionary; -import org.apache.sysds.runtime.functionobjects.And; +import org.apache.sysds.runtime.compress.colgroup.dictionary.DictionaryFactory; +import org.apache.sysds.runtime.compress.colgroup.dictionary.IDictionary; +import org.apache.sysds.runtime.compress.colgroup.dictionary.IDictionary.DictType; import org.apache.sysds.runtime.functionobjects.Divide; -import org.apache.sysds.runtime.functionobjects.Minus; import org.apache.sysds.runtime.functionobjects.Multiply; -import org.apache.sysds.runtime.functionobjects.Plus; import org.apache.sysds.runtime.matrix.operators.LeftScalarOperator; import org.apache.sysds.runtime.matrix.operators.RightScalarOperator; import org.apache.sysds.runtime.matrix.operators.ScalarOperator; @@ -83,51 +88,82 @@ public void testScalarOpRightDivideTwoColumns() { Assert.assertArrayEquals(expected, d.getValues(), 0.01); } + @Test - public void testScalarOpRightPlusSingleColumn() { - double scalar = 2; - DeltaDictionary d = new DeltaDictionary(new double[] {1, 2}, 1); - ScalarOperator sop = new RightScalarOperator(Plus.getPlusFnObject(), scalar, 1); - d = d.applyScalarOp(sop); - double[] expected = new double[] {3, 2}; - Assert.assertArrayEquals(expected, d.getValues(), 0.01); + public void testSerializationSingleColumn() throws IOException { + DeltaDictionary original = new DeltaDictionary(new double[] {1, 2, 3, 4, 5}, 1); + + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + DataOutputStream dos = new DataOutputStream(bos); + original.write(dos); + Assert.assertEquals(original.getExactSizeOnDisk(), bos.size()); + + ByteArrayInputStream bis = new ByteArrayInputStream(bos.toByteArray()); + DataInputStream dis = new DataInputStream(bis); + IDictionary deserialized = DictionaryFactory.read(dis); + + Assert.assertTrue("Deserialized dictionary should be DeltaDictionary", deserialized instanceof DeltaDictionary); + DeltaDictionary deltaDict = (DeltaDictionary) deserialized; + Assert.assertArrayEquals("Values should match after serialization", original.getValues(), deltaDict.getValues(), 0.01); } @Test - public void testScalarOpRightPlusTwoColumns() { - double scalar = 2; - DeltaDictionary d = new DeltaDictionary(new double[] {1, 2, 3, 4}, 2); - ScalarOperator sop = new RightScalarOperator(Plus.getPlusFnObject(), scalar, 1); - d = d.applyScalarOp(sop); - double[] expected = new double[] {3, 4, 3, 4}; - Assert.assertArrayEquals(expected, d.getValues(), 0.01); + public void testSerializationTwoColumns() throws IOException { + DeltaDictionary original = new DeltaDictionary(new double[] {1, 2, 3, 4, 5, 6}, 2); + + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + DataOutputStream dos = new DataOutputStream(bos); + original.write(dos); + Assert.assertEquals(original.getExactSizeOnDisk(), bos.size()); + + ByteArrayInputStream bis = new ByteArrayInputStream(bos.toByteArray()); + DataInputStream dis = new DataInputStream(bis); + IDictionary deserialized = DictionaryFactory.read(dis); + + Assert.assertTrue("Deserialized dictionary should be DeltaDictionary", deserialized instanceof DeltaDictionary); + DeltaDictionary deltaDict = (DeltaDictionary) deserialized; + Assert.assertArrayEquals("Values should match after serialization", original.getValues(), deltaDict.getValues(), 0.01); } @Test - public void testScalarOpRightMinusTwoColumns() { - double scalar = 2; - DeltaDictionary d = new DeltaDictionary(new double[] {1, 2, 3, 4}, 2); - ScalarOperator sop = new RightScalarOperator(Minus.getMinusFnObject(), scalar, 1); - d = d.applyScalarOp(sop); - double[] expected = new double[] {-1, 0, 3, 4}; - Assert.assertArrayEquals(expected, d.getValues(), 0.01); + public void testGetValue() { + DeltaDictionary d = new DeltaDictionary(new double[] {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}, 2); + Assert.assertEquals(1.0, d.getValue(0, 0, 2), 0.01); + Assert.assertEquals(2.0, d.getValue(0, 1, 2), 0.01); + Assert.assertEquals(3.0, d.getValue(1, 0, 2), 0.01); + Assert.assertEquals(4.0, d.getValue(1, 1, 2), 0.01); + Assert.assertEquals(5.0, d.getValue(2, 0, 2), 0.01); + Assert.assertEquals(6.0, d.getValue(2, 1, 2), 0.01); } @Test - public void testScalarOpLeftPlusTwoColumns() { - double scalar = 2; - DeltaDictionary d = new DeltaDictionary(new double[] {1, 2, 3, 4}, 2); - ScalarOperator sop = new LeftScalarOperator(Plus.getPlusFnObject(), scalar, 1); - d = d.applyScalarOp(sop); - double[] expected = new double[] {3, 4, 3, 4}; - Assert.assertArrayEquals(expected, d.getValues(), 0.01); + public void testGetValueSingleColumn() { + DeltaDictionary d = new DeltaDictionary(new double[] {1.0, 2.0, 3.0}, 1); + Assert.assertEquals(1.0, d.getValue(0, 0, 1), 0.01); + Assert.assertEquals(2.0, d.getValue(1, 0, 1), 0.01); + Assert.assertEquals(3.0, d.getValue(2, 0, 1), 0.01); } - @Test(expected = NotImplementedException.class) - public void testNotImplemented() { - double scalar = 2; + @Test + public void testGetDictType() { DeltaDictionary d = new DeltaDictionary(new double[] {1, 2, 3, 4}, 2); - ScalarOperator sop = new LeftScalarOperator(And.getAndFnObject(), scalar, 1); - d = d.applyScalarOp(sop); + Assert.assertEquals(DictType.Delta, d.getDictType()); } + + @Test + public void testGetString() { + DeltaDictionary d = new DeltaDictionary(new double[] {1.0, 2.0, 3.0, 4.0}, 2); + String result = d.getString(2); + String expected = "1.0, 2.0\n3.0, 4.0"; + Assert.assertEquals(expected, result); + } + + @Test + public void testGetStringSingleColumn() { + DeltaDictionary d = new DeltaDictionary(new double[] {1.0, 2.0, 3.0}, 1); + String result = d.getString(1); + String expected = "1.0\n2.0\n3.0"; + Assert.assertEquals(expected, result); + } + } diff --git a/src/test/java/org/apache/sysds/test/component/compress/estim/encoding/EncodeDeltaTest.java b/src/test/java/org/apache/sysds/test/component/compress/estim/encoding/EncodeDeltaTest.java new file mode 100644 index 00000000000..8cb3d93a58c --- /dev/null +++ b/src/test/java/org/apache/sysds/test/component/compress/estim/encoding/EncodeDeltaTest.java @@ -0,0 +1,468 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysds.test.component.compress.estim.encoding; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; + +import org.apache.sysds.runtime.compress.estim.encoding.EncodingFactory; +import org.apache.sysds.runtime.compress.estim.encoding.EmptyEncoding; +import org.apache.sysds.runtime.compress.estim.encoding.IEncode; +import org.apache.sysds.runtime.compress.estim.encoding.DenseEncoding; +import org.apache.sysds.runtime.compress.estim.encoding.SparseEncoding; +import org.apache.sysds.runtime.compress.estim.encoding.ConstEncoding; +import org.apache.sysds.runtime.compress.colgroup.indexes.ColIndexFactory; +import org.apache.sysds.runtime.matrix.data.MatrixBlock; +import org.junit.Test; + +public class EncodeDeltaTest { + + @Test + public void testCreateFromMatrixBlockDeltaBasic() { + MatrixBlock mb = new MatrixBlock(3, 2, false); + mb.allocateDenseBlock(); + mb.set(0, 0, 10); + mb.set(0, 1, 20); + mb.set(1, 0, 11); + mb.set(1, 1, 21); + mb.set(2, 0, 12); + mb.set(2, 1, 22); + + IEncode encoding = EncodingFactory.createFromMatrixBlockDelta(mb, false, ColIndexFactory.create(2)); + assertNotNull("Encoding should not be null", encoding); + assertTrue("Encoding should be DenseEncoding", encoding instanceof DenseEncoding); + assertEquals("First row [10,20] stored as-is, deltas [1,1] for rows 1-2, so 2 unique: [10,20] and [1,1]", 2, encoding.getUnique()); + assertTrue("Encoding should be dense", encoding.isDense()); + } + + @Test + public void testCreateFromMatrixBlockDeltaWithSampleSize() { + MatrixBlock mb = new MatrixBlock(5, 2, false); + mb.allocateDenseBlock(); + for(int i = 0; i < 5; i++) { + mb.set(i, 0, 10 + i); + mb.set(i, 1, 20 + i); + } + + IEncode encoding = EncodingFactory.createFromMatrixBlockDelta(mb, false, ColIndexFactory.create(2), 3); + assertNotNull("Encoding should not be null", encoding); + assertTrue("Encoding should be DenseEncoding", encoding instanceof DenseEncoding); + assertEquals("Sample size is 3, so should process 3 rows", 3, ((DenseEncoding) encoding).getMap().size()); + assertTrue("Should have at least 1 unique delta value", encoding.getUnique() >= 1); + assertTrue("Should have at most 3 unique delta values (one per row)", encoding.getUnique() <= 3); + } + + @Test + public void testCreateFromMatrixBlockDeltaFirstRowAsIs() { + MatrixBlock mb = new MatrixBlock(2, 2, false); + mb.allocateDenseBlock(); + mb.set(0, 0, 5); + mb.set(0, 1, 10); + mb.set(1, 0, 5); + mb.set(1, 1, 10); + + IEncode encoding = EncodingFactory.createFromMatrixBlockDelta(mb, false, ColIndexFactory.create(2)); + assertNotNull("Encoding should not be null", encoding); + assertTrue("Encoding should be DenseEncoding", encoding instanceof DenseEncoding); + assertEquals("First row [5,10] stored as-is, delta [0,0] for row 1. Map has 2 unique: [5,10] and [0,0]. With zero=true, unique = 2 + 1 = 3", 3, encoding.getUnique()); + } + + @Test + public void testCreateFromMatrixBlockDeltaConstantDeltas() { + MatrixBlock mb = new MatrixBlock(4, 2, false); + mb.allocateDenseBlock(); + mb.set(0, 0, 10); + mb.set(0, 1, 20); + mb.set(1, 0, 11); + mb.set(1, 1, 21); + mb.set(2, 0, 12); + mb.set(2, 1, 22); + mb.set(3, 0, 13); + mb.set(3, 1, 23); + + IEncode encoding = EncodingFactory.createFromMatrixBlockDelta(mb, false, ColIndexFactory.create(2)); + assertNotNull("Encoding should not be null", encoding); + assertTrue("Encoding should be DenseEncoding", encoding instanceof DenseEncoding); + assertEquals("First row [10,20] stored as-is, all deltas are [1,1], so 2 unique: [10,20] and [1,1]", 2, encoding.getUnique()); + assertTrue("Encoding should be dense", encoding.isDense()); + } + + @Test + public void testCreateFromMatrixBlockDeltaSingleRow() { + MatrixBlock mb = new MatrixBlock(1, 2, false); + mb.allocateDenseBlock(); + mb.set(0, 0, 10); + mb.set(0, 1, 20); + + IEncode encoding = EncodingFactory.createFromMatrixBlockDelta(mb, false, ColIndexFactory.create(2)); + assertNotNull("Encoding should not be null", encoding); + // Single row results in ConstEncoding because there is only 1 unique value (the row itself) + assertTrue("Single row should result in ConstEncoding", encoding instanceof ConstEncoding); + assertEquals("Single row has no deltas, so should have 1 unique value (the row itself)", 1, encoding.getUnique()); + } + + @Test + public void testCreateFromMatrixBlockDeltaSparse() { + MatrixBlock mb = new MatrixBlock(3, 2, true); + mb.set(0, 0, 10); + mb.set(0, 1, 20); + mb.set(1, 0, 11); + mb.set(2, 1, 22); + + IEncode encoding = EncodingFactory.createFromMatrixBlockDelta(mb, false, ColIndexFactory.create(2)); + assertNotNull("Encoding should not be null", encoding); + assertTrue("Sparse input may result in SparseEncoding or DenseEncoding", + encoding instanceof DenseEncoding || encoding instanceof SparseEncoding); + assertTrue("Should have at least 1 unique value", encoding.getUnique() >= 1); + } + + @Test + public void testCreateFromMatrixBlockDeltaColumnSelection() { + MatrixBlock mb = new MatrixBlock(3, 4, false); + mb.allocateDenseBlock(); + mb.set(0, 0, 10); + mb.set(0, 1, 20); + mb.set(0, 2, 30); + mb.set(0, 3, 40); + mb.set(1, 0, 11); + mb.set(1, 1, 21); + mb.set(1, 2, 31); + mb.set(1, 3, 41); + mb.set(2, 0, 12); + mb.set(2, 1, 22); + mb.set(2, 2, 32); + mb.set(2, 3, 42); + + IEncode encoding = EncodingFactory.createFromMatrixBlockDelta(mb, false, ColIndexFactory.create(0, 2)); + assertNotNull("Encoding should not be null", encoding); + assertTrue("Encoding should be DenseEncoding", encoding instanceof DenseEncoding); + assertEquals("Selected columns 0 and 2: first row [10,30] stored as-is, deltas [1,1] for rows 1-2, so 2 unique: [10,30] and [1,1]", 2, encoding.getUnique()); + assertEquals("Should have 3 rows in mapping", 3, ((DenseEncoding) encoding).getMap().size()); + } + + @Test + public void testCreateFromMatrixBlockDeltaNegativeValues() { + MatrixBlock mb = new MatrixBlock(3, 2, false); + mb.allocateDenseBlock(); + mb.set(0, 0, 10); + mb.set(0, 1, 20); + mb.set(1, 0, 8); + mb.set(1, 1, 15); + mb.set(2, 0, 12); + mb.set(2, 1, 25); + + IEncode encoding = EncodingFactory.createFromMatrixBlockDelta(mb, false, ColIndexFactory.create(2)); + assertNotNull("Encoding should not be null", encoding); + assertTrue("Encoding should be DenseEncoding", encoding instanceof DenseEncoding); + // Deltas: R0=[10,20], R1=[-2,-5], R2=[4,10] -> 3 unique values + assertEquals("Should have 3 unique values", 3, encoding.getUnique()); + } + + @Test + public void testCreateFromMatrixBlockDeltaZeros() { + MatrixBlock mb = new MatrixBlock(3, 2, false); + mb.allocateDenseBlock(); + mb.set(0, 0, 5); + mb.set(0, 1, 0); + mb.set(1, 0, 5); + mb.set(1, 1, 0); + mb.set(2, 0, 0); + mb.set(2, 1, 5); + + IEncode encoding = EncodingFactory.createFromMatrixBlockDelta(mb, false, ColIndexFactory.create(2)); + assertNotNull("Encoding should not be null", encoding); + assertTrue("Encoding should be DenseEncoding or SparseEncoding", + encoding instanceof DenseEncoding || encoding instanceof SparseEncoding); + assertTrue("Should have at least 1 unique value", encoding.getUnique() >= 1); + } + + + @Test + public void testCreateFromMatrixBlockDeltaLargeMatrix() { + MatrixBlock mb = new MatrixBlock(100, 3, false); + mb.allocateDenseBlock(); + for(int i = 0; i < 100; i++) { + mb.set(i, 0, i); + mb.set(i, 1, i * 2); + mb.set(i, 2, i * 3); + } + + IEncode encoding = EncodingFactory.createFromMatrixBlockDelta(mb, false, ColIndexFactory.create(3)); + assertNotNull("Encoding should not be null", encoding); + assertTrue("Encoding should be DenseEncoding", encoding instanceof DenseEncoding); + assertEquals("First row [0,0,0] stored as-is, all deltas are [1,2,3]. Map has 2 unique: [0,0,0] and [1,2,3]. All rows have non-zero deltas, so offsets.size()=100=ru, zero=false, unique=2", 2, encoding.getUnique()); + assertEquals("Should have 100 rows in mapping", 100, ((DenseEncoding) encoding).getMap().size()); + } + + @Test + public void testCreateFromMatrixBlockDeltaSampleSizeSmaller() { + MatrixBlock mb = new MatrixBlock(10, 2, false); + mb.allocateDenseBlock(); + for(int i = 0; i < 10; i++) { + mb.set(i, 0, 10 + i); + mb.set(i, 1, 20 + i); + } + + IEncode encoding = EncodingFactory.createFromMatrixBlockDelta(mb, false, ColIndexFactory.create(2), 5); + assertNotNull("Encoding should not be null", encoding); + assertTrue("Encoding should be DenseEncoding", encoding instanceof DenseEncoding); + assertEquals("Sample size is 5, so should process 5 rows", 5, ((DenseEncoding) encoding).getMap().size()); + assertEquals("First row [10,20] stored as-is, all deltas are [1,1], so 2 unique: [10,20] and [1,1]", 2, encoding.getUnique()); + } + + @Test + public void testCreateFromMatrixBlockDeltaSampleSizeLarger() { + MatrixBlock mb = new MatrixBlock(5, 2, false); + mb.allocateDenseBlock(); + for(int i = 0; i < 5; i++) { + mb.set(i, 0, 10 + i); + mb.set(i, 1, 20 + i); + } + + IEncode encoding = EncodingFactory.createFromMatrixBlockDelta(mb, false, ColIndexFactory.create(2), 10); + assertNotNull("Encoding should not be null", encoding); + assertTrue("Encoding should be DenseEncoding", encoding instanceof DenseEncoding); + assertEquals("Sample size 10 > matrix rows 5, so should process all 5 rows", 5, ((DenseEncoding) encoding).getMap().size()); + assertEquals("First row [10,20] stored as-is, all deltas are [1,1], so 2 unique: [10,20] and [1,1]", 2, encoding.getUnique()); + } + + @Test + public void testCreateFromMatrixBlockDeltaEmptyMatrix() { + MatrixBlock mb = new MatrixBlock(5, 2, false); + mb.allocateDenseBlock(); + + IEncode encoding = EncodingFactory.createFromMatrixBlockDelta(mb, false, ColIndexFactory.create(2)); + assertNotNull("Encoding should not be null", encoding); + // Empty matrix (all zeros) is constant 0 in delta encoding + assertTrue("Empty matrix should result in ConstEncoding or EmptyEncoding", + encoding instanceof ConstEncoding || encoding instanceof EmptyEncoding); + // Both ConstEncoding(0) and EmptyEncoding return 1 unique value (the zero tuple) + assertEquals("Encoding of zeros should have 1 unique value", 1, encoding.getUnique()); + } + + @Test + public void testCreateFromMatrixBlockDeltaEmptyMatrixSparse() { + MatrixBlock mb = new MatrixBlock(5, 2, true); + mb.setNonZeros(0); + + IEncode encoding = EncodingFactory.createFromMatrixBlockDelta(mb, false, ColIndexFactory.create(2)); + assertNotNull("Encoding should not be null", encoding); + // Empty sparse matrix is also constant 0 + assertTrue("Empty sparse matrix should result in ConstEncoding or EmptyEncoding", + encoding instanceof ConstEncoding || encoding instanceof EmptyEncoding); + // Both ConstEncoding(0) and EmptyEncoding return 1 unique value (the zero tuple) + assertEquals("Encoding of zeros should have 1 unique value", 1, encoding.getUnique()); + } + + @Test + public void testCombineTwoDenseDeltaEncodings() { + MatrixBlock mb1 = new MatrixBlock(3, 1, false); + mb1.allocateDenseBlock(); + mb1.set(0, 0, 10); + mb1.set(1, 0, 11); + mb1.set(2, 0, 12); + + MatrixBlock mb2 = new MatrixBlock(3, 1, false); + mb2.allocateDenseBlock(); + mb2.set(0, 0, 20); + mb2.set(1, 0, 21); + mb2.set(2, 0, 22); + + IEncode enc1 = EncodingFactory.createFromMatrixBlockDelta(mb1, false, ColIndexFactory.create(1)); + IEncode enc2 = EncodingFactory.createFromMatrixBlockDelta(mb2, false, ColIndexFactory.create(1)); + + assertNotNull("First encoding should not be null", enc1); + assertNotNull("Second encoding should not be null", enc2); + assertTrue("First encoding should be DenseEncoding", enc1 instanceof DenseEncoding); + assertTrue("Second encoding should be DenseEncoding", enc2 instanceof DenseEncoding); + + IEncode combined = enc1.combine(enc2); + assertNotNull("Combined encoding should not be null", combined); + assertTrue("Combined encoding should be DenseEncoding", combined instanceof DenseEncoding); + assertTrue("Combined unique count should be at least max of inputs", + combined.getUnique() >= Math.max(enc1.getUnique(), enc2.getUnique())); + assertTrue("Combined unique count should be at most product of inputs", + combined.getUnique() <= enc1.getUnique() * enc2.getUnique()); + assertEquals("Combined mapping should have same size as input", + ((DenseEncoding) enc1).getMap().size(), ((DenseEncoding) combined).getMap().size()); + } + + @Test + public void testCombineDenseDeltaEncodingWithEmpty() { + MatrixBlock mb = new MatrixBlock(3, 1, false); + mb.allocateDenseBlock(); + mb.set(0, 0, 10); + mb.set(1, 0, 11); + mb.set(2, 0, 12); + + IEncode enc1 = EncodingFactory.createFromMatrixBlockDelta(mb, false, ColIndexFactory.create(1)); + IEncode enc2 = new EmptyEncoding(); + + assertNotNull("First encoding should not be null", enc1); + assertTrue("First encoding should be DenseEncoding", enc1 instanceof DenseEncoding); + + IEncode combined = enc1.combine(enc2); + assertNotNull("Combined encoding should not be null", combined); + assertEquals("Combining with EmptyEncoding should return original encoding", enc1, combined); + } + + @Test + public void testCombineDenseDeltaEncodingWithConst() { + MatrixBlock mb = new MatrixBlock(3, 1, false); + mb.allocateDenseBlock(); + mb.set(0, 0, 10); + mb.set(1, 0, 11); + mb.set(2, 0, 12); + + IEncode enc1 = EncodingFactory.createFromMatrixBlockDelta(mb, false, ColIndexFactory.create(1)); + + MatrixBlock constMb = new MatrixBlock(3, 1, false); + constMb.allocateDenseBlock(); + constMb.set(0, 0, 5); + constMb.set(1, 0, 5); + constMb.set(2, 0, 5); + IEncode enc2 = EncodingFactory.createFromMatrixBlock(constMb, false, ColIndexFactory.create(1)); + + assertNotNull("First encoding should not be null", enc1); + assertTrue("First encoding should be DenseEncoding", enc1 instanceof DenseEncoding); + assertTrue("Second encoding should be ConstEncoding", enc2 instanceof ConstEncoding); + + IEncode combined = enc1.combine(enc2); + assertNotNull("Combined encoding should not be null", combined); + assertEquals("Combining with ConstEncoding should return original encoding", enc1, combined); + } + + @Test + public void testCombineDenseDeltaEncodingsWithDifferentDeltas() { + MatrixBlock mb1 = new MatrixBlock(4, 1, false); + mb1.allocateDenseBlock(); + mb1.set(0, 0, 1); + mb1.set(1, 0, 2); + mb1.set(2, 0, 4); + mb1.set(3, 0, 8); + + MatrixBlock mb2 = new MatrixBlock(4, 1, false); + mb2.allocateDenseBlock(); + mb2.set(0, 0, 10); + mb2.set(1, 0, 20); + mb2.set(2, 0, 40); + mb2.set(3, 0, 80); + + IEncode enc1 = EncodingFactory.createFromMatrixBlockDelta(mb1, false, ColIndexFactory.create(1)); + IEncode enc2 = EncodingFactory.createFromMatrixBlockDelta(mb2, false, ColIndexFactory.create(1)); + + assertNotNull("First encoding should not be null", enc1); + assertNotNull("Second encoding should not be null", enc2); + assertTrue("First encoding should be DenseEncoding", enc1 instanceof DenseEncoding); + assertTrue("Second encoding should be DenseEncoding", enc2 instanceof DenseEncoding); + + IEncode combined = enc1.combine(enc2); + assertNotNull("Combined encoding should not be null", combined); + assertTrue("Combined encoding should be DenseEncoding", combined instanceof DenseEncoding); + assertEquals("Combined mapping should have same size as input", + 4, ((DenseEncoding) combined).getMap().size()); + } + + @Test + public void testCreateFromMatrixBlockDeltaDensePath() { + MatrixBlock mb = new MatrixBlock(10, 2, true); + mb.set(0, 0, 10); + mb.set(0, 1, 20); + mb.set(1, 0, 11); + mb.set(1, 1, 21); + mb.set(2, 0, 12); + mb.set(2, 1, 22); + mb.set(3, 0, 13); + mb.set(3, 1, 23); + mb.set(4, 0, 14); + mb.set(4, 1, 24); + + IEncode encoding = EncodingFactory.createFromMatrixBlockDelta(mb, false, ColIndexFactory.create(2), 10); + assertNotNull("Encoding should not be null", encoding); + assertTrue("Should result in DenseEncoding (5 non-zero rows >= 10/4=2.5, so dense path)", + encoding instanceof DenseEncoding); + assertTrue("Should have at least 1 unique value", encoding.getUnique() >= 1); + } + + @Test + public void testCreateFromMatrixBlockDeltaEmptyEncoding() { + MatrixBlock mb = new MatrixBlock(10, 2, true); + + IEncode encoding = EncodingFactory.createFromMatrixBlockDelta(mb, false, ColIndexFactory.create(2), 10); + assertNotNull("Encoding should not be null", encoding); + assertTrue("Empty matrix should result in EmptyEncoding", encoding instanceof EmptyEncoding); + } + + @Test + public void testCreateFromMatrixBlockDeltaConstEncoding() { + MatrixBlock mb = new MatrixBlock(5, 2, false); + mb.allocateDenseBlock(); + for(int i = 0; i < 5; i++) { + mb.set(i, 0, 10); + mb.set(i, 1, 20); + } + + IEncode encoding = EncodingFactory.createFromMatrixBlockDelta(mb, false, ColIndexFactory.create(2), 5); + assertNotNull("Encoding should not be null", encoding); + assertTrue("Constant matrix with delta encoding: first row is absolute [10,20], rest are deltas [0,0], so map.size()=2, not ConstEncoding", + encoding instanceof DenseEncoding || encoding instanceof SparseEncoding); + assertTrue("Should have 2 unique values (first row absolute, rest are zero deltas)", encoding.getUnique() >= 2); + } + + + @Test + public void testCreateFromMatrixBlockDeltaSparseEncoding() { + MatrixBlock mb = new MatrixBlock(20, 2, true); + mb.set(0, 0, 10); + mb.set(0, 1, 20); + mb.set(1, 0, 11); + mb.set(1, 1, 21); + mb.set(2, 0, 12); + mb.set(2, 1, 22); + + IEncode encoding = EncodingFactory.createFromMatrixBlockDelta(mb, false, ColIndexFactory.create(2), 20); + assertNotNull("Encoding should not be null", encoding); + assertTrue("Sparse matrix with few non-zero rows (3 < 20/4=5) should result in SparseEncoding", + encoding instanceof SparseEncoding); + assertTrue("Should have at least 1 unique value", encoding.getUnique() >= 1); + } + + @Test + public void testCreateFromMatrixBlockDeltaDenseWithZero() { + MatrixBlock mb = new MatrixBlock(10, 2, true); + mb.set(0, 0, 10); + mb.set(0, 1, 20); + mb.set(1, 0, 11); + mb.set(1, 1, 21); + mb.set(2, 0, 12); + mb.set(2, 1, 22); + mb.set(3, 0, 13); + mb.set(3, 1, 23); + + IEncode encoding = EncodingFactory.createFromMatrixBlockDelta(mb, false, ColIndexFactory.create(2), 10); + assertNotNull("Encoding should not be null", encoding); + assertTrue("Sparse matrix with some non-zero rows (4 >= 10/4=2.5 but 4 < 10) should result in DenseEncoding with zero=true", + encoding instanceof DenseEncoding); + assertTrue("Should have at least 1 unique value", encoding.getUnique() >= 1); + } + +} + diff --git a/src/test/java/org/apache/sysds/test/component/compress/estim/encoding/EncodeNegativeTest.java b/src/test/java/org/apache/sysds/test/component/compress/estim/encoding/EncodeNegativeTest.java index d2d255c0da9..caa56a44d5e 100644 --- a/src/test/java/org/apache/sysds/test/component/compress/estim/encoding/EncodeNegativeTest.java +++ b/src/test/java/org/apache/sysds/test/component/compress/estim/encoding/EncodeNegativeTest.java @@ -20,6 +20,7 @@ package org.apache.sysds.test.component.compress.estim.encoding; import org.apache.commons.lang3.NotImplementedException; +import org.apache.sysds.runtime.compress.colgroup.indexes.ColIndexFactory; import org.apache.sysds.runtime.compress.estim.encoding.EncodingFactory; import org.apache.sysds.runtime.data.DenseBlockFP64; import org.apache.sysds.runtime.matrix.data.MatrixBlock; @@ -44,12 +45,12 @@ public void encodeNonContiguousTransposed() { EncodingFactory.createFromMatrixBlock(mock, true, 3); } - @Test(expected = NotImplementedException.class) + @Test(expected = NullPointerException.class) public void testInvalidToCallWithNullDeltaTransposed() { EncodingFactory.createFromMatrixBlockDelta(null, true, null); } - @Test(expected = NotImplementedException.class) + @Test(expected = NullPointerException.class) public void testInvalidToCallWithNullDelta() { EncodingFactory.createFromMatrixBlockDelta(null, false, null); } @@ -61,20 +62,30 @@ public void testInvalidToCallWithNull() { @Test(expected = NotImplementedException.class) public void testDeltaTransposed() { - EncodingFactory.createFromMatrixBlockDelta(new MatrixBlock(10, 10, false), true, null); + MatrixBlock mb = new MatrixBlock(10, 10, false); + mb.allocateDenseBlock(); + mb.set(0, 0, 1); + mb.set(0, 1, 2); + mb.setNonZeros(2); + EncodingFactory.createFromMatrixBlockDelta(mb, true, ColIndexFactory.create(2)); } - @Test(expected = NotImplementedException.class) + @Test(expected = NullPointerException.class) public void testDelta() { EncodingFactory.createFromMatrixBlockDelta(new MatrixBlock(10, 10, false), false, null); } @Test(expected = NotImplementedException.class) public void testDeltaTransposedNVals() { - EncodingFactory.createFromMatrixBlockDelta(new MatrixBlock(10, 10, false), true, null, 2); + MatrixBlock mb = new MatrixBlock(10, 10, false); + mb.allocateDenseBlock(); + mb.set(0, 0, 1); + mb.set(0, 1, 2); + mb.setNonZeros(2); + EncodingFactory.createFromMatrixBlockDelta(mb, true, ColIndexFactory.create(2), 2); } - @Test(expected = NotImplementedException.class) + @Test(expected = NullPointerException.class) public void testDeltaNVals() { EncodingFactory.createFromMatrixBlockDelta(new MatrixBlock(10, 10, false), false, null, 1); } diff --git a/src/test/java/org/apache/sysds/test/component/compress/lib/CLALibUnaryDeltaTest.java b/src/test/java/org/apache/sysds/test/component/compress/lib/CLALibUnaryDeltaTest.java new file mode 100644 index 00000000000..414db621ade --- /dev/null +++ b/src/test/java/org/apache/sysds/test/component/compress/lib/CLALibUnaryDeltaTest.java @@ -0,0 +1,274 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysds.test.component.compress.lib; + +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.sysds.runtime.compress.CompressedMatrixBlock; +import org.apache.sysds.runtime.compress.CompressedMatrixBlockFactory; +import org.apache.sysds.runtime.compress.CompressionSettingsBuilder; +import org.apache.sysds.runtime.compress.colgroup.AColGroup; +import org.apache.sysds.runtime.compress.colgroup.AColGroup.CompressionType; +import org.apache.sysds.runtime.compress.lib.CLALibUnary; +import org.apache.sysds.runtime.functionobjects.Builtin; +import org.apache.sysds.runtime.matrix.data.MatrixBlock; +import org.apache.sysds.runtime.matrix.operators.UnaryOperator; +import org.apache.sysds.test.TestUtils; +import org.junit.Test; + +public class CLALibUnaryDeltaTest { + + protected static final Log LOG = LogFactory.getLog(CLALibUnaryDeltaTest.class.getName()); + + @Test + public void testCumsumResultsInDeltaEncoding() { + // Use data that results in repetitive deltas to ensure DeltaDDC is chosen + MatrixBlock mb = new MatrixBlock(20, 1, false); + mb.allocateDenseBlock(); + // Input: 1, 2, 1, 2, ... + // Cumsum: 1, 3, 4, 6, ... + // Deltas: 1, 2, 1, 2, ... + for(int i = 0; i < 20; i++) { + mb.set(i, 0, (i % 2 == 0) ? 1.0 : 2.0); + } + mb.setNonZeros(20); + + CompressionSettingsBuilder csb = new CompressionSettingsBuilder().setMinimumCompressionRatio(0.0); + csb.addValidCompression(CompressionType.DDC); + CompressedMatrixBlock cmb = compress(mb, csb); + + UnaryOperator cumsumOp = new UnaryOperator(Builtin.getBuiltinFnObject(Builtin.BuiltinCode.CUMSUM)); + MatrixBlock result = CLALibUnary.unaryOperations(cmb, cumsumOp, null); + + assertNotNull("Result should not be null", result); + assertTrue("Result should be compressed", result instanceof CompressedMatrixBlock); + + CompressedMatrixBlock compressedResult = (CompressedMatrixBlock) result; + boolean hasDeltaDDC = false; + for(AColGroup cg : compressedResult.getColGroups()) { + if(cg.getCompType() == CompressionType.DeltaDDC) { + hasDeltaDDC = true; + break; + } + } + + assertTrue("Result should contain DeltaDDC column group", hasDeltaDDC); + } + + @Test + public void testCumsumCorrectness() { + MatrixBlock mb = TestUtils.generateTestMatrixBlock(10, 3, 0, 10, 1.0, 7); + CompressionSettingsBuilder csb = new CompressionSettingsBuilder().setMinimumCompressionRatio(0.0); + csb.addValidCompression(CompressionType.DDC); + CompressedMatrixBlock cmb = compress(mb, csb); + + UnaryOperator cumsumOp = new UnaryOperator(Builtin.getBuiltinFnObject(Builtin.BuiltinCode.CUMSUM)); + MatrixBlock result = CLALibUnary.unaryOperations(cmb, cumsumOp, null); + MatrixBlock expected = mb.unaryOperations(cumsumOp, new MatrixBlock()); + + TestUtils.compareMatrices(expected, result, 0.0, "Cumsum result should match expected"); + } + + @Test + public void testRowcumsumCorrectness() { + MatrixBlock mb = TestUtils.generateTestMatrixBlock(10, 5, 0, 10, 1.0, 7); + CompressionSettingsBuilder csb = new CompressionSettingsBuilder().setMinimumCompressionRatio(0.0); + csb.addValidCompression(CompressionType.DDC); + CompressedMatrixBlock cmb = compress(mb, csb); + + UnaryOperator rowCumsumOp = new UnaryOperator(Builtin.getBuiltinFnObject(Builtin.BuiltinCode.ROWCUMSUM)); + MatrixBlock result = CLALibUnary.unaryOperations(cmb, rowCumsumOp, null); + MatrixBlock expected = mb.unaryOperations(rowCumsumOp, new MatrixBlock()); + + TestUtils.compareMatrices(expected, result, 0.0, "RowCumsum result should match expected"); + } + + @Test + public void testNonCumsumOperationDoesNotUseDeltaEncoding() { + MatrixBlock mb = new MatrixBlock(10, 2, false); + mb.allocateDenseBlock(); + for(int i = 0; i < 10; i++) { + mb.set(i, 0, i); + mb.set(i, 1, i * 2); + } + mb.setNonZeros(20); + + CompressionSettingsBuilder csb = new CompressionSettingsBuilder().setMinimumCompressionRatio(0.0); + csb.addValidCompression(CompressionType.DDC); + CompressedMatrixBlock cmb = compress(mb, csb); + + UnaryOperator absOp = new UnaryOperator(Builtin.getBuiltinFnObject(Builtin.BuiltinCode.ABS)); + MatrixBlock result = CLALibUnary.unaryOperations(cmb, absOp, null); + + assertNotNull("Result should not be null", result); + + if(result instanceof CompressedMatrixBlock) { + CompressedMatrixBlock compressedResult = (CompressedMatrixBlock) result; + boolean hasDeltaDDC = false; + for(AColGroup cg : compressedResult.getColGroups()) { + if(cg.getCompType() == CompressionType.DeltaDDC) { + hasDeltaDDC = true; + break; + } + } + // Should not have delta DDC + assertTrue("Result should NOT contain DeltaDDC column group for ABS", !hasDeltaDDC); + } + // If not compressed, it's also fine (standard execution) + } + + @Test + public void testCumsumSparseMatrix() { + MatrixBlock mb = new MatrixBlock(100, 10, true); + mb.set(0, 0, 1.0); + mb.set(10, 0, 2.0); + mb.set(20, 0, 3.0); + mb.setNonZeros(3); + + CompressionSettingsBuilder csb = new CompressionSettingsBuilder().setMinimumCompressionRatio(0.0); + csb.addValidCompression(CompressionType.DDC); + CompressedMatrixBlock cmb = compress(mb, csb); + + UnaryOperator cumsumOp = new UnaryOperator(Builtin.getBuiltinFnObject(Builtin.BuiltinCode.CUMSUM)); + MatrixBlock result = CLALibUnary.unaryOperations(cmb, cumsumOp, null); + MatrixBlock expected = mb.unaryOperations(cumsumOp, new MatrixBlock()); + + TestUtils.compareMatrices(expected, result, 0.0, "Cumsum result for sparse matrix should match expected"); + } + + @Test + public void testCumsumWithDifferentInputCompressionTypes() { + MatrixBlock mb = new MatrixBlock(10, 1, false); + mb.allocateDenseBlock(); + // RLE friendly data: 1, 1, 1, 2, 2, 2, 3, 3, 3, 4 + for(int i=0; i<3; i++) mb.set(i, 0, 1.0); + for(int i=3; i<6; i++) mb.set(i, 0, 2.0); + for(int i=6; i<9; i++) mb.set(i, 0, 3.0); + mb.set(9, 0, 4.0); + mb.setNonZeros(10); + + CompressionSettingsBuilder csb = new CompressionSettingsBuilder().setMinimumCompressionRatio(0.0); + csb.addValidCompression(CompressionType.RLE); + CompressedMatrixBlock cmb = compress(mb, csb); + + UnaryOperator cumsumOp = new UnaryOperator(Builtin.getBuiltinFnObject(Builtin.BuiltinCode.CUMSUM)); + MatrixBlock result = CLALibUnary.unaryOperations(cmb, cumsumOp, null); + + assertTrue("Result should be compressed", result instanceof CompressedMatrixBlock); + MatrixBlock expected = mb.unaryOperations(cumsumOp, new MatrixBlock()); + TestUtils.compareMatrices(expected, result, 0.0, "Cumsum result from RLE input should match expected"); + } + + @Test + public void testCumsumLargeMatrix() { + // Larger matrix to trigger multi-threaded execution if applicable + MatrixBlock mb = TestUtils.generateTestMatrixBlock(100, 5, 0, 100, 1.0, 7); + CompressionSettingsBuilder csb = new CompressionSettingsBuilder().setMinimumCompressionRatio(0.0); + csb.addValidCompression(CompressionType.DDC); + CompressedMatrixBlock cmb = compress(mb, csb); + + UnaryOperator cumsumOp = new UnaryOperator(Builtin.getBuiltinFnObject(Builtin.BuiltinCode.CUMSUM)); + MatrixBlock result = CLALibUnary.unaryOperations(cmb, cumsumOp, null); + MatrixBlock expected = mb.unaryOperations(cumsumOp, new MatrixBlock()); + + TestUtils.compareMatrices(expected, result, 0.0, "Cumsum result for large matrix should match expected"); + } + + @Test + public void testCumsumWithConstantColumns() { + MatrixBlock mb = new MatrixBlock(10, 2, false); + mb.allocateDenseBlock(); + for(int i=0; i<10; i++) { + mb.set(i, 0, 1.0); // Constant column + mb.set(i, 1, i); // Increasing column + } + mb.setNonZeros(20); + + CompressionSettingsBuilder csb = new CompressionSettingsBuilder().setMinimumCompressionRatio(0.0); + csb.addValidCompression(CompressionType.DDC); + csb.addValidCompression(CompressionType.CONST); + CompressedMatrixBlock cmb = compress(mb, csb); + + UnaryOperator cumsumOp = new UnaryOperator(Builtin.getBuiltinFnObject(Builtin.BuiltinCode.CUMSUM)); + MatrixBlock result = CLALibUnary.unaryOperations(cmb, cumsumOp, null); + MatrixBlock expected = mb.unaryOperations(cumsumOp, new MatrixBlock()); + + TestUtils.compareMatrices(expected, result, 0.0, "Cumsum result with constant columns should match expected"); + } + + @Test + public void testCumsumMultiColumn() { + MatrixBlock mb = new MatrixBlock(10, 4, false); + mb.allocateDenseBlock(); + for(int i=0; i<10; i++) { + for(int j=0; j<4; j++) { + mb.set(i, j, i+j); + } + } + mb.setNonZeros(40); + + CompressionSettingsBuilder csb = new CompressionSettingsBuilder().setMinimumCompressionRatio(0.0); + csb.addValidCompression(CompressionType.DDC); + CompressedMatrixBlock cmb = compress(mb, csb); + + UnaryOperator cumsumOp = new UnaryOperator(Builtin.getBuiltinFnObject(Builtin.BuiltinCode.CUMSUM)); + MatrixBlock result = CLALibUnary.unaryOperations(cmb, cumsumOp, null); + MatrixBlock expected = mb.unaryOperations(cumsumOp, new MatrixBlock()); + + TestUtils.compareMatrices(expected, result, 0.0, "Cumsum result for multi-column matrix should match expected"); + } + + @Test + public void testCumsumWhenDeltaDDCNotInValidCompressions() { + MatrixBlock mb = new MatrixBlock(4, 2, false); + mb.allocateDenseBlock(); + mb.set(0, 0, 1.0); + mb.set(0, 1, 2.0); + mb.set(1, 0, 3.0); + mb.set(1, 1, 4.0); + mb.set(2, 0, 5.0); + mb.set(2, 1, 6.0); + mb.set(3, 0, 7.0); + mb.set(3, 1, 8.0); + mb.setNonZeros(8); + + CompressionSettingsBuilder csb = new CompressionSettingsBuilder().setMinimumCompressionRatio(0.0); + csb.addValidCompression(CompressionType.RLE); + CompressedMatrixBlock cmb = compress(mb, csb); + + UnaryOperator cumsumOp = new UnaryOperator(Builtin.getBuiltinFnObject(Builtin.BuiltinCode.CUMSUM)); + MatrixBlock result = CLALibUnary.unaryOperations(cmb, cumsumOp, null); + + assertNotNull("Result should not be null", result); + MatrixBlock expected = mb.unaryOperations(cumsumOp, new MatrixBlock()); + TestUtils.compareMatrices(expected, result, 0.0, "Cumsum result should match expected even when DeltaDDC not in valid compressions"); + } + + private CompressedMatrixBlock compress(MatrixBlock mb, CompressionSettingsBuilder csb) { + MatrixBlock mbComp = CompressedMatrixBlockFactory.compress(mb, 1, csb).getLeft(); + if(mbComp instanceof CompressedMatrixBlock) + return (CompressedMatrixBlock) mbComp; + else + return CompressedMatrixBlockFactory.genUncompressedCompressedMatrixBlock(mbComp); + } +} diff --git a/src/test/java/org/apache/sysds/test/component/compress/readers/ReaderColumnSelectionSparseDeltaTest.java b/src/test/java/org/apache/sysds/test/component/compress/readers/ReaderColumnSelectionSparseDeltaTest.java new file mode 100644 index 00000000000..37aeb8fb987 --- /dev/null +++ b/src/test/java/org/apache/sysds/test/component/compress/readers/ReaderColumnSelectionSparseDeltaTest.java @@ -0,0 +1,137 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysds.test.component.compress.readers; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; + +import org.apache.sysds.runtime.compress.colgroup.indexes.ColIndexFactory; +import org.apache.sysds.runtime.compress.colgroup.indexes.IColIndex; +import org.apache.sysds.runtime.compress.readers.ReaderColumnSelection; +import org.apache.sysds.runtime.compress.utils.DblArray; +import org.apache.sysds.runtime.matrix.data.MatrixBlock; +import org.junit.Test; + +public class ReaderColumnSelectionSparseDeltaTest { + + @Test + public void testSparseDeltaReaderEmptyRowSkips() { + MatrixBlock mb = new MatrixBlock(4, 3, true); + mb.allocateSparseRowsBlock(); + + mb.appendValue(0, 0, 1.0); + mb.appendValue(2, 0, 5.0); + mb.appendValue(3, 2, 10.0); + + IColIndex colIndexes = ColIndexFactory.create(new int[] {0}); + ReaderColumnSelection reader = ReaderColumnSelection.createDeltaReader(mb, colIndexes, false); + + DblArray row0 = reader.nextRow(); + assertEquals(1.0, row0.getData()[0], 0.0); + + DblArray row1 = reader.nextRow(); + assertEquals(-1.0, row1.getData()[0], 0.0); + + DblArray row2 = reader.nextRow(); + assertEquals(5.0, row2.getData()[0], 0.0); + + DblArray row3 = reader.nextRow(); + assertEquals(-5.0, row3.getData()[0], 0.0); + } + + @Test + public void testSparseDeltaReaderTargetSmallerThanSparse() { + MatrixBlock mb = new MatrixBlock(2, 5, true); + mb.allocateSparseRowsBlock(); + + mb.appendValue(0, 1, 10.0); + mb.appendValue(0, 3, 20.0); + + mb.appendValue(1, 2, 30.0); + mb.appendValue(1, 4, 40.0); + + IColIndex colIndexes = ColIndexFactory.create(new int[] {0, 2}); + ReaderColumnSelection reader = ReaderColumnSelection.createDeltaReader(mb, colIndexes, false); + + DblArray row0 = reader.nextRow(); + assertNotNull(row0); + assertEquals(0.0, row0.getData()[0], 0.0); + assertEquals(0.0, row0.getData()[1], 0.0); + + DblArray row1 = reader.nextRow(); + assertNotNull(row1); + assertEquals(0.0, row1.getData()[0], 0.0); + assertEquals(30.0, row1.getData()[1], 0.0); + } + + @Test + public void testSparseDeltaReaderColumnIndexAheadOfSparse() { + MatrixBlock mb = new MatrixBlock(2, 10, true); + mb.allocateSparseRowsBlock(); + + mb.appendValue(0, 1, 10.0); + mb.appendValue(0, 2, 15.0); + + mb.appendValue(1, 1, 20.0); + mb.appendValue(1, 2, 25.0); + mb.appendValue(1, 3, 30.0); + + IColIndex colIndexes = ColIndexFactory.create(new int[] {3, 4}); + ReaderColumnSelection reader = ReaderColumnSelection.createDeltaReader(mb, colIndexes, false); + + DblArray row0 = reader.nextRow(); + assertNotNull(row0); + assertEquals(0.0, row0.getData()[0], 0.0); + assertEquals(0.0, row0.getData()[1], 0.0); + + DblArray row1 = reader.nextRow(); + assertNotNull(row1); + assertEquals(30.0, row1.getData()[0], 0.0); + assertEquals(0.0, row1.getData()[1], 0.0); + } + + @Test + public void testSparseDeltaReaderColumnIndexBehindSparse() { + MatrixBlock mb = new MatrixBlock(2, 10, true); + mb.allocateSparseRowsBlock(); + + mb.appendValue(0, 3, 10.0); + mb.appendValue(0, 5, 20.0); + + mb.appendValue(1, 1, 30.0); + mb.appendValue(1, 7, 40.0); + + IColIndex colIndexes = ColIndexFactory.create(new int[] {1, 3, 5}); + ReaderColumnSelection reader = ReaderColumnSelection.createDeltaReader(mb, colIndexes, false); + + DblArray row0 = reader.nextRow(); + assertNotNull(row0); + assertEquals(0.0, row0.getData()[0], 0.0); + assertEquals(10.0, row0.getData()[1], 0.0); + assertEquals(20.0, row0.getData()[2], 0.0); + + DblArray row1 = reader.nextRow(); + assertNotNull(row1); + assertEquals(30.0, row1.getData()[0], 0.0); + assertEquals(-10.0, row1.getData()[1], 0.0); + assertEquals(-20.0, row1.getData()[2], 0.0); + } +} + diff --git a/src/test/java/org/apache/sysds/test/component/compress/readers/ReadersDeltaTest.java b/src/test/java/org/apache/sysds/test/component/compress/readers/ReadersDeltaTest.java new file mode 100644 index 00000000000..cf6e3627141 --- /dev/null +++ b/src/test/java/org/apache/sysds/test/component/compress/readers/ReadersDeltaTest.java @@ -0,0 +1,654 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysds.test.component.compress.readers; + +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.sysds.runtime.compress.DMLCompressionException; +import java.io.DataOutput; +import java.io.IOException; + +import org.apache.sysds.runtime.compress.colgroup.indexes.ColIndexFactory; +import org.apache.sysds.runtime.compress.colgroup.indexes.IColIndex; +import org.apache.sysds.runtime.compress.colgroup.indexes.IIterate; +import org.apache.sysds.runtime.compress.readers.ReaderColumnSelection; +import org.apache.sysds.runtime.compress.readers.ReaderColumnSelectionDenseSingleBlockDelta; +import org.apache.sysds.runtime.compress.readers.ReaderColumnSelectionDenseMultiBlockDelta; +import org.apache.sysds.runtime.compress.readers.ReaderColumnSelectionSparseDelta; +import org.apache.sysds.runtime.compress.readers.ReaderColumnSelectionEmpty; +import org.apache.sysds.runtime.compress.utils.DblArray; +import org.apache.sysds.runtime.data.DenseBlockFP64; +import org.apache.sysds.runtime.matrix.data.MatrixBlock; +import org.junit.Test; + +public class ReadersDeltaTest { + + protected static final Log LOG = LogFactory.getLog(ReadersDeltaTest.class.getName()); + + @Test + public void testDeltaReaderDenseSingleBlockBasic() { + MatrixBlock mb = new MatrixBlock(3, 2, false); + mb.allocateDenseBlock(); + mb.set(0, 0, 10); + mb.set(0, 1, 20); + mb.set(1, 0, 11); + mb.set(1, 1, 21); + mb.set(2, 0, 12); + mb.set(2, 1, 22); + + ReaderColumnSelection reader = ReaderColumnSelection.createDeltaReader(mb, ColIndexFactory.create(2), false); + assertNotNull(reader); + assertEquals(ReaderColumnSelectionDenseSingleBlockDelta.class, reader.getClass()); + + DblArray row0 = reader.nextRow(); + assertNotNull(row0); + assertArrayEquals(new double[] {10, 20}, row0.getData(), 0.0); + + DblArray row1 = reader.nextRow(); + assertNotNull(row1); + assertArrayEquals(new double[] {1, 1}, row1.getData(), 0.0); + + DblArray row2 = reader.nextRow(); + assertNotNull(row2); + assertArrayEquals(new double[] {1, 1}, row2.getData(), 0.0); + + assertNull(reader.nextRow()); + } + + @Test + public void testDeltaReaderNegativeValues() { + MatrixBlock mb = new MatrixBlock(3, 2, false); + mb.allocateDenseBlock(); + mb.set(0, 0, 10); + mb.set(0, 1, 20); + mb.set(1, 0, 8); + mb.set(1, 1, 15); + mb.set(2, 0, 12); + mb.set(2, 1, 25); + + ReaderColumnSelection reader = ReaderColumnSelection.createDeltaReader(mb, ColIndexFactory.create(2), false); + reader.nextRow(); + DblArray row1 = reader.nextRow(); + assertArrayEquals(new double[] {-2, -5}, row1.getData(), 0.0); + + DblArray row2 = reader.nextRow(); + assertArrayEquals(new double[] {4, 10}, row2.getData(), 0.0); + } + + @Test + public void testDeltaReaderZeros() { + MatrixBlock mb = new MatrixBlock(3, 2, false); + mb.allocateDenseBlock(); + mb.set(0, 0, 5); + mb.set(0, 1, 0); + mb.set(1, 0, 5); + mb.set(1, 1, 0); + mb.set(2, 0, 0); + mb.set(2, 1, 5); + + ReaderColumnSelection reader = ReaderColumnSelection.createDeltaReader(mb, ColIndexFactory.create(2), false); + reader.nextRow(); + DblArray row1 = reader.nextRow(); + assertArrayEquals(new double[] {0, 0}, row1.getData(), 0.0); + + DblArray row2 = reader.nextRow(); + assertArrayEquals(new double[] {-5, 5}, row2.getData(), 0.0); + } + + @Test + public void testDeltaReaderSingleRow() { + MatrixBlock mb = new MatrixBlock(1, 2, false); + mb.allocateDenseBlock(); + mb.set(0, 0, 10); + mb.set(0, 1, 20); + + ReaderColumnSelection reader = ReaderColumnSelection.createDeltaReader(mb, ColIndexFactory.create(2), false); + DblArray row0 = reader.nextRow(); + assertNotNull(row0); + assertArrayEquals(new double[] {10, 20}, row0.getData(), 0.0); + assertNull(reader.nextRow()); + } + + @Test + public void testDeltaReaderTwoRows() { + MatrixBlock mb = new MatrixBlock(2, 2, false); + mb.allocateDenseBlock(); + mb.set(0, 0, 10); + mb.set(0, 1, 20); + mb.set(1, 0, 15); + mb.set(1, 1, 25); + + ReaderColumnSelection reader = ReaderColumnSelection.createDeltaReader(mb, ColIndexFactory.create(2), false); + DblArray row0 = reader.nextRow(); + assertArrayEquals(new double[] {10, 20}, row0.getData(), 0.0); + + DblArray row1 = reader.nextRow(); + assertArrayEquals(new double[] {5, 5}, row1.getData(), 0.0); + + assertNull(reader.nextRow()); + } + + @Test + public void testDeltaReaderColumnSelection() { + MatrixBlock mb = new MatrixBlock(3, 4, false); + mb.allocateDenseBlock(); + mb.set(0, 0, 10); + mb.set(0, 1, 20); + mb.set(0, 2, 30); + mb.set(0, 3, 40); + mb.set(1, 0, 11); + mb.set(1, 1, 21); + mb.set(1, 2, 31); + mb.set(1, 3, 41); + mb.set(2, 0, 12); + mb.set(2, 1, 22); + mb.set(2, 2, 32); + mb.set(2, 3, 42); + + ReaderColumnSelection reader = ReaderColumnSelection.createDeltaReader(mb, ColIndexFactory.createI(0, 2), false); + DblArray row0 = reader.nextRow(); + assertArrayEquals(new double[] {10, 30}, row0.getData(), 0.0); + + DblArray row1 = reader.nextRow(); + assertArrayEquals(new double[] {1, 1}, row1.getData(), 0.0); + + DblArray row2 = reader.nextRow(); + assertArrayEquals(new double[] {1, 1}, row2.getData(), 0.0); + } + + @Test + public void testDeltaReaderSparse() { + MatrixBlock mb = new MatrixBlock(3, 2, true); + mb.set(0, 0, 10); + mb.set(0, 1, 20); + mb.set(1, 0, 11); + mb.set(2, 1, 22); + + ReaderColumnSelection reader = ReaderColumnSelection.createDeltaReader(mb, ColIndexFactory.create(2), false); + assertNotNull(reader); + assertEquals(ReaderColumnSelectionSparseDelta.class, reader.getClass()); + + DblArray row0 = reader.nextRow(); + assertArrayEquals(new double[] {10, 20}, row0.getData(), 0.0); + + DblArray row1 = reader.nextRow(); + assertArrayEquals(new double[] {1, -20}, row1.getData(), 0.0); + + DblArray row2 = reader.nextRow(); + assertArrayEquals(new double[] {-11, 22}, row2.getData(), 0.0); + } + + @Test + public void testDeltaReaderSparseZeros() { + MatrixBlock mb = new MatrixBlock(3, 2, true); + mb.set(0, 0, 5); + mb.set(1, 1, 10); + mb.set(2, 0, 5); + + ReaderColumnSelection reader = ReaderColumnSelection.createDeltaReader(mb, ColIndexFactory.create(2), false); + DblArray row0 = reader.nextRow(); + assertArrayEquals(new double[] {5, 0}, row0.getData(), 0.0); + + DblArray row1 = reader.nextRow(); + assertArrayEquals(new double[] {-5, 10}, row1.getData(), 0.0); + + DblArray row2 = reader.nextRow(); + assertArrayEquals(new double[] {5, -10}, row2.getData(), 0.0); + } + + @Test + public void testDeltaReaderRange() { + MatrixBlock mb = new MatrixBlock(5, 2, false); + mb.allocateDenseBlock(); + for(int i = 0; i < 5; i++) { + mb.set(i, 0, 10 + i); + mb.set(i, 1, 20 + i); + } + + ReaderColumnSelection reader = ReaderColumnSelection.createDeltaReader(mb, ColIndexFactory.create(2), false, 1, 4); + DblArray row1 = reader.nextRow(); + assertArrayEquals(new double[] {11, 21}, row1.getData(), 0.0); + + DblArray row2 = reader.nextRow(); + assertArrayEquals(new double[] {1, 1}, row2.getData(), 0.0); + + DblArray row3 = reader.nextRow(); + assertArrayEquals(new double[] {1, 1}, row3.getData(), 0.0); + + assertNull(reader.nextRow()); + } + + @Test(expected = DMLCompressionException.class) + public void testDeltaReaderInvalidRange() { + MatrixBlock mb = new MatrixBlock(10, 2, false); + mb.allocateDenseBlock(); + ReaderColumnSelection.createDeltaReader(mb, ColIndexFactory.create(2), false, 10, 9); + } + + + @Test + public void testDeltaReaderLargeMatrix() { + MatrixBlock mb = new MatrixBlock(100, 3, false); + mb.allocateDenseBlock(); + for(int i = 0; i < 100; i++) { + mb.set(i, 0, i); + mb.set(i, 1, i * 2); + mb.set(i, 2, i * 3); + } + + ReaderColumnSelection reader = ReaderColumnSelection.createDeltaReader(mb, ColIndexFactory.create(3), false); + DblArray row0 = reader.nextRow(); + assertArrayEquals(new double[] {0, 0, 0}, row0.getData(), 0.0); + + for(int i = 1; i < 100; i++) { + DblArray row = reader.nextRow(); + assertNotNull(row); + assertArrayEquals(new double[] {1, 2, 3}, row.getData(), 0.0); + } + + assertNull(reader.nextRow()); + } + + @Test + public void testDeltaReaderEmptyMatrix() { + // Test empty matrix with dimensions but all zeros + MatrixBlock mb = new MatrixBlock(5, 2, false); + mb.allocateDenseBlock(); + // Matrix has dimensions but is empty (all zeros) + // isEmpty() should return true + + ReaderColumnSelection reader = ReaderColumnSelection.createDeltaReader(mb, ColIndexFactory.create(2), false); + assertNotNull(reader); + assertTrue(reader instanceof ReaderColumnSelectionEmpty); + + // Empty reader should return null immediately + assertNull(reader.nextRow()); + } + + @Test + public void testDeltaReaderEmptyMatrixSparse() { + // Test empty sparse matrix with dimensions + MatrixBlock mb = new MatrixBlock(5, 2, true); + // Sparse matrix with no values is empty + mb.setNonZeros(0); + + ReaderColumnSelection reader = ReaderColumnSelection.createDeltaReader(mb, ColIndexFactory.create(2), false); + assertNotNull(reader); + assertTrue(reader instanceof ReaderColumnSelectionEmpty); + + // Empty reader should return null immediately + assertNull(reader.nextRow()); + } + + @Test + public void testDeltaReaderDenseMultiBlock() { + MatrixBlock mb = new MatrixBlock(3, 2, false); + mb.allocateDenseBlock(); + mb.set(0, 0, 10); + mb.set(0, 1, 20); + mb.set(1, 0, 11); + mb.set(1, 1, 21); + mb.set(2, 0, 12); + mb.set(2, 1, 22); + + MatrixBlock mbMultiBlock = new MatrixBlock(mb.getNumRows(), mb.getNumColumns(), + new DenseBlockFP64Mock(mb.getNumRows(), mb.getNumColumns(), mb.getDenseBlockValues())); + mbMultiBlock.setNonZeros(mb.getNonZeros()); + + ReaderColumnSelection reader = ReaderColumnSelection.createDeltaReader(mbMultiBlock, ColIndexFactory.create(2), false); + assertNotNull(reader); + assertEquals(ReaderColumnSelectionDenseMultiBlockDelta.class, reader.getClass()); + + DblArray row0 = reader.nextRow(); + assertNotNull(row0); + assertArrayEquals(new double[] {10, 20}, row0.getData(), 0.0); + + DblArray row1 = reader.nextRow(); + assertNotNull(row1); + assertArrayEquals(new double[] {1, 1}, row1.getData(), 0.0); + + DblArray row2 = reader.nextRow(); + assertNotNull(row2); + assertArrayEquals(new double[] {1, 1}, row2.getData(), 0.0); + + assertNull(reader.nextRow()); + } + + @Test + public void testDeltaReaderDenseMultiBlockSingleRow() { + MatrixBlock mb = new MatrixBlock(1, 2, false); + mb.allocateDenseBlock(); + mb.set(0, 0, 10); + mb.set(0, 1, 20); + + MatrixBlock mbMultiBlock = new MatrixBlock(mb.getNumRows(), mb.getNumColumns(), + new DenseBlockFP64Mock(mb.getNumRows(), mb.getNumColumns(), mb.getDenseBlockValues())); + mbMultiBlock.setNonZeros(mb.getNonZeros()); + + ReaderColumnSelection reader = ReaderColumnSelection.createDeltaReader(mbMultiBlock, ColIndexFactory.create(2), false); + assertNotNull(reader); + assertEquals(ReaderColumnSelectionDenseMultiBlockDelta.class, reader.getClass()); + + DblArray row0 = reader.nextRow(); + assertNotNull(row0); + assertArrayEquals(new double[] {10, 20}, row0.getData(), 0.0); + + assertNull(reader.nextRow()); + } + + @Test + public void testDeltaReaderDenseMultiBlockNegativeValues() { + MatrixBlock mb = new MatrixBlock(3, 2, false); + mb.allocateDenseBlock(); + mb.set(0, 0, 10); + mb.set(0, 1, 20); + mb.set(1, 0, 8); + mb.set(1, 1, 15); + mb.set(2, 0, 12); + mb.set(2, 1, 25); + + MatrixBlock mbMultiBlock = new MatrixBlock(mb.getNumRows(), mb.getNumColumns(), + new DenseBlockFP64Mock(mb.getNumRows(), mb.getNumColumns(), mb.getDenseBlockValues())); + mbMultiBlock.setNonZeros(mb.getNonZeros()); + + ReaderColumnSelection reader = ReaderColumnSelection.createDeltaReader(mbMultiBlock, ColIndexFactory.create(2), false); + assertEquals(ReaderColumnSelectionDenseMultiBlockDelta.class, reader.getClass()); + + DblArray row0 = reader.nextRow(); + assertArrayEquals(new double[] {10, 20}, row0.getData(), 0.0); + + DblArray row1 = reader.nextRow(); + assertArrayEquals(new double[] {-2, -5}, row1.getData(), 0.0); + + DblArray row2 = reader.nextRow(); + assertArrayEquals(new double[] {4, 10}, row2.getData(), 0.0); + } + + @Test + public void testDeltaReaderDenseMultiBlockColumnSelection() { + MatrixBlock mb = new MatrixBlock(3, 4, false); + mb.allocateDenseBlock(); + mb.set(0, 0, 10); + mb.set(0, 1, 20); + mb.set(0, 2, 30); + mb.set(0, 3, 40); + mb.set(1, 0, 11); + mb.set(1, 1, 21); + mb.set(1, 2, 31); + mb.set(1, 3, 41); + mb.set(2, 0, 12); + mb.set(2, 1, 22); + mb.set(2, 2, 32); + mb.set(2, 3, 42); + + MatrixBlock mbMultiBlock = new MatrixBlock(mb.getNumRows(), mb.getNumColumns(), + new DenseBlockFP64Mock(mb.getNumRows(), mb.getNumColumns(), mb.getDenseBlockValues())); + mbMultiBlock.setNonZeros(mb.getNonZeros()); + + ReaderColumnSelection reader = ReaderColumnSelection.createDeltaReader(mbMultiBlock, ColIndexFactory.createI(0, 2), false); + assertEquals(ReaderColumnSelectionDenseMultiBlockDelta.class, reader.getClass()); + + DblArray row0 = reader.nextRow(); + assertArrayEquals(new double[] {10, 30}, row0.getData(), 0.0); + + DblArray row1 = reader.nextRow(); + assertArrayEquals(new double[] {1, 1}, row1.getData(), 0.0); + + DblArray row2 = reader.nextRow(); + assertArrayEquals(new double[] {1, 1}, row2.getData(), 0.0); + } + + @Test + public void testDeltaReaderDenseMultiBlockWithRange() { + MatrixBlock mb = new MatrixBlock(5, 2, false); + mb.allocateDenseBlock(); + for(int i = 0; i < 5; i++) { + mb.set(i, 0, 10 + i); + mb.set(i, 1, 20 + i); + } + + MatrixBlock mbMultiBlock = new MatrixBlock(mb.getNumRows(), mb.getNumColumns(), + new DenseBlockFP64Mock(mb.getNumRows(), mb.getNumColumns(), mb.getDenseBlockValues())); + mbMultiBlock.setNonZeros(mb.getNonZeros()); + + ReaderColumnSelection reader = ReaderColumnSelection.createDeltaReader(mbMultiBlock, ColIndexFactory.create(2), false, 1, 4); + assertEquals(ReaderColumnSelectionDenseMultiBlockDelta.class, reader.getClass()); + + DblArray row1 = reader.nextRow(); + assertArrayEquals(new double[] {11, 21}, row1.getData(), 0.0); + + DblArray row2 = reader.nextRow(); + assertArrayEquals(new double[] {1, 1}, row2.getData(), 0.0); + + DblArray row3 = reader.nextRow(); + assertArrayEquals(new double[] {1, 1}, row3.getData(), 0.0); + + assertNull(reader.nextRow()); + } + + @Test + public void testDeltaReaderDenseMultiBlockZeros() { + MatrixBlock mb = new MatrixBlock(3, 2, false); + mb.allocateDenseBlock(); + mb.set(0, 0, 5); + mb.set(0, 1, 0); + mb.set(1, 0, 5); + mb.set(1, 1, 0); + mb.set(2, 0, 0); + mb.set(2, 1, 5); + + MatrixBlock mbMultiBlock = new MatrixBlock(mb.getNumRows(), mb.getNumColumns(), + new DenseBlockFP64Mock(mb.getNumRows(), mb.getNumColumns(), mb.getDenseBlockValues())); + mbMultiBlock.setNonZeros(mb.getNonZeros()); + + ReaderColumnSelection reader = ReaderColumnSelection.createDeltaReader(mbMultiBlock, ColIndexFactory.create(2), false); + assertEquals(ReaderColumnSelectionDenseMultiBlockDelta.class, reader.getClass()); + + DblArray row0 = reader.nextRow(); + assertArrayEquals(new double[] {5, 0}, row0.getData(), 0.0); + + DblArray row1 = reader.nextRow(); + assertArrayEquals(new double[] {0, 0}, row1.getData(), 0.0); + + DblArray row2 = reader.nextRow(); + assertArrayEquals(new double[] {-5, 5}, row2.getData(), 0.0); + } + + @Test(expected = DMLCompressionException.class) + public void testDeltaReaderEmptyColumnIndices() { + MatrixBlock mb = new MatrixBlock(3, 2, false); + mb.allocateDenseBlock(); + IColIndex emptyColIndex = new EmptyColIndexMock(); + ReaderColumnSelection.createDeltaReader(mb, emptyColIndex, false); + } + + private static class DenseBlockFP64Mock extends DenseBlockFP64 { + private static final long serialVersionUID = -3601232958390554672L; + + public DenseBlockFP64Mock(int nRow, int nCol, double[] data) { + super(new int[] {nRow, nCol}, data); + } + + @Override + public boolean isContiguous() { + return false; + } + + @Override + public int numBlocks() { + return 2; + } + } + + private static class EmptyColIndexMock implements IColIndex { + @Override + public int size() { + return 0; + } + + @Override + public int get(int i) { + throw new IndexOutOfBoundsException(); + } + + @Override + public IColIndex combine(IColIndex other) { + throw new UnsupportedOperationException(); + } + + @Override + public IColIndex shift(int i) { + throw new UnsupportedOperationException(); + } + + @Override + public IColIndex.SliceResult slice(int l, int u) { + throw new UnsupportedOperationException(); + } + + @Override + public boolean contains(int i) { + return false; + } + + @Override + public boolean contains(IColIndex a, IColIndex b) { + return false; + } + + @Override + public boolean containsStrict(IColIndex a, IColIndex b) { + return false; + } + + @Override + public boolean containsAny(IColIndex idx) { + return false; + } + + @Override + public int findIndex(int i) { + return -1; + } + + @Override + public boolean equals(Object other) { + return other instanceof IColIndex && equals((IColIndex) other); + } + + @Override + public boolean equals(IColIndex other) { + return other != null && other.size() == 0; + } + + @Override + public int hashCode() { + return 0; + } + + @Override + public IIterate iterator() { + return new IIterate() { + @Override + public boolean hasNext() { + return false; + } + + @Override + public int next() { + throw new java.util.NoSuchElementException(); + } + + @Override + public int v() { + throw new java.util.NoSuchElementException(); + } + + @Override + public int i() { + return -1; + } + }; + } + + @Override + public void write(DataOutput out) throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public long getExactSizeOnDisk() { + return 0; + } + + @Override + public long estimateInMemorySize() { + return 0; + } + + @Override + public boolean isContiguous() { + return false; + } + + @Override + public int[] getReorderingIndex() { + return new int[0]; + } + + @Override + public boolean isSorted() { + return true; + } + + @Override + public IColIndex sort() { + return this; + } + + @Override + public double avgOfIndex() { + return 0; + } + + @Override + public void decompressToDenseFromSparse(org.apache.sysds.runtime.data.SparseBlock sb, int vr, int off, double[] c) { + throw new UnsupportedOperationException(); + } + + @Override + public void decompressVec(int nCol, double[] c, int off, double[] values, int rowIdx) { + throw new UnsupportedOperationException(); + } + + @Override + public String toString() { + return "EmptyColIndexMock[]"; + } + } + +} + diff --git a/src/test/java/org/apache/sysds/test/component/compress/readers/ReadersTest.java b/src/test/java/org/apache/sysds/test/component/compress/readers/ReadersTest.java index ae92d3a4313..94e2fb5c29f 100644 --- a/src/test/java/org/apache/sysds/test/component/compress/readers/ReadersTest.java +++ b/src/test/java/org/apache/sysds/test/component/compress/readers/ReadersTest.java @@ -23,6 +23,7 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; +import org.apache.commons.lang3.NotImplementedException; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.sysds.runtime.compress.DMLCompressionException; @@ -39,10 +40,11 @@ public class ReadersTest { protected static final Log LOG = LogFactory.getLog(ReadersTest.class.getName()); - @Test(expected = DMLCompressionException.class) + @Test public void testDenseSingleCol() { MatrixBlock mb = TestUtils.generateTestMatrixBlock(10, 1, 1, 1, 0.5, 21342); - ReaderColumnSelection.createReader(mb, ColIndexFactory.create(1), false); + ReaderColumnSelection reader = ReaderColumnSelection.createReader(mb, ColIndexFactory.create(1), false); + assertNotNull(reader); } @Test @@ -125,6 +127,49 @@ public void testReaderColumnSelectionQuantized() { } } } - + + @Test + public void testDeltaReaderBasic() { + MatrixBlock mb = new MatrixBlock(3, 2, false); + mb.allocateDenseBlock(); + mb.set(0, 0, 10); + mb.set(0, 1, 20); + mb.set(1, 0, 11); + mb.set(1, 1, 21); + mb.set(2, 0, 12); + mb.set(2, 1, 22); + + ReaderColumnSelection reader = ReaderColumnSelection.createDeltaReader(mb, ColIndexFactory.create(2), false); + DblArray row0 = reader.nextRow(); + assertNotNull(row0); + assertArrayEquals(new double[] {10, 20}, row0.getData(), 0.0); + + DblArray row1 = reader.nextRow(); + assertNotNull(row1); + assertArrayEquals(new double[] {1, 1}, row1.getData(), 0.0); + + DblArray row2 = reader.nextRow(); + assertNotNull(row2); + assertArrayEquals(new double[] {1, 1}, row2.getData(), 0.0); + + assertEquals(null, reader.nextRow()); + } + + @Test + public void testDeltaReaderSingleCol() { + MatrixBlock mb = TestUtils.generateTestMatrixBlock(10, 1, 1, 1, 0.5, 21342); + ReaderColumnSelection reader = ReaderColumnSelection.createDeltaReader(mb, ColIndexFactory.create(1), false); + assertNotNull(reader); + } + + @Test(expected = NotImplementedException.class) + public void testDeltaReaderTransposed() { + MatrixBlock mb = new MatrixBlock(10, 10, false); + mb.allocateDenseBlock(); + mb.set(0, 0, 1); + mb.set(0, 1, 2); + mb.setNonZeros(2); + ReaderColumnSelection.createDeltaReader(mb, ColIndexFactory.create(2), true); + } }