From cc5c61e2b6ac0a6a4c4054b44578829348b1c599 Mon Sep 17 00:00:00 2001 From: Sebastian Baunsgaard Date: Sat, 28 Dec 2024 16:18:24 +0100 Subject: [PATCH 01/11] [MINOR] Mapping Add A Range Setting This commit adds a range setting function for mappings, to enable subsequent parallel setting from integer arrays. Signed-off-by: Sebastian Baunsgaard --- .../compress/colgroup/mapping/AMapToData.java | 91 +++++++++++++++++-- .../compress/colgroup/mapping/MapToBit.java | 11 ++- .../compress/colgroup/mapping/MapToByte.java | 25 ++++- .../compress/colgroup/mapping/MapToChar.java | 64 ++++++++++++- .../colgroup/mapping/MapToCharPByte.java | 11 ++- .../colgroup/mapping/MapToFactory.java | 26 ++++++ .../compress/colgroup/mapping/MapToInt.java | 11 ++- .../compress/colgroup/mapping/MapToZero.java | 7 +- 8 files changed, 226 insertions(+), 20 deletions(-) diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/AMapToData.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/AMapToData.java index 0765a158c52..5724509eac3 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/AMapToData.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/AMapToData.java @@ -22,7 +22,11 @@ import java.io.DataOutput; import java.io.IOException; import java.io.Serializable; +import java.util.ArrayList; import java.util.BitSet; +import java.util.List; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Future; import org.apache.commons.lang3.NotImplementedException; import org.apache.commons.logging.Log; @@ -95,7 +99,6 @@ public final void setUnique(int nUnique) { */ public abstract int getIndex(int n); - /** * Shortcut method to support Integer objects, not really efficient but for the purpose of reusing code. * @@ -116,6 +119,18 @@ public void set(int n, Integer v) { */ public abstract void set(int n, int v); + /** + * set a range of values from another map. 
+ * + * The given tm must only contain supported values, and it is not verified. + * + * @param l lower bound + * @param u upper bound (not inclusive) + * @param off offset to take values from tm + * @param tm the other map to copy values from + */ + public abstract void set(int l, int u, int off, AMapToData tm); + /** * Set the index to the value and get the contained value after. * @@ -813,7 +828,11 @@ protected void copyInt(MapToInt d) { * * @param d The array to copy */ - public abstract void copyInt(int[] d); + public void copyInt(int[] d) { + copyInt(d, 0, size()); + } + + public abstract void copyInt(int[] d, int start, int end); public abstract void copyBit(BitSet d); @@ -887,7 +906,8 @@ public int countRuns(AOffset off) { @Override public boolean equals(Object e) { - return e instanceof AMapToData && (this == e || this.equals((AMapToData) e)); + return this == e || // same object or + (e instanceof AMapToData && this.equals((AMapToData) e)); } /** @@ -903,7 +923,7 @@ public void verify() { if(CompressedMatrixBlock.debug) { for(int i = 0; i < size(); i++) { if(getIndex(i) >= nUnique) { - throw new DMLCompressionException("invalid construction of Mapping data containing values above unique"); + throw new DMLCompressionException("Invalid construction of Mapping data containing values above unique"); } } } @@ -934,7 +954,7 @@ public void decompressToRange(double[] c, int rl, int ru, int offR, double[] val decompressToRangeOff(c, rl, ru, offR, values); } - public void decompressToRangeOff(double[] c, int rl, int ru, int offR, double[] values) { + protected void decompressToRangeOff(double[] c, int rl, int ru, int offR, double[] values) { for(int i = rl, offT = rl + offR; i < ru; i++, offT++) c[offT] += values[getIndex(i)]; } @@ -950,7 +970,7 @@ protected void decompressToRangeNoOffBy8(double[] c, int r, double[] values) { c[r + 7] += values[getIndex(r + 7)]; } - public void decompressToRangeNoOff(double[] c, int rl, int ru, double[] values) { + protected void 
decompressToRangeNoOff(double[] c, int rl, int ru, double[] values) { final int h = (ru - rl) % 8; for(int rc = rl; rc < rl + h; rc++) c[rc] += values[getIndex(rc)]; @@ -958,6 +978,65 @@ public void decompressToRangeNoOff(double[] c, int rl, int ru, double[] values) decompressToRangeNoOffBy8(c, rc, values); } + /** + * Split this mapping into x smaller mappings according to round robin. + * + * @param multiplier The number of smaller mappings to construct + * @return The list of smaller mappings + */ + public AMapToData[] splitReshapeDDC(final int multiplier) { + + final int s = size(); + final AMapToData[] ret = new AMapToData[multiplier]; + final int eachSize = s / multiplier; + for(int i = 0; i < multiplier; i++) + ret[i] = MapToFactory.create(eachSize, getUnique()); + + // for(int i = 0; i < s; i += multiplier) + // splitReshapeDDCRow(ret, multiplier, i); + + final int blkz = Math.max(eachSize / 8, 2048) * multiplier; + for(int i = 0; i < s; i += blkz) + splitReshapeDDCBlock(ret, multiplier, i, Math.min(i + blkz, s)); + + return ret; + } + + public AMapToData[] splitReshapeDDCPushDown(final int multiplier, final ExecutorService pool) throws Exception { + + final int s = size(); + final AMapToData[] ret = new AMapToData[multiplier]; + final int eachSize = s / multiplier; + for(int i = 0; i < multiplier; i++) + ret[i] = MapToFactory.create(eachSize, getUnique()); + + final int blkz = Math.max(eachSize / 8, 2048) * multiplier; + List> tasks = new ArrayList<>(); + for(int i = 0; i < s; i += blkz) { + final int start = i; + final int end = Math.min(i + blkz, s); + tasks.add(pool.submit(() -> splitReshapeDDCBlock(ret, multiplier, start, end))); + } + + for(Future t : tasks) + t.get(); + + return ret; + } + + private void splitReshapeDDCBlock(final AMapToData[] ret, final int multiplier, final int start, final int end) { + + for(int i = start; i < end; i += multiplier) + splitReshapeDDCRow(ret, multiplier, i); + } + + private void splitReshapeDDCRow(final AMapToData[] 
ret, final int multiplier, final int i) { + final int off = i / multiplier; + final int end = i + multiplier; + for(int j = i; j < end; j++) + ret[j % multiplier].set(off, getIndex(j)); + } + @Override public String toString() { final int sz = size(); diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToBit.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToBit.java index f42783fca9f..9fd862bfd90 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToBit.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToBit.java @@ -145,6 +145,13 @@ public void set(int n, int v) { _data[wIdx] &= ~(1L << n); } + @Override + public void set(int l, int u, int off, AMapToData tm){ + for(int i = l; i < u; i++, off++) { + set(i, tm.getIndex(off)); + } + } + @Override public int setAndGet(int n, int v) { set(n, v); @@ -267,8 +274,8 @@ public void copy(AMapToData d) { } @Override - public void copyInt(int[] d) { - for(int i = 0; i < _size; i++) + public void copyInt(int[] d, int start, int end) { + for(int i = start; i < end; i++) set(i, d[i]); } diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToByte.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToByte.java index 30a26735744..3243f6e849a 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToByte.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToByte.java @@ -95,6 +95,23 @@ public void set(int n, int v) { _data[n] = (byte) v; } + @Override + public void set(int l, int u, int off, AMapToData tm){ + if(tm instanceof MapToByte){ + MapToByte tbm = (MapToByte)tm; + byte[] tbv = tbm._data; + for(int i = l; i < u; i++, off++) { + _data[i] = tbv[off]; + } + } + else{ + + for(int i = l; i < u; i++, off++) { + _data[i] = (byte)tm.getIndex(off); + } + } + } + @Override public int setAndGet(int n, int v) { _data[n] = 
(byte) v; @@ -136,8 +153,8 @@ public void replace(int v, int r) { } @Override - public void copyInt(int[] d) { - for(int i = 0; i < _data.length; i++) + public void copyInt(int[] d, int start, int end) { + for(int i = start; i < end; i++) _data[i] = (byte) d[i]; } @@ -320,13 +337,13 @@ public void decompressToRange(double[] c, int rl, int ru, int offR, double[] val } @Override - public void decompressToRangeOff(double[] c, int rl, int ru, int offR, double[] values) { + protected void decompressToRangeOff(double[] c, int rl, int ru, int offR, double[] values) { for(int i = rl, offT = rl + offR; i < ru; i++, offT++) c[offT] += values[getIndex(i)]; } @Override - public void decompressToRangeNoOff(double[] c, int rl, int ru, double[] values) { + protected void decompressToRangeNoOff(double[] c, int rl, int ru, double[] values) { // OVERWRITTEN FOR JIT COMPILE! final int h = (ru - rl) % 8; for(int rc = rl; rc < rl + h; rc++) diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToChar.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToChar.java index 690d54af9b8..ba5f00d8cc0 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToChar.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToChar.java @@ -22,8 +22,12 @@ import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; +import java.util.ArrayList; import java.util.Arrays; import java.util.BitSet; +import java.util.List; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Future; import org.apache.commons.lang3.NotImplementedException; import org.apache.sysds.runtime.compress.colgroup.IMapToDataGroup; @@ -92,6 +96,26 @@ public void set(int n, int v) { _data[n] = (char) v; } + public void set(int n, char v) { + _data[n] = v; + } + + @Override + public void set(int l, int u, int off, AMapToData tm) { + if(tm instanceof MapToChar) { + MapToChar tbm = (MapToChar) tm; + 
char[] tbv = tbm._data; + for(int i = l; i < u; i++, off++) { + _data[i] = tbv[off]; + } + } + else { + for(int i = l; i < u; i++, off++) { + set(i, tm.getIndex(off)); + } + } + } + @Override public int setAndGet(int n, int v) { return _data[n] = (char) v; @@ -144,7 +168,7 @@ protected static MapToChar readFields(DataInput in) throws IOException { final int length = in.readInt(); final char[] data = new char[length]; for(int i = 0; i < length; i++) - data[i] = in.readChar(); + data[i] = (char)in.readUnsignedShort(); return new MapToChar(unique, data); } @@ -208,8 +232,8 @@ public int getUpperBoundValue() { } @Override - public void copyInt(int[] d) { - for(int i = 0; i < _data.length; i++) + public void copyInt(int[] d, int start, int end) { + for(int i = start; i < end; i++) _data[i] = (char) d[i]; } @@ -391,4 +415,38 @@ protected final void preAggregateDDC_DDCSingleCol_vecChar(MapToChar tm, double[] v[getIndex(r8)] += td[tm.getIndex(r8)]; } + @Override + public AMapToData[] splitReshapeDDCPushDown(final int multiplier, final ExecutorService pool) throws Exception { + final int s = size(); + final MapToChar[] ret = new MapToChar[multiplier]; + final int eachSize = s / multiplier; + for(int i = 0; i < multiplier; i++) + ret[i] = new MapToChar(getUnique(), eachSize); + + final int blkz = Math.max(eachSize / 8, 2048) * multiplier; + List> tasks = new ArrayList<>(); + for(int i = 0; i < s; i += blkz) { + final int start = i; + final int end = Math.min(i + blkz, s); + tasks.add(pool.submit(() -> splitReshapeDDCBlock(ret, multiplier, start, end))); + } + + for(Future t : tasks) + t.get(); + + return ret; + } + + private void splitReshapeDDCBlock(final MapToChar[] ret, final int multiplier, final int start, final int end) { + for(int i = start; i < end; i += multiplier) + splitReshapeDDCRow(ret, multiplier, i); + } + + private void splitReshapeDDCRow(final MapToChar[] ret, final int multiplier, final int i) { + final int off = i / multiplier; + final int end = i + 
multiplier; + for(int j = i; j < end; j++) + ret[j % multiplier]._data[off] = _data[j]; + } + } diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToCharPByte.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToCharPByte.java index 28a68855575..4eca0754667 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToCharPByte.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToCharPByte.java @@ -101,6 +101,13 @@ public void set(int n, int v) { _data_b[n] = (byte) (m >> 16); } + @Override + public void set(int l, int u, int off, AMapToData tm){ + for(int i = l; i < u; i++, off++) { + set(i, tm.getIndex(off)); + } + } + @Override public int setAndGet(int n, int v) { int m = v & 0xffffff; @@ -167,8 +174,8 @@ public int getUpperBoundValue() { } @Override - public void copyInt(int[] d) { - for(int i = 0; i < d.length; i++) + public void copyInt(int[] d, int start, int end) { + for(int i = start; i < end; i++) set(i, d[i]); } diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToFactory.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToFactory.java index 970220d9cc2..635f0c4d752 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToFactory.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToFactory.java @@ -21,9 +21,15 @@ import java.io.DataInput; import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Future; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.sysds.runtime.compress.utils.IntArrayList; +import org.apache.sysds.runtime.util.CommonThreadPool; /** Interface for the factory design pattern for construction all AMapToData. 
*/ public interface MapToFactory { @@ -63,6 +69,26 @@ public static AMapToData create(int size, int[] values, int nUnique) { return _data; } + public static AMapToData create(int unique, IntArrayList values) { + AMapToData _data = create(values.size(), unique); + _data.copyInt(values.extractValues()); + return _data; + } + + public static AMapToData create(int size, int[] values, int nUnique, int k) { + AMapToData _data = create(size, nUnique); + ExecutorService pool = CommonThreadPool.get(k); + int blk = Math.max((values.length / k), 1024); + blk -= blk % 64; // ensure long size + List> tasks = new ArrayList<>(); + for(int i = 0; i < values.length; i += blk){ + int start = i; + int end = Math.min(i + blk, values.length); + tasks.add(pool.submit(() -> _data.copyInt(values, start, end))); + } + return _data; + } + /** * Create and allocate a map with the given size and support for upto the num tuples argument of values * diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToInt.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToInt.java index 1b1e096757f..0d9a5ba2697 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToInt.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToInt.java @@ -91,6 +91,13 @@ public void set(int n, int v) { _data[n] = v; } + @Override + public void set(int l, int u, int off, AMapToData tm){ + for(int i = l; i < u; i++, off++) { + set(i, tm.getIndex(off)); + } + } + @Override public int setAndGet(int n, int v) { return _data[n] = v; @@ -182,8 +189,8 @@ public int getUpperBoundValue() { } @Override - public void copyInt(int[] d) { - for(int i = 0; i < _data.length; i++) + public void copyInt(int[] d, int start, int end) { + for(int i = start; i < end; i++) _data[i] = d[i]; } diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToZero.java 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToZero.java index e3797dce3fd..b76228ca0bb 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToZero.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToZero.java @@ -74,6 +74,11 @@ public void set(int n, int v) { // do nothing } + @Override + public void set(int l, int u, int off, AMapToData tm){ + // do nothing + } + @Override public int setAndGet(int n, int v) { return 0; @@ -127,7 +132,7 @@ public void preAggregateDDC_DDCMultiCol(AMapToData tm, IDictionary td, double[] } @Override - public void copyInt(int[] d) { + public void copyInt(int[] d, int start, int end) { // do nothing } From 421e09b2d976655caba1d4b6b992390ac98adcc7 Mon Sep 17 00:00:00 2001 From: Sebastian Baunsgaard Date: Sat, 28 Dec 2024 22:42:27 +0100 Subject: [PATCH 02/11] more mapping tests --- .../compress/mapping/CustomMappingTest.java | 97 +++++++++++++++++++ .../compress/mapping/MappingTests.java | 54 +++++++++++ .../mapping/PreAggregateDDC_DDCTest.java | 2 + .../mapping/PreAggregateSDCZ_SDCZTest.java | 4 + 4 files changed, 157 insertions(+) diff --git a/src/test/java/org/apache/sysds/test/component/compress/mapping/CustomMappingTest.java b/src/test/java/org/apache/sysds/test/component/compress/mapping/CustomMappingTest.java index 8ab13cf9f34..ff8c6802951 100644 --- a/src/test/java/org/apache/sysds/test/component/compress/mapping/CustomMappingTest.java +++ b/src/test/java/org/apache/sysds/test/component/compress/mapping/CustomMappingTest.java @@ -19,10 +19,24 @@ package org.apache.sysds.test.component.compress.mapping; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertThrows; +import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.when; +import 
org.apache.commons.lang3.NotImplementedException; import org.apache.sysds.runtime.compress.CompressedMatrixBlock; +import org.apache.sysds.runtime.compress.DMLCompressionException; +import org.apache.sysds.runtime.compress.colgroup.mapping.AMapToData; import org.apache.sysds.runtime.compress.colgroup.mapping.MapToFactory; +import org.apache.sysds.runtime.compress.colgroup.offset.AOffset; +import org.apache.sysds.runtime.compress.colgroup.offset.OffsetFactory; +import org.apache.sysds.runtime.data.DenseBlock; +import org.apache.sysds.runtime.matrix.data.MatrixBlock; import org.junit.Test; public class CustomMappingTest { @@ -49,4 +63,87 @@ public void createBinary() { fail(e.getMessage()); } } + + @Test + public void verifySpy() { + CompressedMatrixBlock.debug = true; + AMapToData d = MapToFactory.create(data, 2); + AMapToData spy = spy(d); + when(spy.getIndex(2)).thenReturn(32); + assertThrows(DMLCompressionException.class, () -> spy.verify()); + } + + @Test + public void equals() { + CompressedMatrixBlock.debug = true; + AMapToData d = MapToFactory.create(data, 2); + AMapToData d2 = MapToFactory.create(data, 2); + assertTrue(d.equals(d)); + assertTrue(d.equals(d2)); + assertFalse(d.equals(MapToFactory.create(new int[]{1,2,3}, 4))); + assertFalse(d.equals(Integer.valueOf(23))); + } + + @Test + public void countRuns() { + CompressedMatrixBlock.debug = true; + AMapToData d = MapToFactory.create(new int[] {1, 1, 1, 1, 1, 2, 2, 2, 2, 2}, 3); + AOffset o = OffsetFactory.createOffset(new int[] {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}); + assertEquals(d.countRuns(o), 2); + } + + @Test + public void countRuns2() { + CompressedMatrixBlock.debug = true; + AMapToData d = MapToFactory.create(new int[] {1, 1, 1, 1, 1, 2, 2, 2, 2, 2}, 3); + AOffset o = OffsetFactory.createOffset(new int[] {0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 11}); + assertEquals(d.countRuns(o), 3); + } + + @Test + public void getMax() { + CompressedMatrixBlock.debug = true; + AMapToData d = MapToFactory.create(new int[] 
{1, 1, 1, 1, 1, 2, 2, 2, 2, 2}, 3); + assertEquals(d.getMax(), 2); + d = MapToFactory.create(new int[] {1, 1, 1, 1, 1, 2, 2, 2, 5, 2}, 10); + assertEquals(d.getMax(), 5); + d = MapToFactory.create(new int[] {1, 1, 1, 9, 1, 2, 2, 2, 2, 2}, 10); + assertEquals(d.getMax(), 9); + } + + @Test + public void copyInt(){ + CompressedMatrixBlock.debug = true; + AMapToData d = MapToFactory.create(new int[] {10,9,8,7,6,5,4,3,2,1}, 11); + AMapToData d2 = MapToFactory.create(new int[] {1,2,3,4,5,6,7,8,9,10}, Integer.MAX_VALUE -2); + d.copy(d2); + for(int i = 0; i < 10; i ++){ + assertEquals(d.getIndex(i), d2.getIndex(i)); + } + } + + @Test + public void setInteger(){ + CompressedMatrixBlock.debug = true; + AMapToData d = MapToFactory.create(new int[] {10,9,8,7,6,5,4,3,2,1}, 11); + + for(int i = 0; i < 10; i ++){ + assertEquals(d.getIndex(i), 10- i); + } + d.set(4, Integer.valueOf(13)); + assertEquals(d.getIndex(4), 13); + } + + @Test(expected = NotImplementedException.class) + public void preAggDenseNonContiguous(){ + AMapToData d = MapToFactory.create(new int[] {10,9,8,7,6,5,4,3,2,1}, 11); + MatrixBlock mb = new MatrixBlock(); + MatrixBlock spy = spy(mb); + DenseBlock db = mock(DenseBlock.class); + when(db.isContiguous()).thenReturn(false); + when(spy.getDenseBlock()).thenReturn(db); + + d.preAggregateDense(spy, null, 10, 13,0, 10); + } + } diff --git a/src/test/java/org/apache/sysds/test/component/compress/mapping/MappingTests.java b/src/test/java/org/apache/sysds/test/component/compress/mapping/MappingTests.java index dc64e64f41d..4b18d828e8e 100644 --- a/src/test/java/org/apache/sysds/test/component/compress/mapping/MappingTests.java +++ b/src/test/java/org/apache/sysds/test/component/compress/mapping/MappingTests.java @@ -31,6 +31,7 @@ import java.util.Arrays; import java.util.Collection; import java.util.Random; +import java.util.concurrent.ExecutorService; import org.apache.commons.lang3.NotImplementedException; import org.apache.commons.logging.Log; @@ -41,6 +42,7 @@ 
import org.apache.sysds.runtime.compress.colgroup.mapping.MapToCharPByte; import org.apache.sysds.runtime.compress.colgroup.mapping.MapToFactory; import org.apache.sysds.runtime.compress.colgroup.mapping.MapToFactory.MAP_TYPE; +import org.apache.sysds.runtime.util.CommonThreadPool; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; @@ -350,6 +352,58 @@ public void testAppendNotSame() { LOG.error("Did not throw exception with: " + m); } + @Test + public void splitReshapeParallel() throws Exception { + if(m.size() % 2 == 0){ + + ExecutorService pool = CommonThreadPool.get(); + AMapToData[] ret = m.splitReshapeDDCPushDown(2, pool); + + for(int i = 0; i < m.size(); i++){ + assertEquals(m.getIndex(i), ret[i % 2].getIndex(i/2)); + } + } + } + + + @Test + public void splitReshape2() throws Exception { + if(m.size() % 2 == 0){ + + AMapToData[] ret = m.splitReshapeDDC(2); + + for(int i = 0; i < m.size(); i++){ + assertEquals(m.getIndex(i), ret[i % 2].getIndex(i/2)); + } + } + } + + @Test + public void splitReshape4() throws Exception { + if(m.size() % 4 == 0){ + + AMapToData[] ret = m.splitReshapeDDC(4); + + for(int i = 0; i < m.size(); i++){ + assertEquals(m.getIndex(i), ret[i % 4].getIndex(i/4)); + } + } + } + + @Test + + public void getCounts(){ + int[] counts = m.getCounts(); + int countZeros = 0; + for(int i= 0; i < m.size(); i++){ + if(m.getIndex(i) == 0) + countZeros++; + } + assertEquals(counts[0], countZeros); + } + + + private static class Holder implements IMapToDataGroup { AMapToData d; diff --git a/src/test/java/org/apache/sysds/test/component/compress/mapping/PreAggregateDDC_DDCTest.java b/src/test/java/org/apache/sysds/test/component/compress/mapping/PreAggregateDDC_DDCTest.java index 4837275a32e..0a7f919b4fe 100644 --- a/src/test/java/org/apache/sysds/test/component/compress/mapping/PreAggregateDDC_DDCTest.java +++ b/src/test/java/org/apache/sysds/test/component/compress/mapping/PreAggregateDDC_DDCTest.java @@ 
-76,6 +76,8 @@ public static Collection data() { create(tests, 10000, 32, 2, 1, r.nextInt(sm)); create(tests, 10000, 2, 2, 1, r.nextInt(sm)); create(tests, 10000, 2, 2, 10, r.nextInt(sm)); + create(tests, 10005, 2, 2, 1, r.nextInt(sm)); + create(tests, 10005, 2, 2, 10, r.nextInt(sm)); createSkewed(tests, 10000, 2, 2, 10, r.nextInt(sm), 0.1); createSkewed(tests, 10000, 2, 2, 10, r.nextInt(sm), 0.01); diff --git a/src/test/java/org/apache/sysds/test/component/compress/mapping/PreAggregateSDCZ_SDCZTest.java b/src/test/java/org/apache/sysds/test/component/compress/mapping/PreAggregateSDCZ_SDCZTest.java index f2d85c056d1..2ba02c70d16 100644 --- a/src/test/java/org/apache/sysds/test/component/compress/mapping/PreAggregateSDCZ_SDCZTest.java +++ b/src/test/java/org/apache/sysds/test/component/compress/mapping/PreAggregateSDCZ_SDCZTest.java @@ -75,6 +75,10 @@ public static Collection data() { create(tests, 10000, 150, 13, 1, 1000, 100, r.nextInt(sm)); create(tests, 10000, 150, 149, 1, 1000, 100, r.nextInt(sm)); + create(tests, 10000, 32, 200, 1, 100, 1000, r.nextInt(sm)); + create(tests, 10000, 150, 13, 1, 100, 1000, r.nextInt(sm)); + create(tests, 10000, 150, 149, 1, 100, 1000, r.nextInt(sm)); + return tests; } From e735593e724a2bea41f81755b1fd8aae7fe5e0e8 Mon Sep 17 00:00:00 2001 From: Sebastian Baunsgaard Date: Sat, 28 Dec 2024 22:42:57 +0100 Subject: [PATCH 03/11] more --- .../sysds/test/component/compress/mapping/MappingTests.java | 1 - 1 file changed, 1 deletion(-) diff --git a/src/test/java/org/apache/sysds/test/component/compress/mapping/MappingTests.java b/src/test/java/org/apache/sysds/test/component/compress/mapping/MappingTests.java index 4b18d828e8e..3dbb5a27dd7 100644 --- a/src/test/java/org/apache/sysds/test/component/compress/mapping/MappingTests.java +++ b/src/test/java/org/apache/sysds/test/component/compress/mapping/MappingTests.java @@ -391,7 +391,6 @@ public void splitReshape4() throws Exception { } @Test - public void getCounts(){ int[] counts = 
m.getCounts(); int countZeros = 0; From d282ccc1a9e9ae0db4979e50b0b0f4f0cc6e41cd Mon Sep 17 00:00:00 2001 From: Sebastian Baunsgaard Date: Sun, 29 Dec 2024 01:21:23 +0100 Subject: [PATCH 04/11] mapping improved tests --- .../compress/colgroup/mapping/AMapToData.java | 4 +- .../compress/colgroup/mapping/MapToBit.java | 2 +- .../compress/colgroup/mapping/MapToByte.java | 2 +- .../compress/colgroup/mapping/MapToChar.java | 4 +- .../colgroup/mapping/MapToCharPByte.java | 5 +- .../colgroup/mapping/MapToFactory.java | 57 ++++---- .../compress/colgroup/mapping/MapToInt.java | 6 +- .../compress/colgroup/mapping/MapToUByte.java | 6 +- .../compress/colgroup/mapping/MapToZero.java | 2 +- .../compress/colgroup/scheme/DDCSchemeMC.java | 4 +- .../compress/colgroup/scheme/DDCSchemeSC.java | 6 +- .../compress/mapping/CustomMappingTest.java | 137 ++++++++++++++++-- .../compress/mapping/MappingTests.java | 107 +++++++++----- .../compress/mapping/MappingTestsResize.java | 93 ------------ 14 files changed, 247 insertions(+), 188 deletions(-) delete mode 100644 src/test/java/org/apache/sysds/test/component/compress/mapping/MappingTestsResize.java diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/AMapToData.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/AMapToData.java index 5724509eac3..ff521dec5d6 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/AMapToData.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/AMapToData.java @@ -829,7 +829,7 @@ protected void copyInt(MapToInt d) { * @param d The array to copy */ public void copyInt(int[] d) { - copyInt(d, 0, size()); + copyInt(d, 0, Math.min(d.length, size())); } public abstract void copyInt(int[] d, int start, int end); @@ -846,7 +846,7 @@ public int getMax() { } /** - * Get the maximum possible value to encode in this encoding. 
For instance in a bit you can encode 2 values + * Get the maximum possible value to encode in this encoding. For instance in a bit you can encode 2 values therefore max is 1 * * @return The maximum number of distinct values to encode */ diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToBit.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToBit.java index 9fd862bfd90..8c2753996cc 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToBit.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToBit.java @@ -442,7 +442,7 @@ private static int longSize(int size) { } public int getMaxPossible() { - return 2; + return 1; } @Override diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToByte.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToByte.java index 3243f6e849a..1763d9323d7 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToByte.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToByte.java @@ -284,7 +284,7 @@ public AMapToData appendN(IMapToDataGroup[] d) { @Override public int getMaxPossible() { - return 256; + return 255; } @Override diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToChar.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToChar.java index ba5f00d8cc0..fa25ff43ee2 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToChar.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToChar.java @@ -46,7 +46,7 @@ public class MapToChar extends AMapToData { private final char[] _data; protected MapToChar(int size) { - this(Character.MAX_VALUE, size); + this(Character.MAX_VALUE+1, size); } public MapToChar(int unique, int size) { @@ -328,7 +328,7 @@ public AMapToData appendN(IMapToDataGroup[] d) { @Override public int 
getMaxPossible() { - return Character.MAX_VALUE; + return Character.MAX_VALUE ; } @Override diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToCharPByte.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToCharPByte.java index 4eca0754667..22a3ff2b952 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToCharPByte.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToCharPByte.java @@ -38,8 +38,7 @@ public class MapToCharPByte extends AMapToData { private static final long serialVersionUID = 6315708056775476541L; - // 8323073 - public static final int max = 0xFFFF * 127; + public static final int max = (0xFFFF + 1) * 128 -1; private final char[] _data_c; private final byte[] _data_b; // next byte after the char @@ -280,7 +279,7 @@ public AMapToData appendN(IMapToDataGroup[] d) { @Override public int getMaxPossible() { - return Character.MAX_VALUE * 256; + return (Character.MAX_VALUE+1) * 256 -1; } @Override diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToFactory.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToFactory.java index 635f0c4d752..5154a8e2333 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToFactory.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToFactory.java @@ -75,42 +75,45 @@ public static AMapToData create(int unique, IntArrayList values) { return _data; } - public static AMapToData create(int size, int[] values, int nUnique, int k) { + public static AMapToData create(int size, int[] values, int nUnique, int k) throws Exception { AMapToData _data = create(size, nUnique); - ExecutorService pool = CommonThreadPool.get(k); + final ExecutorService pool = CommonThreadPool.get(k); int blk = Math.max((values.length / k), 1024); blk -= blk % 64; // ensure long size List> tasks = new ArrayList<>(); - for(int i = 0; i < 
values.length; i += blk){ + for(int i = 0; i < values.length; i += blk) { int start = i; int end = Math.min(i + blk, values.length); tasks.add(pool.submit(() -> _data.copyInt(values, start, end))); } + + for(Future t : tasks) + t.get(); return _data; } /** - * Create and allocate a map with the given size and support for upto the num tuples argument of values + * Create and allocate a map with the given size and support for up to the num tuples argument of values * - * @param size The number of cells to allocate - * @param numTuples The maximum value to be able to represent inside the map. + * @param size The number of cells to allocate + * @param unique The number of unique values to support (can encode unique -1) * @return A new map */ - public static AMapToData create(final int size, final int numTuples) { - if(numTuples <= 1) + public static AMapToData create(final int size, final int unique) { + if(unique <= 1) return new MapToZero(size); - else if(numTuples == 2 && size > 32) - return new MapToBit(numTuples, size); - else if(numTuples <= 127) - return new MapToUByte(numTuples, size); - else if(numTuples <= 256) - return new MapToByte(numTuples, size); - else if(numTuples <= Character.MAX_VALUE + 1) - return new MapToChar(numTuples, size); - else if(numTuples <= MapToCharPByte.max) - return new MapToCharPByte(numTuples, size); + else if(unique == 2 && size > 32) + return new MapToBit(unique, size); + else if(unique <= 128) + return new MapToUByte(unique, size); + else if(unique <= 256) + return new MapToByte(unique, size); + else if(unique <= Character.MAX_VALUE + 1) + return new MapToChar(unique, size); + else if(unique <= MapToCharPByte.max + 1) + return new MapToCharPByte(unique, size); else - return new MapToInt(numTuples, size); + return new MapToInt(unique, size); } /** @@ -181,20 +184,20 @@ public static AMapToData resizeForce(AMapToData d, MAP_TYPE t) { /** * Estimate the size in memory of a MapToFactory. 
* - * @param size The size of the mapping - * @param numTuples The number of unique values to be supported by the mapping + * @param size The size of the mapping + * @param unique The number of unique values to support (can encode unique -1) * @return The size in number of bytes. */ - public static long estimateInMemorySize(int size, int numTuples) { - if(numTuples <= 1) + public static long estimateInMemorySize(int size, int unique) { + if(unique <= 1) return MapToZero.getInMemorySize(size); - else if(numTuples == 2 && size > 32) + else if(unique == 2 && size > 32) return MapToBit.getInMemorySize(size); - else if(numTuples <= 256) + else if(unique <= 256) return MapToByte.getInMemorySize(size); - else if(numTuples <= Character.MAX_VALUE + 1) + else if(unique <= Character.MAX_VALUE + 1) return MapToChar.getInMemorySize(size); - else if(numTuples <= MapToCharPByte.max) + else if(unique <= MapToCharPByte.max) return MapToCharPByte.getInMemorySize(size); else return MapToInt.getInMemorySize(size); diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToInt.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToInt.java index 0d9a5ba2697..7241fd7c669 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToInt.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToInt.java @@ -37,7 +37,7 @@ public class MapToInt extends AMapToData { private final int[] _data; protected MapToInt(int size) { - this(Character.MAX_VALUE + 1, size); + this(Integer.MAX_VALUE, size); } public MapToInt(int unique, int size) { @@ -226,11 +226,11 @@ public AMapToData resize(int unique) { return new MapToZero(size); else if(unique == 2 && size > 32) ret = new MapToBit(unique, size); - else if(unique <= 127) + else if(unique < 128) ret = new MapToUByte(unique, size); else if(unique < 256) ret = new MapToByte(unique, size); - else if(unique < Character.MAX_VALUE - 1) + else if(unique < 
Character.MAX_VALUE ) ret = new MapToChar(unique, size); else if(unique < MapToCharPByte.max) ret = new MapToCharPByte(unique, size); diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToUByte.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToUByte.java index e8c9b0926a3..b76ec7e0f58 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToUByte.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToUByte.java @@ -31,11 +31,11 @@ public class MapToUByte extends MapToByte { private static final long serialVersionUID = -2498505439667351828L; protected MapToUByte(int size) { - this(127, size); + this(128, size); } public MapToUByte(int unique, int size) { - super(Math.min(unique, 127), new byte[size]); + super(Math.min(unique, 128), new byte[size]); } protected MapToUByte(int unique, byte[] data) { @@ -126,7 +126,7 @@ public int[] getCounts(int[] ret) { @Override public int getMaxPossible() { - return 128; + return 127; } @Override diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToZero.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToZero.java index b76228ca0bb..57e267470ad 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToZero.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToZero.java @@ -185,7 +185,7 @@ public AMapToData appendN(IMapToDataGroup[] d) { @Override public int getMaxPossible() { - return 1; + return 0; } @Override diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/scheme/DDCSchemeMC.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/scheme/DDCSchemeMC.java index 9032dfd0f5d..c8a1b4e3c48 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/scheme/DDCSchemeMC.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/scheme/DDCSchemeMC.java @@ -189,7 +189,7 @@ private 
Pair tryUpdateAndEncode(MatrixBlock data, ReaderC while((cellVals = reader.nextRow()) != null) { final int row = reader.getCurrentRowIndex(); final int id = map.increment(cellVals); - if(id >= max) + if(id > max) throw new DMLCompressionException("Failed update and encode with " + max + " possible values"); d.set(row, id); } @@ -204,7 +204,7 @@ private Pair tryUpdateAndEncode(MatrixBlock data, ReaderC d.set(r++, emptyIdx.id); } final int id = map.increment(cellVals); - if(id >= max) + if(id > max) throw new DMLCompressionException( "Failed update and encode with " + max + " possible values" + map + " " + map.size()); d.set(row, id); diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/scheme/DDCSchemeSC.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/scheme/DDCSchemeSC.java index 2a5981dea6f..b679c745e00 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/scheme/DDCSchemeSC.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/scheme/DDCSchemeSC.java @@ -189,7 +189,7 @@ private void encodeAndUpdateSparse(MatrixBlock data, AMapToData d, int col, int for(int i = 0; i < nRow; i++) { int id = map.increment(sb.get(i, col)); - if(id >= max) + if(id > max) throw new DMLCompressionException("Failed update and encode with " + max + " possible values"); d.set(i, id); } @@ -203,7 +203,7 @@ private void encodeAndUpdateDense(final MatrixBlock data, final AMapToData d, fi final int end = nRow * nCol; // guaranteed lower than intend. 
for(int i = 0, off = col; off < end; i++, off += nCol) { int id = map.increment(vals[off]); - if(id >= max) + if(id > max) throw new DMLCompressionException("Failed update and encode with " + max + " possible values"); d.set(i, id); } @@ -216,7 +216,7 @@ private void encodeAndUpdateGeneric(MatrixBlock data, AMapToData d, int col, int final double[] c = db.values(i); final int off = db.pos(i) + col; int id = map.increment(c[off]); - if(id >= max) + if(id > max) throw new DMLCompressionException("Failed update and encode with " + max + " possible values"); d.set(i, id); } diff --git a/src/test/java/org/apache/sysds/test/component/compress/mapping/CustomMappingTest.java b/src/test/java/org/apache/sysds/test/component/compress/mapping/CustomMappingTest.java index ff8c6802951..058b769620c 100644 --- a/src/test/java/org/apache/sysds/test/component/compress/mapping/CustomMappingTest.java +++ b/src/test/java/org/apache/sysds/test/component/compress/mapping/CustomMappingTest.java @@ -28,19 +28,35 @@ import static org.mockito.Mockito.spy; import static org.mockito.Mockito.when; +import java.util.Random; + import org.apache.commons.lang3.NotImplementedException; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.sysds.runtime.compress.CompressedMatrixBlock; import org.apache.sysds.runtime.compress.DMLCompressionException; import org.apache.sysds.runtime.compress.colgroup.mapping.AMapToData; +import org.apache.sysds.runtime.compress.colgroup.mapping.MapToBit; +import org.apache.sysds.runtime.compress.colgroup.mapping.MapToByte; +import org.apache.sysds.runtime.compress.colgroup.mapping.MapToChar; +import org.apache.sysds.runtime.compress.colgroup.mapping.MapToCharPByte; import org.apache.sysds.runtime.compress.colgroup.mapping.MapToFactory; +import org.apache.sysds.runtime.compress.colgroup.mapping.MapToFactory.MAP_TYPE; +import org.apache.sysds.runtime.compress.colgroup.mapping.MapToInt; +import 
org.apache.sysds.runtime.compress.colgroup.mapping.MapToUByte; +import org.apache.sysds.runtime.compress.colgroup.mapping.MapToZero; import org.apache.sysds.runtime.compress.colgroup.offset.AOffset; import org.apache.sysds.runtime.compress.colgroup.offset.OffsetFactory; +import org.apache.sysds.runtime.compress.utils.IntArrayList; import org.apache.sysds.runtime.data.DenseBlock; import org.apache.sysds.runtime.matrix.data.MatrixBlock; import org.junit.Test; public class CustomMappingTest { + protected static final Log LOG = LogFactory.getLog(CustomMappingTest.class.getName()); + + int[] data = new int[] {0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, @@ -80,7 +96,7 @@ public void equals() { AMapToData d2 = MapToFactory.create(data, 2); assertTrue(d.equals(d)); assertTrue(d.equals(d2)); - assertFalse(d.equals(MapToFactory.create(new int[]{1,2,3}, 4))); + assertFalse(d.equals(MapToFactory.create(new int[] {1, 2, 3}, 4))); assertFalse(d.equals(Integer.valueOf(23))); } @@ -112,38 +128,133 @@ public void getMax() { } @Test - public void copyInt(){ + public void copyInt() { CompressedMatrixBlock.debug = true; - AMapToData d = MapToFactory.create(new int[] {10,9,8,7,6,5,4,3,2,1}, 11); - AMapToData d2 = MapToFactory.create(new int[] {1,2,3,4,5,6,7,8,9,10}, Integer.MAX_VALUE -2); + AMapToData d = MapToFactory.create(new int[] {10, 9, 8, 7, 6, 5, 4, 3, 2, 1}, 11); + AMapToData d2 = MapToFactory.create(new int[] {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, Integer.MAX_VALUE - 2); d.copy(d2); - for(int i = 0; i < 10; i ++){ + for(int i = 0; i < 10; i++) { assertEquals(d.getIndex(i), d2.getIndex(i)); } } @Test - public void setInteger(){ + public void setInteger() { CompressedMatrixBlock.debug = true; - AMapToData d = 
MapToFactory.create(new int[] {10,9,8,7,6,5,4,3,2,1}, 11); - - for(int i = 0; i < 10; i ++){ - assertEquals(d.getIndex(i), 10- i); + AMapToData d = MapToFactory.create(new int[] {10, 9, 8, 7, 6, 5, 4, 3, 2, 1}, 11); + + for(int i = 0; i < 10; i++) { + assertEquals(d.getIndex(i), 10 - i); } d.set(4, Integer.valueOf(13)); assertEquals(d.getIndex(4), 13); } @Test(expected = NotImplementedException.class) - public void preAggDenseNonContiguous(){ - AMapToData d = MapToFactory.create(new int[] {10,9,8,7,6,5,4,3,2,1}, 11); + public void preAggDenseNonContiguous() { + AMapToData d = MapToFactory.create(new int[] {10, 9, 8, 7, 6, 5, 4, 3, 2, 1}, 11); MatrixBlock mb = new MatrixBlock(); MatrixBlock spy = spy(mb); DenseBlock db = mock(DenseBlock.class); when(db.isContiguous()).thenReturn(false); when(spy.getDenseBlock()).thenReturn(db); - d.preAggregateDense(spy, null, 10, 13,0, 10); + d.preAggregateDense(spy, null, 10, 13, 0, 10); + } + + @Test + public void compareParallelCreate() throws Exception { + int[] input = new int[100]; + Random r = new Random(32); + for(int i = 0; i < 100; i++) { + input[i] = r.nextInt(100); + } + + AMapToData a = MapToFactory.create(101, input, 100, 10); + AMapToData b = MapToFactory.create(101, input, 100); + assertEquals(a, b); } + @Test + public void allocateMapToPByte() { + assertEquals(MapToCharPByte.class, MapToFactory.create(10, MapToCharPByte.max).getClass()); + } + + @Test + public void createSpecificType() { + assertEquals(MapToCharPByte.class, MapToFactory.create(10, MAP_TYPE.CHAR_BYTE).getClass()); + assertEquals(MapToZero.class, MapToFactory.create(10, MAP_TYPE.ZERO).getClass()); + assertEquals(MapToBit.class, MapToFactory.create(10, MAP_TYPE.BIT).getClass()); + assertEquals(MapToByte.class, MapToFactory.create(10, MAP_TYPE.BYTE).getClass()); + assertEquals(MapToUByte.class, MapToFactory.create(10, MAP_TYPE.UBYTE).getClass()); + assertEquals(MapToChar.class, MapToFactory.create(10, MAP_TYPE.CHAR).getClass()); + 
assertEquals(MapToInt.class, MapToFactory.create(10, MAP_TYPE.INT).getClass()); + } + + @Test + public void estimateInMemorySize() { + for(int i = 0; i < 10; i++) { + + assertEquals(MapToFactory.estimateInMemorySize(i, i), MapToFactory.create(i, i).getInMemorySize()); + assertEquals(MapToFactory.estimateInMemorySize(i, 256), MapToFactory.create(i, 256).getInMemorySize()); + assertEquals(MapToFactory.estimateInMemorySize(i, 256 * 256), + MapToFactory.create(i, 256 * 256).getInMemorySize()); + assertEquals(MapToFactory.estimateInMemorySize(i, 256 * 256 * 256), + MapToFactory.create(i, 256 * 256 * 256).getInMemorySize()); + assertEquals(MapToFactory.estimateInMemorySize(i, 256 * 256 * 256 * 256), + MapToFactory.create(i, 256 * 256 * 256 * 256).getInMemorySize()); + assertEquals(MapToFactory.estimateInMemorySize(i, MapToCharPByte.max), + MapToFactory.create(i, MapToCharPByte.max).getInMemorySize()); + assertEquals(MapToFactory.estimateInMemorySize(i, Integer.MAX_VALUE), + MapToFactory.create(i, Integer.MAX_VALUE).getInMemorySize()); + } + } + + @Test + public void createWithIntArrayList() { + AMapToData a = MapToFactory.create(10, new IntArrayList(new int[] {1, 2, 3, 4})); + for(int i = 0; i < 4; i++) { + assertEquals(i + 1, a.getIndex(i)); + } + } + + @Test + public void resize() { + int s = 10; + for(MAP_TYPE m : MapToFactory.MAP_TYPE.values()) { + AMapToData a = MapToFactory.create(s, m); + a.getMaxPossible(); + for(int i = 1; i < Integer.MAX_VALUE/2 && i < a.getMaxPossible(); i = i * 2) { + for(int j = 0; j < s; j ++){ + a.set(j, (int)Math.max(0L, (long)i-j-1)); + } + AMapToData b = a.resize(i); + String mm = a.toString() + " vs " + b.toString(); + for(int j = 0; j < s; j++){ + assertEquals(mm,a.getIndex(j), b.getIndex(j)); + } + } + + } + } + + @Test + public void resize2() { + int s = 42; + for(MAP_TYPE m : MapToFactory.MAP_TYPE.values()) { + AMapToData a = MapToFactory.create(s, m); + a.getMaxPossible(); + for(int i = 1; i < Integer.MAX_VALUE/2 && i < 
a.getMaxPossible(); i = i * 2) { + for(int j = 0; j < s; j ++){ + a.set(j, (int)Math.max(0L, (long)i-j-1)); + } + AMapToData b = a.resize(i); + String mm = a.toString() + " vs " + b.toString(); + for(int j = 0; j < s; j++){ + assertEquals(mm,a.getIndex(j), b.getIndex(j)); + } + } + + } + } } diff --git a/src/test/java/org/apache/sysds/test/component/compress/mapping/MappingTests.java b/src/test/java/org/apache/sysds/test/component/compress/mapping/MappingTests.java index 3dbb5a27dd7..b32c2acac2b 100644 --- a/src/test/java/org/apache/sysds/test/component/compress/mapping/MappingTests.java +++ b/src/test/java/org/apache/sysds/test/component/compress/mapping/MappingTests.java @@ -20,6 +20,8 @@ package org.apache.sysds.test.component.compress.mapping; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; import java.io.ByteArrayInputStream; @@ -42,6 +44,7 @@ import org.apache.sysds.runtime.compress.colgroup.mapping.MapToCharPByte; import org.apache.sysds.runtime.compress.colgroup.mapping.MapToFactory; import org.apache.sysds.runtime.compress.colgroup.mapping.MapToFactory.MAP_TYPE; +import org.apache.sysds.runtime.compress.colgroup.mapping.MapToZero; import org.apache.sysds.runtime.util.CommonThreadPool; import org.junit.Test; import org.junit.runner.RunWith; @@ -95,24 +98,28 @@ public MappingTests(int seed, MAP_TYPE type, int size, boolean fill) { this.seed = seed; this.type = type; this.size = size; - this.max = Math.min(Math.min(MappingTestUtil.getUpperBoundValue(type), fictiveMax) + 1, size); - expected = new int[size]; - m = genMap(MapToFactory.create(size, max), expected, max, fill, seed); + this.max = MappingTestUtil.getUpperBoundValue(type); + this.expected = new int[size]; + m = genMap(MapToFactory.create(size, (int) (Math.min(Integer.MAX_VALUE, (long) max + 1))), expected, max, fill, + seed); } public static AMapToData genMap(AMapToData m, int[] 
expected, int max, boolean fill, int seed) { - if(max <= 1) + if(max <= 0) return m; Random vals = new Random(seed); int size = m.size(); + + int randUpperBound = (int) (Math.min(Integer.MAX_VALUE, (long) max + 1)); + if(fill) { - int v = vals.nextInt(max); + int v = vals.nextInt(randUpperBound); m.fill(v); Arrays.fill(expected, v); } for(int i = 0; i < size; i++) { - int v = vals.nextInt(max); + int v = vals.nextInt(randUpperBound); if(fill) { if(v > max / 2) continue; @@ -128,7 +135,7 @@ public static AMapToData genMap(AMapToData m, int[] expected, int max, boolean f } // to make sure that the bit set is actually filled. - for(int i = 0; i < max; i++) { + for(int i = 0; i <= max && i < size; i++) { m.set(i, i); expected[i] = i; @@ -156,7 +163,6 @@ public void testSerialization() { DataInputStream fis = new DataInputStream(bis); AMapToData n = MapToFactory.readIn(fis); - compare(m, n); } catch(IOException e) { @@ -168,6 +174,26 @@ public void testSerialization() { } } + @Test + public void equalsTest() { + AMapToData tmp = MapToFactory.create(m.size(), m.getUnique()); + if(m instanceof MapToZero) + assertTrue(m.equals(tmp)); + else + assertFalse(m.equals(tmp)); + tmp.copy(m); + assertTrue(m.equals(tmp)); + } + + @Test + public void countRuns() { + if(m.getUnique() > m.size()) + return; + int runs = m.countRuns(); + + assertTrue(runs <= m.size()); + } + @Test public void testOnDiskSizeInBytes() { try { @@ -215,7 +241,7 @@ public void resize() { @Test public void resizeToSameSize() { // if we resize to same size return the same object! 
- AMapToData m_same = m.resize( m.getUnique()); + AMapToData m_same = m.resize(m.getUnique()); assertEquals("Resize did not return the correct same objects", m_same, m); } @@ -228,10 +254,12 @@ protected static void compare(AMapToData a, AMapToData b) { @Test public void replaceMax() { - m.replace(max - 1, 0); + if(m instanceof MapToZero) + return; + m.replace(max, 0); for(int i = 0; i < size; i++) { - expected[i] = expected[i] == max - 1 ? 0 : expected[i]; + expected[i] = expected[i] == max ? 0 : expected[i]; if(expected[i] != m.getIndex(i)) fail("Expected equals " + Arrays.toString(expected) + "\nbut got: " + m); } @@ -242,6 +270,9 @@ public void getCountsNoDefault() { try { int nVal = m.getUnique(); + if(nVal > 1000) + return; + int[] counts = m.getCounts(new int[nVal]); int sum = 0; for(int v : counts) @@ -258,6 +289,8 @@ public void getCountsNoDefault() { @Test public void replaceMin() { + if(m instanceof MapToZero) + return; int max = m.getUpperBoundValue(); m.replace(0, max); @@ -271,14 +304,17 @@ public void replaceMin() { @Test public void getUnique() { int u = m.getUnique(); - if(max != u) + if(m instanceof MapToZero) + return; + + if((int) (Math.min(Integer.MAX_VALUE, (long) max + 1)) != u) fail("incorrect number of unique " + m + "expectedInstances" + max + " got" + u); } @Test public void testInMemorySize() { long inMemorySize = m.getInMemorySize(); - long estimatedSize = MapToFactory.estimateInMemorySize(size, max); + long estimatedSize = MapToFactory.estimateInMemorySize(size, (int) (Math.min(Integer.MAX_VALUE, (long) max + 1))); if(estimatedSize != inMemorySize) fail(" estimated size is not actual size: \nest: " + estimatedSize + " act: " + inMemorySize + "\n" @@ -288,6 +324,8 @@ public void testInMemorySize() { @Test public void testAppend() { int nVal = m.getUnique(); + if(nVal > 10000) + return; int[] counts = m.getCounts(new int[nVal]); AMapToData m2 = m.append(m); @@ -302,6 +340,8 @@ public void testAppend() { @Test public void testAppendN() { 
int nVal = m.getUnique(); + if(nVal > 10000) + return; int[] counts = m.getCounts(new int[nVal]); try { @@ -352,57 +392,56 @@ public void testAppendNotSame() { LOG.error("Did not throw exception with: " + m); } - @Test + @Test public void splitReshapeParallel() throws Exception { - if(m.size() % 2 == 0){ + if(m.size() % 2 == 0) { ExecutorService pool = CommonThreadPool.get(); AMapToData[] ret = m.splitReshapeDDCPushDown(2, pool); - - for(int i = 0; i < m.size(); i++){ - assertEquals(m.getIndex(i), ret[i % 2].getIndex(i/2)); + + for(int i = 0; i < m.size(); i++) { + assertEquals(m.getIndex(i), ret[i % 2].getIndex(i / 2)); } } } - - @Test + @Test public void splitReshape2() throws Exception { - if(m.size() % 2 == 0){ + if(m.size() % 2 == 0) { AMapToData[] ret = m.splitReshapeDDC(2); - - for(int i = 0; i < m.size(); i++){ - assertEquals(m.getIndex(i), ret[i % 2].getIndex(i/2)); + + for(int i = 0; i < m.size(); i++) { + assertEquals(m.getIndex(i), ret[i % 2].getIndex(i / 2)); } } } - @Test + @Test public void splitReshape4() throws Exception { - if(m.size() % 4 == 0){ + if(m.size() % 4 == 0) { AMapToData[] ret = m.splitReshapeDDC(4); - - for(int i = 0; i < m.size(); i++){ - assertEquals(m.getIndex(i), ret[i % 4].getIndex(i/4)); + + for(int i = 0; i < m.size(); i++) { + assertEquals(m.getIndex(i), ret[i % 4].getIndex(i / 4)); } } } - @Test - public void getCounts(){ + @Test + public void getCounts() { + if(m.getUnique() > 10000) + return; int[] counts = m.getCounts(); int countZeros = 0; - for(int i= 0; i < m.size(); i++){ + for(int i = 0; i < m.size(); i++) { if(m.getIndex(i) == 0) countZeros++; } assertEquals(counts[0], countZeros); } - - private static class Holder implements IMapToDataGroup { AMapToData d; diff --git a/src/test/java/org/apache/sysds/test/component/compress/mapping/MappingTestsResize.java b/src/test/java/org/apache/sysds/test/component/compress/mapping/MappingTestsResize.java deleted file mode 100644 index 3bcbb7ac041..00000000000 --- 
a/src/test/java/org/apache/sysds/test/component/compress/mapping/MappingTestsResize.java +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.sysds.test.component.compress.mapping; - -import static org.junit.Assert.fail; - -import java.util.ArrayList; -import java.util.Collection; - -import org.apache.sysds.runtime.compress.CompressedMatrixBlock; -import org.apache.sysds.runtime.compress.colgroup.mapping.AMapToData; -import org.apache.sysds.runtime.compress.colgroup.mapping.MapToFactory; -import org.apache.sysds.runtime.compress.colgroup.mapping.MapToFactory.MAP_TYPE; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; -import org.junit.runners.Parameterized.Parameters; - -@RunWith(value = Parameterized.class) -public class MappingTestsResize { - - public final int seed; - public final MAP_TYPE type; - public final int size; - - private AMapToData m; - private int[] expected; - - @Parameters - public static Collection data() { - ArrayList tests = new ArrayList<>(); - for(MAP_TYPE t : MAP_TYPE.values()) { - tests.add(new Object[] {1, t, 13, false}); - tests.add(new Object[] {1, t, 632, false}); - } - return tests; - 
} - - public MappingTestsResize(int seed, MAP_TYPE type, int size, boolean fill) { - CompressedMatrixBlock.debug = true; - this.seed = seed; - this.type = type; - this.size = size; - try{ - - final int max = Math.min(MappingTestUtil.getUpperBoundValue(type),size); - final int maxSmaller = Math.min(getMaxSmaller(type), size); - expected = new int[size]; - m = MappingTests.genMap(MapToFactory.create(size, max), expected, maxSmaller, fill, seed); - } - catch(Exception e){ - e.printStackTrace(); - fail("Failed creating mapping resize test"); - } - } - - @Test - public void resize() { - MappingTests.compare(m.resize(getMaxSmaller(type)), m); - } - - private int getMaxSmaller(MAP_TYPE type) { - switch(type) { - case BIT: - case UBYTE: - return 1; - case BYTE: - return 127; - case CHAR: - return (int) Math.pow(2, 8) - 1; - default: - return Character.MAX_VALUE; - } - } - -} From 8f461566ffd545f5179b6385bc8ace9333d870cb Mon Sep 17 00:00:00 2001 From: Sebastian Baunsgaard Date: Sun, 29 Dec 2024 17:11:19 +0100 Subject: [PATCH 05/11] more tests --- .../compress/colgroup/mapping/AMapToData.java | 18 +- .../compress/colgroup/mapping/MapToBit.java | 82 +++--- .../compress/colgroup/mapping/MapToByte.java | 4 +- .../compress/colgroup/mapping/MapToChar.java | 8 +- .../colgroup/mapping/MapToCharPByte.java | 6 - .../colgroup/mapping/MapToFactory.java | 40 +++ .../compress/colgroup/mapping/MapToInt.java | 11 +- .../compress/colgroup/mapping/MapToUByte.java | 4 - .../compress/colgroup/mapping/MapToZero.java | 4 +- .../compress/mapping/CustomMappingTest.java | 241 +++++++++++++++++- .../mapping/MappingPreAggregateTests.java | 45 +++- .../compress/mapping/MappingTests.java | 1 + .../mapping/PreAggregateDDC_DDCTest.java | 6 + 13 files changed, 382 insertions(+), 88 deletions(-) diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/AMapToData.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/AMapToData.java index ff521dec5d6..29eedf45561 100644 
--- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/AMapToData.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/AMapToData.java @@ -23,7 +23,6 @@ import java.io.IOException; import java.io.Serializable; import java.util.ArrayList; -import java.util.BitSet; import java.util.List; import java.util.concurrent.ExecutorService; import java.util.concurrent.Future; @@ -803,9 +802,9 @@ public void preAggregateDDC_RLE(int[] ptr, char[] data, IDictionary td, Dictiona */ public void copy(AMapToData d) { if(d.nUnique == 1) - return; - // else if(d instanceof MapToBit) - // copyBit((MapToBit) d); + fill(0); + else if(d instanceof MapToBit) + copyBit((MapToBit) d); else if(d instanceof MapToInt) copyInt((MapToInt) d); else { @@ -834,7 +833,13 @@ public void copyInt(int[] d) { public abstract void copyInt(int[] d, int start, int end); - public abstract void copyBit(BitSet d); + + public void copyBit(MapToBit d) { + fill(0); + for(int i = d.nextSetBit(0); i >= 0; i = d.nextSetBit(i + 1)) { + set(i, 1); + } + } public int getMax() { int m = -1; @@ -846,7 +851,8 @@ public int getMax() { } /** - * Get the maximum possible value to encode in this encoding. For instance in a bit you can encode 2 values therefore max is 1 + * Get the maximum possible value to encode in this encoding. 
For instance in a bit you can encode 2 values therefore + * max is 1 * * @return The maximum number of distinct values to encode */ diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToBit.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToBit.java index 8c2753996cc..8e3e560be3b 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToBit.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToBit.java @@ -77,15 +77,7 @@ public MapToBit(int unique, int size) { } private MapToBit(int unique, BitSet d, int size) { - super(unique); - long[] bsd = d.toLongArray(); - if(bsd.length == longSize(size)) - _data = bsd; - else { - _data = new long[longSize(size)]; - System.arraycopy(bsd, 0, _data, 0, bsd.length); - } - _size = size; + this(unique, d.toLongArray(), size); } private MapToBit(int unique, long[] bsd, int size) { @@ -99,10 +91,6 @@ private MapToBit(int unique, long[] bsd, int size) { _size = size; } - protected long[] getData() { - return _data; - } - @Override public MAP_TYPE getType() { return MapToFactory.MAP_TYPE.BIT; @@ -110,17 +98,19 @@ public MAP_TYPE getType() { @Override public int getIndex(int n) { - int wIdx = n >> 6; // same as divide by 64 bit faster + int wIdx = n >> 6; // same as divide by 64 but faster return (_data[wIdx] & (1L << n)) != 0L ? 1 : 0; } @Override public void fill(int v) { - long re = (_data.length * 64) - _size; - if(re == 0 || v == 0) - Arrays.fill(_data, v == 0 ? 0L : -1L); + final long re = (_data.length * 64) - _size; + final boolean fillZero = v == 0; + final long fillValue = fillZero ? 0L : -1L; + if(re == 0 || fillZero) + Arrays.fill(_data, fillValue); else { - Arrays.fill(_data, 0, _data.length - 1, v == 0 ? 
0L : -1L); + Arrays.fill(_data, 0, _data.length - 1, fillValue); _data[_data.length - 1] = -1L >>> re; } } @@ -146,7 +136,7 @@ public void set(int n, int v) { } @Override - public void set(int l, int u, int off, AMapToData tm){ + public void set(int l, int u, int off, AMapToData tm) { for(int i = l; i < u; i++, off++) { set(i, tm.getIndex(off)); } @@ -155,7 +145,7 @@ public void set(int l, int u, int off, AMapToData tm){ @Override public int setAndGet(int n, int v) { set(n, v); - return 1; + return v == 1 ? 1 : 0; } @Override @@ -260,19 +250,6 @@ public boolean isEmpty() { return true; } - @Override - public void copy(AMapToData d) { - // if(d instanceof MapToBit) - // copyBit((MapToBit) d); - if(d instanceof MapToInt) - copyInt((MapToInt) d); - else { - final int sz = size(); - for(int i = 0; i < sz; i++) - set(i, d.getIndex(i)); - } - } - @Override public void copyInt(int[] d, int start, int end) { for(int i = start; i < end; i++) @@ -280,11 +257,33 @@ public void copyInt(int[] d, int start, int end) { } @Override - public void copyBit(BitSet d) { - long[] vals = d.toLongArray(); - System.arraycopy(vals, 0, _data, 0, vals.length); - if(vals.length < _data.length) - Arrays.fill(_data, vals.length, _data.length, 0L); + public void copyBit(MapToBit d) { + long[] vals = d._data; + System.arraycopy(vals, 0, _data, 0, Math.min(vals.length, _data.length)); + } + + /** + * Return the index of the next bit set to one. If no more bits are set to one return -1. The method behaves + * similarly to and is inspired from java's BitSet. If a negative value is given as input it fails. + * + * @param fromIndex The index to start from (inclusive) + * @return The next valid index. + */ + public int nextSetBit(int fromIndex) { + if(fromIndex >= _size) + return -1; + int u = fromIndex >> 6; // long trick instead of division by 64. + final int s = _data.length; + // mask out previous set bits in this word. 
+ long word = _data[u] & (0xffffffffffffffffL << fromIndex); + + while(true) { + if(word != 0) + return (u * 64) + Long.numberOfTrailingZeros(word); + if(++u == s) + return -1; + word = _data[u]; + } } private static class JoinBitSets { @@ -386,7 +385,12 @@ public int countRuns() { @Override public AMapToData slice(int l, int u) { long[] s = BitSetArray.sliceVectorized(_data, l, u); - return new MapToBit(getUnique(), s, u - l); + MapToBit m = new MapToBit(getUnique(), s, u - l); + + if(m.isEmpty()) + return new MapToZero(u - l); + else + return m; } @Override diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToByte.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToByte.java index 1763d9323d7..fb7ae3d981c 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToByte.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToByte.java @@ -23,7 +23,6 @@ import java.io.DataOutput; import java.io.IOException; import java.util.Arrays; -import java.util.BitSet; import org.apache.commons.lang3.NotImplementedException; import org.apache.sysds.runtime.compress.colgroup.IMapToDataGroup; @@ -159,7 +158,8 @@ public void copyInt(int[] d, int start, int end) { } @Override - public void copyBit(BitSet d) { + public void copyBit(MapToBit d) { + fill(0); for(int i = d.nextSetBit(0); i >= 0; i = d.nextSetBit(i + 1)) { _data[i] = 1; } diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToChar.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToChar.java index fa25ff43ee2..00966efc969 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToChar.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToChar.java @@ -24,7 +24,6 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; -import java.util.BitSet; import java.util.List; import 
java.util.concurrent.ExecutorService; import java.util.concurrent.Future; @@ -237,12 +236,7 @@ public void copyInt(int[] d, int start, int end) { _data[i] = (char) d[i]; } - @Override - public void copyBit(BitSet d) { - for(int i = d.nextSetBit(0); i >= 0; i = d.nextSetBit(i + 1)) { - _data[i] = 1; - } - } + @Override public int[] getCounts(int[] ret) { diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToCharPByte.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToCharPByte.java index 22a3ff2b952..92a41ab8ade 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToCharPByte.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToCharPByte.java @@ -23,7 +23,6 @@ import java.io.DataOutput; import java.io.IOException; import java.util.Arrays; -import java.util.BitSet; import org.apache.commons.lang3.NotImplementedException; import org.apache.sysds.runtime.compress.colgroup.IMapToDataGroup; @@ -178,11 +177,6 @@ public void copyInt(int[] d, int start, int end) { set(i, d[i]); } - @Override - public void copyBit(BitSet d) { - for(int i = d.nextSetBit(0); i >= 0; i = d.nextSetBit(i + 1)) - _data_c[i] = 1; - } @Override public int[] getCounts(int[] ret) { diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToFactory.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToFactory.java index 5154a8e2333..db139a8ce7a 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToFactory.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToFactory.java @@ -143,6 +143,20 @@ public static AMapToData create(final int size, final MAP_TYPE t) { } } + /** + * Create a specific mapping based on the integer values given. This constructor does not guarantee the values in the + * int array is encode-able in the given mapping. 
+ * + * @param values The values to encode into the mapping + * @param t The mapping type to use + * @return The filled mapping with the values + */ + public static AMapToData create(final int[] values, final MAP_TYPE t) { + AMapToData map = create(values.length, t); + map.copyInt(values); + return map; + } + /** * Force the mapping into an other mapping type. This method is unsafe since if there is overflows in the * conversions, they are not handled. Also if the change is into the same type a new map is allocated anyway. @@ -230,4 +244,30 @@ public static AMapToData readIn(DataInput in) throws IOException { return MapToInt.readFields(in); } } + + /** + * Get the maximum value possible to encode in a specific mapping type. + * + * @param t The mapping type to analyze + * @return The maximum value to encode. + */ + public static int getMaxPossible(MAP_TYPE t) { + switch(t) { + case ZERO: + return 0; + case BIT: + return 1; + case UBYTE: + return 127; + case BYTE: + return 255; + case CHAR: + return Character.MAX_VALUE; + case CHAR_BYTE: + return MapToCharPByte.max; + case INT: + default: + return Integer.MAX_VALUE; + } + } } diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToInt.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToInt.java index 7241fd7c669..0c31d251222 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToInt.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToInt.java @@ -23,7 +23,6 @@ import java.io.DataOutput; import java.io.IOException; import java.util.Arrays; -import java.util.BitSet; import org.apache.commons.lang3.NotImplementedException; import org.apache.sysds.runtime.compress.colgroup.IMapToDataGroup; @@ -92,7 +91,7 @@ public void set(int n, int v) { } @Override - public void set(int l, int u, int off, AMapToData tm){ + public void set(int l, int u, int off, AMapToData tm) { for(int i = l; i < u; i++, off++) { set(i, 
tm.getIndex(off)); } @@ -194,12 +193,6 @@ public void copyInt(int[] d, int start, int end) { _data[i] = d[i]; } - @Override - public void copyBit(BitSet d) { - for(int i = d.nextSetBit(0); i >= 0; i = d.nextSetBit(i + 1)) - _data[i] = 1; - } - @Override public int[] getCounts(int[] ret) { for(int i = 0; i < _data.length; i++) @@ -230,7 +223,7 @@ else if(unique < 128) ret = new MapToUByte(unique, size); else if(unique < 256) ret = new MapToByte(unique, size); - else if(unique < Character.MAX_VALUE ) + else if(unique < Character.MAX_VALUE) ret = new MapToChar(unique, size); else if(unique < MapToCharPByte.max) ret = new MapToCharPByte(unique, size); diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToUByte.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToUByte.java index b76ec7e0f58..a68a6c49de0 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToUByte.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToUByte.java @@ -62,10 +62,6 @@ public void fill(int v) { Arrays.fill(_data, (byte) (v % 128)); } - public static long getInMemorySize(int dataLength) { - return MapToByte.getInMemorySize(dataLength); - } - @Override public void write(DataOutput out) throws IOException { out.writeByte(MAP_TYPE.UBYTE.ordinal()); diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToZero.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToZero.java index 57e267470ad..4caee98486b 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToZero.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToZero.java @@ -22,7 +22,6 @@ import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; -import java.util.BitSet; import org.apache.commons.lang3.NotImplementedException; import org.apache.sysds.runtime.compress.colgroup.IMapToDataGroup; @@ -137,7 +136,7 @@ 
public void copyInt(int[] d, int start, int end) { } @Override - public void copyBit(BitSet d) { + public void copyBit(MapToBit d) { // do nothing } @@ -191,7 +190,6 @@ public int getMaxPossible() { @Override public boolean equals(AMapToData e) { return e instanceof MapToZero && // - e.getUnique() == getUnique() && // _size == ((MapToZero) e)._size; } } diff --git a/src/test/java/org/apache/sysds/test/component/compress/mapping/CustomMappingTest.java b/src/test/java/org/apache/sysds/test/component/compress/mapping/CustomMappingTest.java index 058b769620c..14224cc9f05 100644 --- a/src/test/java/org/apache/sysds/test/component/compress/mapping/CustomMappingTest.java +++ b/src/test/java/org/apache/sysds/test/component/compress/mapping/CustomMappingTest.java @@ -21,6 +21,7 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotEquals; import static org.junit.Assert.assertThrows; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; @@ -28,6 +29,7 @@ import static org.mockito.Mockito.spy; import static org.mockito.Mockito.when; +import java.util.Arrays; import java.util.Random; import org.apache.commons.lang3.NotImplementedException; @@ -35,6 +37,7 @@ import org.apache.commons.logging.LogFactory; import org.apache.sysds.runtime.compress.CompressedMatrixBlock; import org.apache.sysds.runtime.compress.DMLCompressionException; +import org.apache.sysds.runtime.compress.colgroup.IMapToDataGroup; import org.apache.sysds.runtime.compress.colgroup.mapping.AMapToData; import org.apache.sysds.runtime.compress.colgroup.mapping.MapToBit; import org.apache.sysds.runtime.compress.colgroup.mapping.MapToByte; @@ -56,7 +59,6 @@ public class CustomMappingTest { protected static final Log LOG = LogFactory.getLog(CustomMappingTest.class.getName()); - int[] data = new int[] {0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 
0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, @@ -224,14 +226,14 @@ public void resize() { for(MAP_TYPE m : MapToFactory.MAP_TYPE.values()) { AMapToData a = MapToFactory.create(s, m); a.getMaxPossible(); - for(int i = 1; i < Integer.MAX_VALUE/2 && i < a.getMaxPossible(); i = i * 2) { - for(int j = 0; j < s; j ++){ - a.set(j, (int)Math.max(0L, (long)i-j-1)); + for(int i = 1; i < Integer.MAX_VALUE / 2 && i < a.getMaxPossible(); i = i * 2) { + for(int j = 0; j < s; j++) { + a.set(j, (int) Math.max(0L, (long) i - j - 1)); } AMapToData b = a.resize(i); String mm = a.toString() + " vs " + b.toString(); - for(int j = 0; j < s; j++){ - assertEquals(mm,a.getIndex(j), b.getIndex(j)); + for(int j = 0; j < s; j++) { + assertEquals(mm, a.getIndex(j), b.getIndex(j)); } } @@ -244,17 +246,234 @@ public void resize2() { for(MAP_TYPE m : MapToFactory.MAP_TYPE.values()) { AMapToData a = MapToFactory.create(s, m); a.getMaxPossible(); - for(int i = 1; i < Integer.MAX_VALUE/2 && i < a.getMaxPossible(); i = i * 2) { - for(int j = 0; j < s; j ++){ - a.set(j, (int)Math.max(0L, (long)i-j-1)); + for(int i = 1; i < Integer.MAX_VALUE / 2 && i < a.getMaxPossible(); i = i * 2) { + for(int j = 0; j < s; j++) { + a.set(j, (int) Math.max(0L, (long) i - j - 1)); } AMapToData b = a.resize(i); String mm = a.toString() + " vs " + b.toString(); - for(int j = 0; j < s; j++){ - assertEquals(mm,a.getIndex(j), b.getIndex(j)); + for(int j = 0; j < s; j++) { + assertEquals(mm, a.getIndex(j), b.getIndex(j)); } } } } + + @Test + public void testBitSetFill() { + AMapToData a = MapToFactory.create(100, MAP_TYPE.BIT); + testFill(100, a); + } + + @Test + public void testBitSetFill64() { + int length = 64 * 3; + AMapToData a = MapToFactory.create(length, MAP_TYPE.BIT); + testFill(length, a); + } + + private void testFill(int length, AMapToData a) { + for(int 
i = 0; i < length; i++) { + assertEquals(0, a.getIndex(i)); + } + + a.fill(1); + for(int i = 0; i < length; i++) { + assertEquals(1, a.getIndex(i)); + } + + a.fill(0); + for(int i = 0; i < length; i++) { + assertEquals(0, a.getIndex(i)); + } + } + + @Test + public void testBitSetNextBitOutOfRange() { + MapToBit m = new MapToBit(2, 100); + assertEquals(-1, m.nextSetBit(0)); + assertEquals(-1, m.nextSetBit(1000)); + m.fill(1); + assertEquals(-1, m.nextSetBit(1000)); + assertEquals(-1, m.nextSetBit(100)); + assertEquals(99, m.nextSetBit(99)); + assertEquals(98, m.nextSetBit(98)); + + } + + @Test + public void testBitSetNextBit() { + MapToBit m = new MapToBit(2, 100); + m.set(1,1); + m.set(98,1); + assertEquals(1, m.nextSetBit(0)); + assertEquals(98, m.nextSetBit(2)); + m.fill(1); + for(int i = 0; i < 100; i++){ + m.set(i, 0); + } + + assertEquals(-1, m.nextSetBit(0)); + + + } + + + @Test + public void decompressToRange() { + double[] values = new double[] {1, 2, 3, 4, 5, 6}; + + for(MAP_TYPE m : MapToFactory.MAP_TYPE.values()) { + AMapToData map = MapToFactory.create(new int[] {0, 1, 2, 3, 4, 5, 1, 2, 3}, m); + int rl = 0; + int ru = map.size(); + int off = 0; + + evalDecompressRange(values, map, rl, ru, off); + } + } + + @Test + public void decompressToRange2() { + double[] values = new double[] {1, 2, 3, 4, 5, 6.3}; + + for(MAP_TYPE m : MapToFactory.MAP_TYPE.values()) { + AMapToData map = MapToFactory.create(new int[] {0, 1, 2, 3, 4, 5, 1, 2, 3}, m); + int rl = 3; + int ru = map.size(); + int off = 0; + + evalDecompressRange(values, map, rl, ru, off); + } + } + + @Test + public void decompressToRange3() { + double[] values = new double[] {1, 2, 3, 4, 5, 6.3}; + + for(MAP_TYPE m : MapToFactory.MAP_TYPE.values()) { + AMapToData map = MapToFactory.create(new int[] {0, 1, 2, 3, 4, 5, 1, 2, 3}, m); + int rl = 1; + int ru = map.size() - 2; + int off = 0; + + evalDecompressRange(values, map, rl, ru, off); + } + } + + @Test + public void decompressToRangeOffset1() { + 
double[] values = new double[] {1, 2, 3, 4, 5, 6.3}; + + for(MAP_TYPE m : MapToFactory.MAP_TYPE.values()) { + AMapToData map = MapToFactory.create(new int[] {0, 1, 2, 3, 4, 5, 1, 2, 3}, m); + int rl = 1; + int ru = map.size() - 2; + int off = -1; + + evalDecompressRange(values, map, rl, ru, off); + } + } + + @Test + public void decompressToRangeOffset2() { + double[] values = new double[] {1, 2, 3, 4, 5, 6.3}; + + for(MAP_TYPE m : MapToFactory.MAP_TYPE.values()) { + AMapToData map = MapToFactory.create(new int[] {0, 1, 2, 3, 4, 5, 1, 2, 3}, m); + int rl = 1; + int ru = map.size() - 2; + int off = 1; + + evalDecompressRange(values, map, rl, ru, off); + } + } + + private void evalDecompressRange(double[] values, AMapToData map, int rl, int ru, int off) { + double[] ret = new double[map.size() + off]; + map.decompressToRange(ret, rl, ru, off, values); + String r = Arrays.toString(ret); + for(int i = 0; i < ret.length; i++) { + if(i < rl + off || i >= ru + off) + assertEquals(r + "index : " + i, 0, ret[i], 0); + else + assertEquals(r + "index : " + i, values[map.getIndex(i - off)], ret[i], 0); + } + } + + @Test + public void isEmptyBitSet() { + MapToBit m = new MapToBit(2, 1000); + assertTrue(m.isEmpty()); + m.set(134, 1); + assertFalse(m.isEmpty()); + m.set(134, 0); + assertTrue(m.isEmpty()); + m.fill(1); + assertFalse(m.isEmpty()); + m.fill(0); + assertTrue(m.isEmpty()); + } + + @Test(expected = RuntimeException.class) + public void appendNonZero() { + MapToZero m = new MapToZero(10); + IMapToDataGroup g = mock(IMapToDataGroup.class); + when(g.getMapToData()).thenReturn(new MapToBit(2, 10)); + m.appendN(new IMapToDataGroup[] {g}); + } + + @Test + public void getType() { + + for(MAP_TYPE m : MapToFactory.MAP_TYPE.values()) { + assertEquals(m, MapToFactory.create(10, m).getType()); + } + } + + @Test + public void setAndGet() { + Random r = new Random(324); + for(MAP_TYPE m : MapToFactory.MAP_TYPE.values()) { + AMapToData mm = MapToFactory.create(10, m); + int v = 
MapToFactory.getMaxPossible(m); + + assertEquals(v, mm.setAndGet(1, v)); + if(v != 0){ + for(int i = 0; i < 100; i++){ + int rv = r.nextInt(v); + int ri = r.nextInt(mm.size()); + assertEquals(rv, mm.setAndGet(ri, rv)); + } + } + } + } + + + @Test + public void nothingTestsForMapToZero(){ + MapToZero m = new MapToZero(10); + m.copyBit(null); // do nothing + m.replace(-1,10);// do nothing + m.set(1,1,1,null); // do nothing + assertEquals(0, m.getUpperBoundValue()); + assertEquals(m, new MapToZero(10)); + + } + + @Test + public void mapToZeroSlice(){ + MapToZero m = new MapToZero(10); + AMapToData m2 = m.slice(3,8); // return new. + assertEquals(new MapToZero(8-3),m2); + + } + + @Test + public void mapToZeroEquals(){ + MapToZero m = new MapToZero(10); + assertNotEquals(MapToFactory.create(10, MAP_TYPE.BYTE),m); + + } } diff --git a/src/test/java/org/apache/sysds/test/component/compress/mapping/MappingPreAggregateTests.java b/src/test/java/org/apache/sysds/test/component/compress/mapping/MappingPreAggregateTests.java index 7f51c8ed027..d509108aecd 100644 --- a/src/test/java/org/apache/sysds/test/component/compress/mapping/MappingPreAggregateTests.java +++ b/src/test/java/org/apache/sysds/test/component/compress/mapping/MappingPreAggregateTests.java @@ -19,6 +19,7 @@ package org.apache.sysds.test.component.compress.mapping; +import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; import static org.junit.Assert.fail; @@ -258,7 +259,6 @@ public void testPreAggRowsColsRange(int rl, int ru, int cl, int cu) { @Test public void testPreAggregateDenseSingleRowWithIndexes() { switch(type) { - case BIT: case INT: return; default: @@ -289,6 +289,49 @@ public void testPreAggregateSparseSingleRowWithIndexes() { } } + @Test + public void testPreAggregateSparseSingleRow() { + try { + if(!sb.isInSparseFormat()) + return; + double[] pre = new double[m.getUnique()]; + m.preAggregateSparse(sb.getSparseBlock(), pre, 0, 1); + verifyPreaggregate(m, sb, 
0, 1, pre); + } + catch(Exception e) { + e.printStackTrace(); + fail(e.toString()); + } + } + + @Test + public void testPreAggregateSparseMultiRow() { + try { + if(!sb.isInSparseFormat()) + return; + double[] pre = new double[m.getUnique() * sb.getNumRows()]; + m.preAggregateSparse(sb.getSparseBlock(), pre, 0, sb.getNumRows()); + verifyPreaggregate(m, sb, 0, sb.getNumRows(), pre); + } + catch(Exception e) { + e.printStackTrace(); + fail(e.toString()); + } + } + + + private void verifyPreaggregate(AMapToData m, MatrixBlock mb, int rl, int ru, double[] ret){ + + double[] verification = new double[ret.length]; + for(int i = rl; i < ru; i++){ + for(int j = 0; j < mb.getNumColumns(); j++){ + verification[m.getIndex(j) + i * m.getUnique()] += mb.get(i,j); + } + } + assertArrayEquals(verification, ret, 0); + } + + private void compareRes(double[] expectedFull, double[] actual, int row) { String error = "\nNot equal elements with " + type + " " + m.getUnique(); int nVal = m.getUnique(); diff --git a/src/test/java/org/apache/sysds/test/component/compress/mapping/MappingTests.java b/src/test/java/org/apache/sysds/test/component/compress/mapping/MappingTests.java index b32c2acac2b..cfc6f3fe61d 100644 --- a/src/test/java/org/apache/sysds/test/component/compress/mapping/MappingTests.java +++ b/src/test/java/org/apache/sysds/test/component/compress/mapping/MappingTests.java @@ -456,4 +456,5 @@ public AMapToData getMapToData() { } } + } diff --git a/src/test/java/org/apache/sysds/test/component/compress/mapping/PreAggregateDDC_DDCTest.java b/src/test/java/org/apache/sysds/test/component/compress/mapping/PreAggregateDDC_DDCTest.java index 0a7f919b4fe..1140c799e1d 100644 --- a/src/test/java/org/apache/sysds/test/component/compress/mapping/PreAggregateDDC_DDCTest.java +++ b/src/test/java/org/apache/sysds/test/component/compress/mapping/PreAggregateDDC_DDCTest.java @@ -57,6 +57,12 @@ public static Collection data() { final Random r = new Random(2321522); final int sm = 
Integer.MAX_VALUE; + create(tests, 10, 1, 1, 1, r.nextInt(sm)); + create(tests, 10, 1, 10, 1, r.nextInt(sm)); + create(tests, 10, 10, 1, 1, r.nextInt(sm)); + create(tests, 10, 1, 1, 2, r.nextInt(sm)); + create(tests, 10, 1, 10, 2, r.nextInt(sm)); + create(tests, 10, 10, 1, 2, r.nextInt(sm)); create(tests, 10, 10, 5, 1, r.nextInt(sm)); create(tests, 10, 10, 5, 1, r.nextInt(sm)); create(tests, 100, 10, 5, 1, r.nextInt(sm)); From 7c35061298ba0e26154bc41488e10c69ec913f1f Mon Sep 17 00:00:00 2001 From: Sebastian Baunsgaard Date: Sun, 29 Dec 2024 17:44:03 +0100 Subject: [PATCH 06/11] add preaggregatemult direct example --- .../compress/mapping/CustomMappingTest.java | 44 +++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/src/test/java/org/apache/sysds/test/component/compress/mapping/CustomMappingTest.java b/src/test/java/org/apache/sysds/test/component/compress/mapping/CustomMappingTest.java index 14224cc9f05..f57407076bd 100644 --- a/src/test/java/org/apache/sysds/test/component/compress/mapping/CustomMappingTest.java +++ b/src/test/java/org/apache/sysds/test/component/compress/mapping/CustomMappingTest.java @@ -38,6 +38,10 @@ import org.apache.sysds.runtime.compress.CompressedMatrixBlock; import org.apache.sysds.runtime.compress.DMLCompressionException; import org.apache.sysds.runtime.compress.colgroup.IMapToDataGroup; +import org.apache.sysds.runtime.compress.colgroup.dictionary.Dictionary; +import org.apache.sysds.runtime.compress.colgroup.dictionary.IDictionary; +import org.apache.sysds.runtime.compress.colgroup.indexes.ColIndexFactory; +import org.apache.sysds.runtime.compress.colgroup.indexes.IColIndex; import org.apache.sysds.runtime.compress.colgroup.mapping.AMapToData; import org.apache.sysds.runtime.compress.colgroup.mapping.MapToBit; import org.apache.sysds.runtime.compress.colgroup.mapping.MapToByte; @@ -52,6 +56,9 @@ import org.apache.sysds.runtime.compress.colgroup.offset.OffsetFactory; import
org.apache.sysds.runtime.compress.utils.IntArrayList; import org.apache.sysds.runtime.data.DenseBlock; +import org.apache.sysds.runtime.data.DenseBlockFactory; +import org.apache.sysds.runtime.data.SparseBlock; +import org.apache.sysds.runtime.data.SparseBlockFactory; import org.apache.sysds.runtime.matrix.data.MatrixBlock; import org.junit.Test; @@ -476,4 +483,41 @@ public void mapToZeroEquals(){ assertNotEquals(MapToFactory.create(10, MAP_TYPE.BYTE),m); } + + + @Test + public void sparseMM(){ + for(MAP_TYPE t : MAP_TYPE.values()){ + if(t == MAP_TYPE.ZERO) + continue; + AMapToData map = MapToFactory.create(new int[] {0,1,1}, t); + SparseBlock sb = SparseBlockFactory.createIdentityMatrix(3); + DenseBlock ret = DenseBlockFactory.createDenseBlock(new double[3 * 10], 3, 10); + IDictionary dict = Dictionary.create(new double[]{1,1,1,2,2,2,3,3}); + IColIndex cols = ColIndexFactory.create(new int[]{1,4,8}); + + map.lmSparseMatrixRow(sb, 0, ret, cols, dict); + + for(int i = 0; i < cols.size(); i++){ + assertEquals(1, ret.get(0, cols.get(i)), 0); + } + + map.lmSparseMatrixRow(sb, 1, ret, cols, dict); + for(int i = 0; i < cols.size(); i++){ + assertEquals(2, ret.get(1, cols.get(i)), 0); + } + + map.lmSparseMatrixRow(SparseBlockFactory.createSparseBlock(10), 1, ret, cols, dict); + for(int i = 0; i < cols.size(); i++){ + assertEquals(2, ret.get(1, cols.get(i)), 0); + } + + for(int i = 0; i < 10; i++){ + assertEquals(0, ret.get(2, i), 0); + } + + assertEquals(6, ret.countNonZeros()); + } + + } } From b04810246712e171d39a5a02f3a4c1b93998bbd3 Mon Sep 17 00:00:00 2001 From: Sebastian Baunsgaard Date: Sun, 29 Dec 2024 17:48:19 +0100 Subject: [PATCH 07/11] slice --- .../test/component/compress/mapping/MappingTests.java | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/test/java/org/apache/sysds/test/component/compress/mapping/MappingTests.java b/src/test/java/org/apache/sysds/test/component/compress/mapping/MappingTests.java index cfc6f3fe61d..bf9392ce503 100644 
--- a/src/test/java/org/apache/sysds/test/component/compress/mapping/MappingTests.java +++ b/src/test/java/org/apache/sysds/test/component/compress/mapping/MappingTests.java @@ -457,4 +457,15 @@ public AMapToData getMapToData() { } + + + @Test + public void slice(){ + if(m.size() > 2){ + AMapToData s = m.slice(1, m.size()-1); + for(int i = 0; i < m.size() -2; i++){ + assertEquals(m.getIndex(i+1), s.getIndex(i)); + } + } + } } From e106d6059867d58d410816581349de2ea0df1ea5 Mon Sep 17 00:00:00 2001 From: Sebastian Baunsgaard Date: Sun, 29 Dec 2024 18:03:10 +0100 Subject: [PATCH 08/11] placeholder dict --- .../compress/colgroup/ColGroupConst.java | 3 +- .../compress/mapping/CustomMappingTest.java | 38 +++++++++++++++++++ .../compress/mapping/MappingTests.java | 3 -- 3 files changed, 40 insertions(+), 4 deletions(-) diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupConst.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupConst.java index 380fc29b26f..1f878f5b6af 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupConst.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupConst.java @@ -30,6 +30,7 @@ import org.apache.sysds.runtime.compress.colgroup.dictionary.IDictionary; import org.apache.sysds.runtime.compress.colgroup.dictionary.IdentityDictionary; import org.apache.sysds.runtime.compress.colgroup.dictionary.MatrixBlockDictionary; +import org.apache.sysds.runtime.compress.colgroup.dictionary.PlaceHolderDict; import org.apache.sysds.runtime.compress.colgroup.indexes.ColIndexFactory; import org.apache.sysds.runtime.compress.colgroup.indexes.IColIndex; import org.apache.sysds.runtime.compress.colgroup.mapping.AMapToData; @@ -79,7 +80,7 @@ private ColGroupConst(IColIndex colIndices, IDictionary dict) { public static AColGroup create(IColIndex colIndices, IDictionary dict) { if(dict == null) return new ColGroupEmpty(colIndices); - else 
if(dict.getNumberOfValues(colIndices.size()) > 1) { + else if(dict.getNumberOfValues(colIndices.size()) > 1 && !(dict instanceof PlaceHolderDict)) { // extract dict first row final double[] nd = new double[colIndices.size()]; for(int i = 0; i < colIndices.size(); i++) diff --git a/src/test/java/org/apache/sysds/test/component/compress/mapping/CustomMappingTest.java b/src/test/java/org/apache/sysds/test/component/compress/mapping/CustomMappingTest.java index f57407076bd..b2c26b2327b 100644 --- a/src/test/java/org/apache/sysds/test/component/compress/mapping/CustomMappingTest.java +++ b/src/test/java/org/apache/sysds/test/component/compress/mapping/CustomMappingTest.java @@ -19,6 +19,7 @@ package org.apache.sysds.test.component.compress.mapping; +import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotEquals; @@ -520,4 +521,41 @@ public void sparseMM(){ } } + + + @Test + public void counts(){ + MapToBit m = new MapToBit(2, 134); + m.set(3,1); + m.set(10,1); + m.set(110,1); + int[] counts = m.getCounts(); + for(MAP_TYPE t : MAP_TYPE.values()){ + if(t == MAP_TYPE.ZERO) + continue; + AMapToData d = MapToFactory.resizeForce(m, t); + assertArrayEquals(counts, d.getCounts()); + } + + + } + + @Test + public void countRunsAlternative(){ + MapToBit m = new MapToBit(2, 134); + m.set(3,1); + m.set(10,1); + m.set(110,1); + int counts = m.countRuns(); + for(MAP_TYPE t : MAP_TYPE.values()){ + if(t == MAP_TYPE.ZERO) + continue; + AMapToData d = MapToFactory.resizeForce(m, t); + assertEquals(counts, d.countRuns()); + } + + + } + + } diff --git a/src/test/java/org/apache/sysds/test/component/compress/mapping/MappingTests.java b/src/test/java/org/apache/sysds/test/component/compress/mapping/MappingTests.java index bf9392ce503..91f5138cb72 100644 --- a/src/test/java/org/apache/sysds/test/component/compress/mapping/MappingTests.java +++ 
b/src/test/java/org/apache/sysds/test/component/compress/mapping/MappingTests.java @@ -187,10 +187,7 @@ public void equalsTest() { @Test public void countRuns() { - if(m.getUnique() > m.size()) - return; int runs = m.countRuns(); - assertTrue(runs <= m.size()); } From 33140127dda6b1a8f9d0c07b729aba41f796afc4 Mon Sep 17 00:00:00 2001 From: Sebastian Baunsgaard Date: Sun, 29 Dec 2024 18:15:36 +0100 Subject: [PATCH 09/11] set range --- .../compress/mapping/MappingTests.java | 57 +++++++++++++++++-- 1 file changed, 51 insertions(+), 6 deletions(-) diff --git a/src/test/java/org/apache/sysds/test/component/compress/mapping/MappingTests.java b/src/test/java/org/apache/sysds/test/component/compress/mapping/MappingTests.java index 91f5138cb72..df84878fc25 100644 --- a/src/test/java/org/apache/sysds/test/component/compress/mapping/MappingTests.java +++ b/src/test/java/org/apache/sysds/test/component/compress/mapping/MappingTests.java @@ -41,6 +41,7 @@ import org.apache.sysds.runtime.compress.CompressedMatrixBlock; import org.apache.sysds.runtime.compress.colgroup.IMapToDataGroup; import org.apache.sysds.runtime.compress.colgroup.mapping.AMapToData; +import org.apache.sysds.runtime.compress.colgroup.mapping.MapToBit; import org.apache.sysds.runtime.compress.colgroup.mapping.MapToCharPByte; import org.apache.sysds.runtime.compress.colgroup.mapping.MapToFactory; import org.apache.sysds.runtime.compress.colgroup.mapping.MapToFactory.MAP_TYPE; @@ -454,14 +455,58 @@ public AMapToData getMapToData() { } + @Test + public void slice() { + if(m.size() > 2) { + AMapToData s = m.slice(1, m.size() - 1); + for(int i = 0; i < m.size() - 2; i++) { + assertEquals(m.getIndex(i + 1), s.getIndex(i)); + } + } + } + + @Test + public void setRange() { + AMapToData tmp = MapToFactory.create(m.size(), m.getUnique()); + tmp.copy(m); + + tmp.set(0, m.size(), 0, new MapToZero(size)); + for(int i = 0; i < m.size(); i++) + assertEquals(0, tmp.getIndex(i)); + if(m.size() > 11) { + tmp.copy(m); - @Test - 
public void slice(){ - if(m.size() > 2){ - AMapToData s = m.slice(1, m.size()-1); - for(int i = 0; i < m.size() -2; i++){ - assertEquals(m.getIndex(i+1), s.getIndex(i)); + tmp.set(10, m.size(), 0, new MapToZero(size)); + for(int i = 0; i < 10; i++) + assertEquals(m.getIndex(i), tmp.getIndex(i)); + for(int i = 10; i < m.size(); i++) + assertEquals(0, tmp.getIndex(i)); + + if(m instanceof MapToZero) + return; + tmp.copy(m); + AMapToData tmp2 = new MapToBit(2, size - 10); + tmp2.fill(1); + tmp2.set(0, 0); + tmp.set(10, m.size(), 0, tmp2); + for(int i = 0; i < 10; i++) + assertEquals(m.getIndex(i), tmp.getIndex(i)); + assertEquals(0, tmp.getIndex(10)); + for(int i = 11; i < m.size(); i++) + assertEquals(1, tmp.getIndex(i)); + + for(MAP_TYPE t : MAP_TYPE.values()) { + if(t == MAP_TYPE.ZERO) + continue; + tmp.copy(m); + tmp2 = MapToFactory.resizeForce(tmp2, t); + tmp.set(10, m.size(), 0, tmp2); + for(int i = 0; i < 10; i++) + assertEquals(m.getIndex(i), tmp.getIndex(i)); + assertEquals(0, tmp.getIndex(10)); + for(int i = 11; i < m.size(); i++) + assertEquals(1, tmp.getIndex(i)); } } } From 874e894de0b9ec7df591265ec941d689062525c0 Mon Sep 17 00:00:00 2001 From: Sebastian Baunsgaard Date: Sun, 29 Dec 2024 19:19:30 +0100 Subject: [PATCH 10/11] fix MapToChar --- .../compress/colgroup/mapping/AMapToData.java | 4 ---- .../runtime/compress/colgroup/mapping/MapToChar.java | 12 +++--------- 2 files changed, 3 insertions(+), 13 deletions(-) diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/AMapToData.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/AMapToData.java index 29eedf45561..66e8a3f0cca 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/AMapToData.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/AMapToData.java @@ -833,7 +833,6 @@ public void copyInt(int[] d) { public abstract void copyInt(int[] d, int start, int end); - public void copyBit(MapToBit d) { fill(0); 
for(int i = d.nextSetBit(0); i >= 0; i = d.nextSetBit(i + 1)) { @@ -998,9 +997,6 @@ public AMapToData[] splitReshapeDDC(final int multiplier) { for(int i = 0; i < multiplier; i++) ret[i] = MapToFactory.create(eachSize, getUnique()); - // for(int i = 0; i < s; i += multiplier) - // splitReshapeDDCRow(ret, multiplier, i); - final int blkz = Math.max(eachSize / 8, 2048) * multiplier; for(int i = 0; i < s; i += blkz) splitReshapeDDCBlock(ret, multiplier, i, Math.min(i + blkz, s)); diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToChar.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToChar.java index 00966efc969..4651ea8439b 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToChar.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToChar.java @@ -45,7 +45,7 @@ public class MapToChar extends AMapToData { private final char[] _data; protected MapToChar(int size) { - this(Character.MAX_VALUE+1, size); + this(Character.MAX_VALUE + 1, size); } public MapToChar(int unique, int size) { @@ -95,10 +95,6 @@ public void set(int n, int v) { _data[n] = (char) v; } - public void set(int n, char v) { - _data[n] = v; - } - @Override public void set(int l, int u, int off, AMapToData tm) { if(tm instanceof MapToChar) { @@ -167,7 +163,7 @@ protected static MapToChar readFields(DataInput in) throws IOException { final int length = in.readInt(); final char[] data = new char[length]; for(int i = 0; i < length; i++) - data[i] = (char)in.readUnsignedShort(); + data[i] = (char) in.readUnsignedShort(); return new MapToChar(unique, data); } @@ -236,8 +232,6 @@ public void copyInt(int[] d, int start, int end) { _data[i] = (char) d[i]; } - - @Override public int[] getCounts(int[] ret) { for(int i = 0; i < _data.length; i++) @@ -322,7 +316,7 @@ public AMapToData appendN(IMapToDataGroup[] d) { @Override public int getMaxPossible() { - return Character.MAX_VALUE ; + return 
Character.MAX_VALUE; } @Override From b6de449bf44fd9dd98684d568f7ee76d6f00ca5d Mon Sep 17 00:00:00 2001 From: Sebastian Baunsgaard Date: Sun, 29 Dec 2024 19:44:14 +0100 Subject: [PATCH 11/11] cleanup --- .../compress/colgroup/mapping/AMapToData.java | 10 ----- .../compress/colgroup/mapping/MapToBit.java | 24 ++-------- .../compress/colgroup/mapping/MapToByte.java | 11 ++--- .../compress/colgroup/mapping/MapToChar.java | 8 ---- .../colgroup/mapping/MapToCharPByte.java | 8 ---- .../compress/colgroup/mapping/MapToInt.java | 5 --- .../compress/colgroup/mapping/MapToUByte.java | 5 --- .../compress/colgroup/mapping/MapToZero.java | 5 --- .../compress/colgroup/scheme/DDCSchemeSC.java | 2 +- .../compress/mapping/CustomMappingTest.java | 44 +++++++++++++++++-- .../mapping/MappingPreAggregateTests.java | 33 ++++++++++++++ .../mapping/PreAggregateDDC_DDCTest.java | 14 ++++++ 12 files changed, 94 insertions(+), 75 deletions(-) diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/AMapToData.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/AMapToData.java index 66e8a3f0cca..5fc2acaea7a 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/AMapToData.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/AMapToData.java @@ -425,8 +425,6 @@ public final int[] getCounts() { * @param nCol The number of columns */ public final void preAggregateDDC_DDC(AMapToData tm, IDictionary td, Dictionary ret, int nCol) { - if(td.getNumberOfValues(nCol) != tm.nUnique) - throw new DMLCompressionException("Invalid map and dict combination"); if(nCol == 1) preAggregateDDC_DDCSingleCol(tm, td.getValues(), ret.getValues()); else @@ -849,14 +847,6 @@ public int getMax() { return m; } - /** - * Get the maximum possible value to encode in this encoding. 
For instance in a bit you can encode 2 values therefore - * max is 1 - * - * @return The maximum number of distinct values to encode - */ - public abstract int getMaxPossible(); - /** * Reallocate the map, to a smaller instance if applicable. Note it does not change the length of the array, just the * datatype. diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToBit.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToBit.java index 8e3e560be3b..c1ea64e55a7 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToBit.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToBit.java @@ -298,9 +298,10 @@ protected JoinBitSets(MapToBit t_data, MapToBit o_data, int size) { final long[] t_longs = t_data._data; final long[] _longs = o_data._data; - final int common = Math.min(t_longs.length, _longs.length); + if(t_longs.length != _longs.length) + throw new RuntimeException("Invalid to join bit sets not same length"); - for(int i = 0; i < common; i++) { + for(int i = 0; i < _longs.length; i++) { long t = t_longs[i]; long v = _longs[i]; tt += Long.bitCount(t & v); @@ -309,21 +310,6 @@ protected JoinBitSets(MapToBit t_data, MapToBit o_data, int size) { ff += Long.bitCount(~t & ~v); } - if(t_longs.length > common) { - for(int i = common; i < t_longs.length; i++) { - int v = Long.bitCount(t_longs[i]); - ft += v; - ff += 64 - v; - } - } - else if(_longs.length > common) { - for(int i = common; i < _longs.length; i++) { - int v = Long.bitCount(_longs[i]); - tf += v; - ff += 64 - v; - } - } - final int longest = Math.max(t_longs.length, _longs.length); ff += size - (longest * 64); // remainder } @@ -445,10 +431,6 @@ private static int longSize(int size) { return Math.max(size >> 6, 0) + 1; } - public int getMaxPossible() { - return 1; - } - @Override public void decompressToRangeOff(double[] c, int rl, int ru, int offR, double[] values) { for(int i = rl, offT = rl + offR; i < 
ru; i++, offT++) diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToByte.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToByte.java index fb7ae3d981c..3a28c91c592 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToByte.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToByte.java @@ -218,7 +218,7 @@ public AMapToData resize(int unique) { return new MapToZero(size); else if(unique == 2 && size > 32) ret = new MapToBit(unique, size); - else if(unique <= 127) { + else if(unique <= 128) { ret = toUByte(); ret.setUnique(unique); return ret; @@ -249,7 +249,7 @@ public AMapToData append(AMapToData t) { System.arraycopy(tbb, 0, ret, _data.length, t.size()); // return - if(newDistinct < 127) + if(newDistinct < 128) return new MapToUByte(newDistinct, ret); else return new MapToByte(newDistinct, ret); @@ -276,17 +276,12 @@ public AMapToData appendN(IMapToDataGroup[] d) { } } - if(getUnique() < 127) + if(getUnique() < 128) return new MapToUByte(getUnique(), ret); else return new MapToByte(getUnique(), ret); } - @Override - public int getMaxPossible() { - return 255; - } - @Override public void lmSparseMatrixRow(SparseBlock sb, final int r, DenseBlock db, final IColIndex colIndexes, final IDictionary dict) { diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToChar.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToChar.java index 4651ea8439b..fb6317ec1a3 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToChar.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToChar.java @@ -167,10 +167,6 @@ protected static MapToChar readFields(DataInput in) throws IOException { return new MapToChar(unique, data); } - protected char[] getChars() { - return _data; - } - @Override protected void preAggregateDenseToRowBy8(double[] mV, double[] preAV, int cl, int 
cu, int off) { final int h = (cu - cl) % 8; @@ -314,10 +310,6 @@ public AMapToData appendN(IMapToDataGroup[] d) { return new MapToChar(getUnique(), ret); } - @Override - public int getMaxPossible() { - return Character.MAX_VALUE; - } @Override public boolean equals(AMapToData e) { diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToCharPByte.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToCharPByte.java index 92a41ab8ade..a14a3ddd281 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToCharPByte.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToCharPByte.java @@ -158,14 +158,6 @@ protected static MapToCharPByte readFields(DataInput in) throws IOException { return new MapToCharPByte(unique, data_c, data_b); } - protected char[] getChars() { - return _data_c; - } - - protected byte[] getBytes() { - return _data_b; - } - @Override public int getUpperBoundValue() { return max; diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToInt.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToInt.java index 0c31d251222..3dcec05e373 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToInt.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToInt.java @@ -279,11 +279,6 @@ public AMapToData appendN(IMapToDataGroup[] d) { return new MapToInt(getUnique(), ret); } - @Override - public int getMaxPossible() { - return Integer.MAX_VALUE; - } - @Override public boolean equals(AMapToData e) { return e instanceof MapToInt && // diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToUByte.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToUByte.java index a68a6c49de0..97cbfdcde27 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToUByte.java +++ 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToUByte.java @@ -120,11 +120,6 @@ public int[] getCounts(int[] ret) { return ret; } - @Override - public int getMaxPossible() { - return 127; - } - @Override protected void decompressToRangeNoOffBy8(double[] c, int r, double[] values) { c[r] += values[_data[r]]; diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToZero.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToZero.java index 4caee98486b..b839fc336c2 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToZero.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToZero.java @@ -182,11 +182,6 @@ public AMapToData appendN(IMapToDataGroup[] d) { return new MapToZero(p); } - @Override - public int getMaxPossible() { - return 0; - } - @Override public boolean equals(AMapToData e) { return e instanceof MapToZero && // diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/scheme/DDCSchemeSC.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/scheme/DDCSchemeSC.java index b679c745e00..49bf8b52e49 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/scheme/DDCSchemeSC.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/scheme/DDCSchemeSC.java @@ -174,7 +174,7 @@ protected Pair tryUpdateAndEncode(MatrixBlock data, IColI } private void encodeAndUpdate(MatrixBlock data, AMapToData d, int col) { - final int max = d.getMaxPossible(); + final int max = d.getUpperBoundValue(); if(data.isInSparseFormat()) encodeAndUpdateSparse(data, d, col, max); else if(data.getDenseBlock().isContiguous()) diff --git a/src/test/java/org/apache/sysds/test/component/compress/mapping/CustomMappingTest.java b/src/test/java/org/apache/sysds/test/component/compress/mapping/CustomMappingTest.java index b2c26b2327b..e679f4e8b15 100644 --- 
a/src/test/java/org/apache/sysds/test/component/compress/mapping/CustomMappingTest.java +++ b/src/test/java/org/apache/sysds/test/component/compress/mapping/CustomMappingTest.java @@ -233,8 +233,8 @@ public void resize() { int s = 10; for(MAP_TYPE m : MapToFactory.MAP_TYPE.values()) { AMapToData a = MapToFactory.create(s, m); - a.getMaxPossible(); - for(int i = 1; i < Integer.MAX_VALUE / 2 && i < a.getMaxPossible(); i = i * 2) { + a.getUpperBoundValue(); + for(int i = 1; i < Integer.MAX_VALUE / 2 && i < a.getUpperBoundValue(); i = i * 2) { for(int j = 0; j < s; j++) { a.set(j, (int) Math.max(0L, (long) i - j - 1)); } @@ -253,8 +253,8 @@ public void resize2() { int s = 42; for(MAP_TYPE m : MapToFactory.MAP_TYPE.values()) { AMapToData a = MapToFactory.create(s, m); - a.getMaxPossible(); - for(int i = 1; i < Integer.MAX_VALUE / 2 && i < a.getMaxPossible(); i = i * 2) { + a.getUpperBoundValue(); + for(int i = 1; i < Integer.MAX_VALUE / 2 && i < a.getUpperBoundValue(); i = i * 2) { for(int j = 0; j < s; j++) { a.set(j, (int) Math.max(0L, (long) i - j - 1)); } @@ -478,6 +478,15 @@ public void mapToZeroSlice(){ } + + @Test + public void mapToBitEmptySlice(){ + MapToBit m = new MapToBit(2, 20); + AMapToData m2 = m.slice(3,8); // return new. 
+ assertEquals(new MapToZero(8-3),m2); + + } + @Test public void mapToZeroEquals(){ MapToZero m = new MapToZero(10); @@ -558,4 +567,31 @@ public void countRunsAlternative(){ } + @Test + public void bitToZero(){ + MapToBit m = new MapToBit(2, 10); + m.fill(1); + AMapToData a = m.resize(1); + for(int i = 0; i < 10; i++) + assertEquals(0, a.getIndex(i)); + } + + @Test(expected = RuntimeException.class) + public void invalidJoin(){ + MapToBit a = new MapToBit(2, 100); + MapToBit b = new MapToBit(2, 200); + + a.preAggregateDDC_DDCSingleCol(b, null,null); + } + + @Test + public void equalsMapToZZero(){ + MapToZero m = new MapToZero(10); + assertEquals(m, new MapToZero(10)); + assertNotEquals(m, new MapToZero(11)); + assertNotEquals(m, new MapToZero(1)); + assertNotEquals(m, new MapToBit(2, 1)); + assertNotEquals(m, new MapToBit(2, 10)); + } + } diff --git a/src/test/java/org/apache/sysds/test/component/compress/mapping/MappingPreAggregateTests.java b/src/test/java/org/apache/sysds/test/component/compress/mapping/MappingPreAggregateTests.java index d509108aecd..7e4191a5d0b 100644 --- a/src/test/java/org/apache/sysds/test/component/compress/mapping/MappingPreAggregateTests.java +++ b/src/test/java/org/apache/sysds/test/component/compress/mapping/MappingPreAggregateTests.java @@ -35,6 +35,7 @@ import org.apache.sysds.runtime.compress.colgroup.mapping.MapToFactory.MAP_TYPE; import org.apache.sysds.runtime.compress.colgroup.offset.AOffset; import org.apache.sysds.runtime.compress.colgroup.offset.OffsetByte; +import org.apache.sysds.runtime.data.SparseBlockFactory; import org.apache.sysds.runtime.matrix.data.MatrixBlock; import org.apache.sysds.test.TestUtils; import org.junit.Test; @@ -319,6 +320,38 @@ public void testPreAggregateSparseMultiRow() { } } + @Test + public void testPreAggregateSparseEmptySingleRow() { + try { + if(!sb.isInSparseFormat()) + return; + double[] pre = new double[m.getUnique()]; + MatrixBlock sb2 = new MatrixBlock(sb.getNumRows(), sb.getNumColumns(), 
0, SparseBlockFactory.createSparseBlock(sb.getNumRows())); + m.preAggregateSparse(sb2.getSparseBlock(), pre, 0, 1); + verifyPreaggregate(m, sb2, 0, 1, pre); + } + catch(Exception e) { + e.printStackTrace(); + fail(e.toString()); + } + } + + @Test + public void testPreAggregateSparseEmptyMultiRow() { + try { + if(!sb.isInSparseFormat()) + return; + double[] pre = new double[m.getUnique() * sb.getNumRows()]; + MatrixBlock sb2 = new MatrixBlock(sb.getNumRows(), sb.getNumColumns(), 0, SparseBlockFactory.createSparseBlock(sb.getNumRows())); + m.preAggregateSparse(sb2.getSparseBlock(), pre, 0, sb.getNumRows()); + verifyPreaggregate(m, sb2, 0, sb.getNumRows(), pre); + } + catch(Exception e) { + e.printStackTrace(); + fail(e.toString()); + } + } + private void verifyPreaggregate(AMapToData m, MatrixBlock mb, int rl, int ru, double[] ret){ diff --git a/src/test/java/org/apache/sysds/test/component/compress/mapping/PreAggregateDDC_DDCTest.java b/src/test/java/org/apache/sysds/test/component/compress/mapping/PreAggregateDDC_DDCTest.java index 1140c799e1d..20a027ca120 100644 --- a/src/test/java/org/apache/sysds/test/component/compress/mapping/PreAggregateDDC_DDCTest.java +++ b/src/test/java/org/apache/sysds/test/component/compress/mapping/PreAggregateDDC_DDCTest.java @@ -62,6 +62,20 @@ public static Collection data() { create(tests, 10, 10, 1, 1, r.nextInt(sm)); create(tests, 10, 1, 1, 2, r.nextInt(sm)); create(tests, 10, 1, 10, 2, r.nextInt(sm)); + + create(tests, 10, 1, 1, 1, r.nextInt(sm)); + create(tests, 10, 1, 2, 1, r.nextInt(sm)); + create(tests, 10, 2, 1, 1, r.nextInt(sm)); + create(tests, 10, 1, 1, 2, r.nextInt(sm)); + create(tests, 10, 1, 2, 2, r.nextInt(sm)); + + create(tests, 66, 1, 1, 1, r.nextInt(sm)); + create(tests, 66, 1, 2, 1, r.nextInt(sm)); + create(tests, 66, 2, 1, 1, r.nextInt(sm)); + create(tests, 66, 1, 1, 2, r.nextInt(sm)); + create(tests, 66, 1, 2, 2, r.nextInt(sm)); + + create(tests, 10, 10, 1, 2, r.nextInt(sm)); create(tests, 10, 10, 5, 1, 
r.nextInt(sm)); create(tests, 10, 10, 5, 1, r.nextInt(sm));