diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupConst.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupConst.java index 380fc29b26f..1f878f5b6af 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupConst.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupConst.java @@ -30,6 +30,7 @@ import org.apache.sysds.runtime.compress.colgroup.dictionary.IDictionary; import org.apache.sysds.runtime.compress.colgroup.dictionary.IdentityDictionary; import org.apache.sysds.runtime.compress.colgroup.dictionary.MatrixBlockDictionary; +import org.apache.sysds.runtime.compress.colgroup.dictionary.PlaceHolderDict; import org.apache.sysds.runtime.compress.colgroup.indexes.ColIndexFactory; import org.apache.sysds.runtime.compress.colgroup.indexes.IColIndex; import org.apache.sysds.runtime.compress.colgroup.mapping.AMapToData; @@ -79,7 +80,7 @@ private ColGroupConst(IColIndex colIndices, IDictionary dict) { public static AColGroup create(IColIndex colIndices, IDictionary dict) { if(dict == null) return new ColGroupEmpty(colIndices); - else if(dict.getNumberOfValues(colIndices.size()) > 1) { + else if(dict.getNumberOfValues(colIndices.size()) > 1 && !(dict instanceof PlaceHolderDict)) { // extract dict first row final double[] nd = new double[colIndices.size()]; for(int i = 0; i < colIndices.size(); i++) diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/AMapToData.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/AMapToData.java index 0765a158c52..5fc2acaea7a 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/AMapToData.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/AMapToData.java @@ -22,7 +22,10 @@ import java.io.DataOutput; import java.io.IOException; import java.io.Serializable; -import java.util.BitSet; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Future; import org.apache.commons.lang3.NotImplementedException; import org.apache.commons.logging.Log; @@ -95,7 +98,6 @@ public final void setUnique(int nUnique) { */ public abstract int getIndex(int n); - /** * Shortcut method to support Integer objects, not really efficient but for the purpose of reusing code. * @@ -116,6 +118,18 @@ public void set(int n, Integer v) { */ public abstract void set(int n, int v); + /** + * set a range of values from another map. + * + * The given tm must only contain supported values, and it is not verified. + * + * @param l lower bound + * @param u upper bound (not inclusive) + * @param off offset to take values from tm + * @param tm the other map to copy values from + */ + public abstract void set(int l, int u, int off, AMapToData tm); + /** * Set the index to the value and get the contained value after. * @@ -411,8 +425,6 @@ public final int[] getCounts() { * @param nCol The number of columns */ public final void preAggregateDDC_DDC(AMapToData tm, IDictionary td, Dictionary ret, int nCol) { - if(td.getNumberOfValues(nCol) != tm.nUnique) - throw new DMLCompressionException("Invalid map and dict combination"); if(nCol == 1) preAggregateDDC_DDCSingleCol(tm, td.getValues(), ret.getValues()); else @@ -788,9 +800,9 @@ public void preAggregateDDC_RLE(int[] ptr, char[] data, IDictionary td, Dictiona */ public void copy(AMapToData d) { if(d.nUnique == 1) - return; - // else if(d instanceof MapToBit) - // copyBit((MapToBit) d); + fill(0); + else if(d instanceof MapToBit) + copyBit((MapToBit) d); else if(d instanceof MapToInt) copyInt((MapToInt) d); else { @@ -813,9 +825,18 @@ protected void copyInt(MapToInt d) { * * @param d The array to copy */ - public abstract void copyInt(int[] d); + public void copyInt(int[] d) { + copyInt(d, 0, Math.min(d.length, size())); + } + + public abstract void copyInt(int[] d, int start, int end); - public abstract void copyBit(BitSet d); + public void copyBit(MapToBit d) { + fill(0); + for(int i = d.nextSetBit(0); i >= 0; i = d.nextSetBit(i + 1)) { + set(i, 1); + } + } public int getMax() { int m = -1; @@ -826,13 +847,6 @@ public int getMax() { return m; } - /** - * Get the maximum possible value to encode in this encoding. For instance in a bit you can encode 2 values - * - * @return The maximum number of distinct values to encode - */ - public abstract int getMaxPossible(); - /** * Reallocate the map, to a smaller instance if applicable. Note it does not change the length of the array, just the * datatype. @@ -887,7 +901,8 @@ public int countRuns(AOffset off) { @Override public boolean equals(Object e) { - return e instanceof AMapToData && (this == e || this.equals((AMapToData) e)); + return this == e || // same object or + (e instanceof AMapToData && this.equals((AMapToData) e)); } /** @@ -903,7 +918,7 @@ public void verify() { if(CompressedMatrixBlock.debug) { for(int i = 0; i < size(); i++) { if(getIndex(i) >= nUnique) { - throw new DMLCompressionException("invalid construction of Mapping data containing values above unique"); + throw new DMLCompressionException("Invalid construction of Mapping data containing values above unique"); } } } @@ -934,7 +949,7 @@ public void decompressToRange(double[] c, int rl, int ru, int offR, double[] val decompressToRangeOff(c, rl, ru, offR, values); } - public void decompressToRangeOff(double[] c, int rl, int ru, int offR, double[] values) { + protected void decompressToRangeOff(double[] c, int rl, int ru, int offR, double[] values) { for(int i = rl, offT = rl + offR; i < ru; i++, offT++) c[offT] += values[getIndex(i)]; } @@ -950,7 +965,7 @@ protected void decompressToRangeNoOffBy8(double[] c, int r, double[] values) { c[r + 7] += values[getIndex(r + 7)]; } - public void decompressToRangeNoOff(double[] c, int rl, int ru, double[] values) { + protected void decompressToRangeNoOff(double[] c, int rl, int ru, double[] values) { final int h = (ru - rl) % 8; for(int rc = rl; rc < rl + h; rc++) c[rc] += values[getIndex(rc)]; @@ -958,6 +973,62 @@ public void decompressToRangeNoOff(double[] c, int rl, int ru, double[] values) decompressToRangeNoOffBy8(c, rc, values); } + /** + * Split this mapping into x smaller mappings according to round robin. + * + * @param multiplier The number of smaller mappings to construct + * @return The list of smaller mappings + */ + public AMapToData[] splitReshapeDDC(final int multiplier) { + + final int s = size(); + final AMapToData[] ret = new AMapToData[multiplier]; + final int eachSize = s / multiplier; + for(int i = 0; i < multiplier; i++) + ret[i] = MapToFactory.create(eachSize, getUnique()); + + final int blkz = Math.max(eachSize / 8, 2048) * multiplier; + for(int i = 0; i < s; i += blkz) + splitReshapeDDCBlock(ret, multiplier, i, Math.min(i + blkz, s)); + + return ret; + } + + public AMapToData[] splitReshapeDDCPushDown(final int multiplier, final ExecutorService pool) throws Exception { + + final int s = size(); + final AMapToData[] ret = new AMapToData[multiplier]; + final int eachSize = s / multiplier; + for(int i = 0; i < multiplier; i++) + ret[i] = MapToFactory.create(eachSize, getUnique()); + + final int blkz = Math.max(eachSize / 8, 2048) * multiplier; + List> tasks = new ArrayList<>(); + for(int i = 0; i < s; i += blkz) { + final int start = i; + final int end = Math.min(i + blkz, s); + tasks.add(pool.submit(() -> splitReshapeDDCBlock(ret, multiplier, start, end))); + } + + for(Future t : tasks) + t.get(); + + return ret; + } + + private void splitReshapeDDCBlock(final AMapToData[] ret, final int multiplier, final int start, final int end) { + + for(int i = start; i < end; i += multiplier) + splitReshapeDDCRow(ret, multiplier, i); + } + + private void splitReshapeDDCRow(final AMapToData[] ret, final int multiplier, final int i) { + final int off = i / multiplier; + final int end = i + multiplier; + for(int j = i; j < end; j++) + ret[j % multiplier].set(off, getIndex(j)); + } + @Override public String toString() { final int sz = size(); diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToBit.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToBit.java index f42783fca9f..c1ea64e55a7 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToBit.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToBit.java @@ -77,15 +77,7 @@ public MapToBit(int unique, int size) { } private MapToBit(int unique, BitSet d, int size) { - super(unique); - long[] bsd = d.toLongArray(); - if(bsd.length == longSize(size)) - _data = bsd; - else { - _data = new long[longSize(size)]; - System.arraycopy(bsd, 0, _data, 0, bsd.length); - } - _size = size; + this(unique, d.toLongArray(), size); } private MapToBit(int unique, long[] bsd, int size) { @@ -99,10 +91,6 @@ private MapToBit(int unique, long[] bsd, int size) { _size = size; } - protected long[] getData() { - return _data; - } - @Override public MAP_TYPE getType() { return MapToFactory.MAP_TYPE.BIT; @@ -110,17 +98,19 @@ public MAP_TYPE getType() { @Override public int getIndex(int n) { - int wIdx = n >> 6; // same as divide by 64 bit faster + int wIdx = n >> 6; // same as divide by 64 but faster return (_data[wIdx] & (1L << n)) != 0L ? 1 : 0; } @Override public void fill(int v) { - long re = (_data.length * 64) - _size; - if(re == 0 || v == 0) - Arrays.fill(_data, v == 0 ? 0L : -1L); + final long re = (_data.length * 64) - _size; + final boolean fillZero = v == 0; + final long fillValue = fillZero ? 0L : -1L; + if(re == 0 || fillZero) + Arrays.fill(_data, fillValue); else { - Arrays.fill(_data, 0, _data.length - 1, v == 0 ? 0L : -1L); + Arrays.fill(_data, 0, _data.length - 1, fillValue); _data[_data.length - 1] = -1L >>> re; } } @@ -145,10 +135,17 @@ public void set(int n, int v) { _data[wIdx] &= ~(1L << n); } + @Override + public void set(int l, int u, int off, AMapToData tm) { + for(int i = l; i < u; i++, off++) { + set(i, tm.getIndex(off)); + } + } + @Override public int setAndGet(int n, int v) { set(n, v); - return 1; + return v == 1 ? 1 : 0; } @Override @@ -254,30 +251,39 @@ public boolean isEmpty() { } @Override - public void copy(AMapToData d) { - // if(d instanceof MapToBit) - // copyBit((MapToBit) d); - if(d instanceof MapToInt) - copyInt((MapToInt) d); - else { - final int sz = size(); - for(int i = 0; i < sz; i++) - set(i, d.getIndex(i)); - } + public void copyInt(int[] d, int start, int end) { + for(int i = start; i < end; i++) + set(i, d[i]); } @Override - public void copyInt(int[] d) { - for(int i = 0; i < _size; i++) - set(i, d[i]); + public void copyBit(MapToBit d) { + long[] vals = d._data; + System.arraycopy(vals, 0, _data, 0, Math.min(vals.length, _data.length)); } - @Override - public void copyBit(BitSet d) { - long[] vals = d.toLongArray(); - System.arraycopy(vals, 0, _data, 0, vals.length); - if(vals.length < _data.length) - Arrays.fill(_data, vals.length, _data.length, 0L); + /** + * Return the index of the next bit set to one. If no more bits are set to one return -1. The method behaves + * similarly to and is inspired from java's BitSet. If a negative value is given as input it fails. + * + * @param fromIndex The index to start from (inclusive) + * @return The next valid index. + */ + public int nextSetBit(int fromIndex) { + if(fromIndex >= _size) + return -1; + int u = fromIndex >> 6; // long trick instead of division by 64. + final int s = _data.length; + // mask out previous set bits in this word. + long word = _data[u] & (0xffffffffffffffffL << fromIndex); + + while(true) { + if(word != 0) + return (u * 64) + Long.numberOfTrailingZeros(word); + if(++u == s) + return -1; + word = _data[u]; + } } private static class JoinBitSets { @@ -292,9 +298,10 @@ protected JoinBitSets(MapToBit t_data, MapToBit o_data, int size) { final long[] t_longs = t_data._data; final long[] _longs = o_data._data; - final int common = Math.min(t_longs.length, _longs.length); + if(t_longs.length != _longs.length) + throw new RuntimeException("Invalid to join bit sets not same length"); - for(int i = 0; i < common; i++) { + for(int i = 0; i < _longs.length; i++) { long t = t_longs[i]; long v = _longs[i]; tt += Long.bitCount(t & v); @@ -303,21 +310,6 @@ protected JoinBitSets(MapToBit t_data, MapToBit o_data, int size) { ff += Long.bitCount(~t & ~v); } - if(t_longs.length > common) { - for(int i = common; i < t_longs.length; i++) { - int v = Long.bitCount(t_longs[i]); - ft += v; - ff += 64 - v; - } - } - else if(_longs.length > common) { - for(int i = common; i < _longs.length; i++) { - int v = Long.bitCount(_longs[i]); - tf += v; - ff += 64 - v; - } - } - final int longest = Math.max(t_longs.length, _longs.length); ff += size - (longest * 64); // remainder } @@ -379,7 +371,12 @@ public int countRuns() { @Override public AMapToData slice(int l, int u) { long[] s = BitSetArray.sliceVectorized(_data, l, u); - return new MapToBit(getUnique(), s, u - l); + MapToBit m = new MapToBit(getUnique(), s, u - l); + + if(m.isEmpty()) + return new MapToZero(u - l); + else + return m; } @Override @@ -434,10 +431,6 @@ private static int longSize(int size) { return Math.max(size >> 6, 0) + 1; } - public int getMaxPossible() { - return 2; - } - @Override public void decompressToRangeOff(double[] c, int rl, int ru, int offR, double[] values) { for(int i = rl, offT = rl + offR; i < ru; i++, offT++) diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToByte.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToByte.java index 30a26735744..3a28c91c592 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToByte.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToByte.java @@ -23,7 +23,6 @@ import java.io.DataOutput; import java.io.IOException; import java.util.Arrays; -import java.util.BitSet; import org.apache.commons.lang3.NotImplementedException; import org.apache.sysds.runtime.compress.colgroup.IMapToDataGroup; @@ -95,6 +94,23 @@ public void set(int n, int v) { _data[n] = (byte) v; } + @Override + public void set(int l, int u, int off, AMapToData tm){ + if(tm instanceof MapToByte){ + MapToByte tbm = (MapToByte)tm; + byte[] tbv = tbm._data; + for(int i = l; i < u; i++, off++) { + _data[i] = tbv[off]; + } + } + else{ + + for(int i = l; i < u; i++, off++) { + _data[i] = (byte)tm.getIndex(off); + } + } + } + @Override public int setAndGet(int n, int v) { _data[n] = (byte) v; @@ -136,13 +152,14 @@ public void replace(int v, int r) { } @Override - public void copyInt(int[] d) { - for(int i = 0; i < _data.length; i++) + public void copyInt(int[] d, int start, int end) { + for(int i = start; i < end; i++) _data[i] = (byte) d[i]; } @Override - public void copyBit(BitSet d) { + public void copyBit(MapToBit d) { + fill(0); for(int i = d.nextSetBit(0); i >= 0; i = d.nextSetBit(i + 1)) { _data[i] = 1; } @@ -201,7 +218,7 @@ public AMapToData resize(int unique) { return new MapToZero(size); else if(unique == 2 && size > 32) ret = new MapToBit(unique, size); - else if(unique <= 127) { + else if(unique <= 128) { ret = toUByte(); ret.setUnique(unique); return ret; @@ -232,7 +249,7 @@ public AMapToData append(AMapToData t) { System.arraycopy(tbb, 0, ret, _data.length, t.size()); // return - if(newDistinct < 127) + if(newDistinct < 128) return new MapToUByte(newDistinct, ret); else return new MapToByte(newDistinct, ret); @@ -259,17 +276,12 @@ public AMapToData appendN(IMapToDataGroup[] d) { } } - if(getUnique() < 127) + if(getUnique() < 128) return new MapToUByte(getUnique(), ret); else return new MapToByte(getUnique(), ret); } - @Override - public int getMaxPossible() { - return 256; - } - @Override public void lmSparseMatrixRow(SparseBlock sb, final int r, DenseBlock db, final IColIndex colIndexes, final IDictionary dict) { @@ -320,13 +332,13 @@ public void decompressToRange(double[] c, int rl, int ru, int offR, double[] val } @Override - public void decompressToRangeOff(double[] c, int rl, int ru, int offR, double[] values) { + protected void decompressToRangeOff(double[] c, int rl, int ru, int offR, double[] values) { for(int i = rl, offT = rl + offR; i < ru; i++, offT++) c[offT] += values[getIndex(i)]; } @Override - public void decompressToRangeNoOff(double[] c, int rl, int ru, double[] values) { + protected void decompressToRangeNoOff(double[] c, int rl, int ru, double[] values) { // OVERWRITTEN FOR JIT COMPILE! final int h = (ru - rl) % 8; for(int rc = rl; rc < rl + h; rc++) diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToChar.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToChar.java index 690d54af9b8..fb6317ec1a3 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToChar.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToChar.java @@ -22,8 +22,11 @@ import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; +import java.util.ArrayList; import java.util.Arrays; -import java.util.BitSet; +import java.util.List; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Future; import org.apache.commons.lang3.NotImplementedException; import org.apache.sysds.runtime.compress.colgroup.IMapToDataGroup; @@ -42,7 +45,7 @@ public class MapToChar extends AMapToData { private final char[] _data; protected MapToChar(int size) { - this(Character.MAX_VALUE, size); + this(Character.MAX_VALUE + 1, size); } public MapToChar(int unique, int size) { @@ -92,6 +95,22 @@ public void set(int n, int v) { _data[n] = (char) v; } + @Override + public void set(int l, int u, int off, AMapToData tm) { + if(tm instanceof MapToChar) { + MapToChar tbm = (MapToChar) tm; + char[] tbv = tbm._data; + for(int i = l; i < u; i++, off++) { + _data[i] = tbv[off]; + } + } + else { + for(int i = l; i < u; i++, off++) { + set(i, tm.getIndex(off)); + } + } + } + @Override public int setAndGet(int n, int v) { return _data[n] = (char) v; @@ -144,14 +163,10 @@ protected static MapToChar readFields(DataInput in) throws IOException { final int length = in.readInt(); final char[] data = new char[length]; for(int i = 0; i < length; i++) - data[i] = in.readChar(); + data[i] = (char) in.readUnsignedShort(); return new MapToChar(unique, data); } - protected char[] getChars() { - return _data; - } - @Override protected void preAggregateDenseToRowBy8(double[] mV, double[] preAV, int cl, int cu, int off) { final int h = (cu - cl) % 8; @@ -208,18 +223,11 @@ public int getUpperBoundValue() { } @Override - public void copyInt(int[] d) { - for(int i = 0; i < _data.length; i++) + public void copyInt(int[] d, int start, int end) { + for(int i = start; i < end; i++) _data[i] = (char) d[i]; } - @Override - public void copyBit(BitSet d) { - for(int i = d.nextSetBit(0); i >= 0; i = d.nextSetBit(i + 1)) { - _data[i] = 1; - } - } - @Override public int[] getCounts(int[] ret) { for(int i = 0; i < _data.length; i++) @@ -302,10 +310,6 @@ public AMapToData appendN(IMapToDataGroup[] d) { return new MapToChar(getUnique(), ret); } - @Override - public int getMaxPossible() { - return Character.MAX_VALUE; - } @Override public boolean equals(AMapToData e) { @@ -391,4 +395,38 @@ protected final void preAggregateDDC_DDCSingleCol_vecChar(MapToChar tm, double[] v[getIndex(r8)] += td[tm.getIndex(r8)]; } + @Override + public AMapToData[] splitReshapeDDCPushDown(final int multiplier, final ExecutorService pool) throws Exception { + final int s = size(); + final MapToChar[] ret = new MapToChar[multiplier]; + final int eachSize = s / multiplier; + for(int i = 0; i < multiplier; i++) + ret[i] = new MapToChar(getUnique(), eachSize); + + final int blkz = Math.max(eachSize / 8, 2048) * multiplier; + List> tasks = new ArrayList<>(); + for(int i = 0; i < s; i += blkz) { + final int start = i; + final int end = Math.min(i + blkz, s); + tasks.add(pool.submit(() -> splitReshapeDDCBlock(ret, multiplier, start, end))); + } + + for(Future t : tasks) + t.get(); + + return ret; + } + + private void splitReshapeDDCBlock(final MapToChar[] ret, final int multiplier, final int start, final int end) { + for(int i = start; i < end; i += multiplier) + splitReshapeDDCRow(ret, multiplier, i); + } + + private void splitReshapeDDCRow(final MapToChar[] ret, final int multiplier, final int i) { + final int off = i / multiplier; + final int end = i + multiplier; + for(int j = i; j < end; j++) + ret[j % multiplier]._data[off] = _data[j]; + } + } diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToCharPByte.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToCharPByte.java index 28a68855575..a14a3ddd281 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToCharPByte.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToCharPByte.java @@ -23,7 +23,6 @@ import java.io.DataOutput; import java.io.IOException; import java.util.Arrays; -import java.util.BitSet; import org.apache.commons.lang3.NotImplementedException; import org.apache.sysds.runtime.compress.colgroup.IMapToDataGroup; @@ -38,8 +37,7 @@ public class MapToCharPByte extends AMapToData { private static final long serialVersionUID = 6315708056775476541L; - // 8323073 - public static final int max = 0xFFFF * 127; + public static final int max = (0xFFFF + 1) * 128 -1; private final char[] _data_c; private final byte[] _data_b; // next byte after the char @@ -101,6 +99,13 @@ public void set(int n, int v) { _data_b[n] = (byte) (m >> 16); } + @Override + public void set(int l, int u, int off, AMapToData tm){ + for(int i = l; i < u; i++, off++) { + set(i, tm.getIndex(off)); + } + } + @Override public int setAndGet(int n, int v) { int m = v & 0xffffff; @@ -153,30 +158,17 @@ protected static MapToCharPByte readFields(DataInput in) throws IOException { return new MapToCharPByte(unique, data_c, data_b); } - protected char[] getChars() { - return _data_c; - } - - protected byte[] getBytes() { - return _data_b; - } - @Override public int getUpperBoundValue() { return max; } @Override - public void copyInt(int[] d) { - for(int i = 0; i < d.length; i++) + public void copyInt(int[] d, int start, int end) { + for(int i = start; i < end; i++) set(i, d[i]); } - @Override - public void copyBit(BitSet d) { - for(int i = d.nextSetBit(0); i >= 0; i = d.nextSetBit(i + 1)) - _data_c[i] = 1; - } @Override public int[] getCounts(int[] ret) { @@ -273,7 +265,7 @@ public AMapToData appendN(IMapToDataGroup[] d) { @Override public int getMaxPossible() { - return Character.MAX_VALUE * 256; + return (Character.MAX_VALUE+1) * 256 -1; } @Override diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToFactory.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToFactory.java index 970220d9cc2..db139a8ce7a 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToFactory.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToFactory.java @@ -21,9 +21,15 @@ import java.io.DataInput; import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Future; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.sysds.runtime.compress.utils.IntArrayList; +import org.apache.sysds.runtime.util.CommonThreadPool; /** Interface for the factory design pattern for construction all AMapToData. */ public interface MapToFactory { @@ -63,28 +69,51 @@ public static AMapToData create(int size, int[] values, int nUnique) { return _data; } + public static AMapToData create(int unique, IntArrayList values) { + AMapToData _data = create(values.size(), unique); + _data.copyInt(values.extractValues()); + return _data; + } + + public static AMapToData create(int size, int[] values, int nUnique, int k) throws Exception { + AMapToData _data = create(size, nUnique); + final ExecutorService pool = CommonThreadPool.get(k); + int blk = Math.max((values.length / k), 1024); + blk -= blk % 64; // ensure long size + List> tasks = new ArrayList<>(); + for(int i = 0; i < values.length; i += blk) { + int start = i; + int end = Math.min(i + blk, values.length); + tasks.add(pool.submit(() -> _data.copyInt(values, start, end))); + } + + for(Future t : tasks) + t.get(); + return _data; + } + /** - * Create and allocate a map with the given size and support for upto the num tuples argument of values + * Create and allocate a map with the given size and support for up to the num tuples argument of values * - * @param size The number of cells to allocate - * @param numTuples The maximum value to be able to represent inside the map. + * @param size The number of cells to allocate + * @param unique The number of unique values to support (can encode unique -1) * @return A new map */ - public static AMapToData create(final int size, final int numTuples) { - if(numTuples <= 1) + public static AMapToData create(final int size, final int unique) { + if(unique <= 1) return new MapToZero(size); - else if(numTuples == 2 && size > 32) - return new MapToBit(numTuples, size); - else if(numTuples <= 127) - return new MapToUByte(numTuples, size); - else if(numTuples <= 256) - return new MapToByte(numTuples, size); - else if(numTuples <= Character.MAX_VALUE + 1) - return new MapToChar(numTuples, size); - else if(numTuples <= MapToCharPByte.max) - return new MapToCharPByte(numTuples, size); + else if(unique == 2 && size > 32) + return new MapToBit(unique, size); + else if(unique <= 128) + return new MapToUByte(unique, size); + else if(unique <= 256) + return new MapToByte(unique, size); + else if(unique <= Character.MAX_VALUE + 1) + return new MapToChar(unique, size); + else if(unique <= MapToCharPByte.max + 1) + return new MapToCharPByte(unique, size); else - return new MapToInt(numTuples, size); + return new MapToInt(unique, size); } /** @@ -114,6 +143,20 @@ public static AMapToData create(final int size, final MAP_TYPE t) { } } + /** + * Create a specific mapping based on the integer values given. This constructor does not guarantee the values in the + * int array is encode-able in the given mapping. + * + * @param values The values to encode into the mapping + * @param t The mapping type to use + * @return The filled mapping with the values + */ + public static AMapToData create(final int[] values, final MAP_TYPE t) { + AMapToData map = create(values.length, t); + map.copyInt(values); + return map; + } + /** * Force the mapping into an other mapping type. This method is unsafe since if there is overflows in the * conversions, they are not handled. Also if the change is into the same type a new map is allocated anyway. @@ -155,20 +198,20 @@ public static AMapToData resizeForce(AMapToData d, MAP_TYPE t) { /** * Estimate the size in memory of a MapToFactory. * - * @param size The size of the mapping - * @param numTuples The number of unique values to be supported by the mapping + * @param size The size of the mapping + * @param unique The number of unique values to support (can encode unique -1) * @return The size in number of bytes. */ - public static long estimateInMemorySize(int size, int numTuples) { - if(numTuples <= 1) + public static long estimateInMemorySize(int size, int unique) { + if(unique <= 1) return MapToZero.getInMemorySize(size); - else if(numTuples == 2 && size > 32) + else if(unique == 2 && size > 32) return MapToBit.getInMemorySize(size); - else if(numTuples <= 256) + else if(unique <= 256) return MapToByte.getInMemorySize(size); - else if(numTuples <= Character.MAX_VALUE + 1) + else if(unique <= Character.MAX_VALUE + 1) return MapToChar.getInMemorySize(size); - else if(numTuples <= MapToCharPByte.max) + else if(unique <= MapToCharPByte.max) return MapToCharPByte.getInMemorySize(size); else return MapToInt.getInMemorySize(size); @@ -201,4 +244,30 @@ public static AMapToData readIn(DataInput in) throws IOException { return MapToInt.readFields(in); } } + + /** + * Get the maximum value possible to encode in a specific mapping type. + * + * @param t The mapping type to analyze + * @return The maximum value to encode. + */ + public static int getMaxPossible(MAP_TYPE t) { + switch(t) { + case ZERO: + return 0; + case BIT: + return 1; + case UBYTE: + return 127; + case BYTE: + return 255; + case CHAR: + return Character.MAX_VALUE; + case CHAR_BYTE: + return MapToCharPByte.max; + case INT: + default: + return Integer.MAX_VALUE; + } + } } diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToInt.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToInt.java index 1b1e096757f..3dcec05e373 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToInt.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToInt.java @@ -23,7 +23,6 @@ import java.io.DataOutput; import java.io.IOException; import java.util.Arrays; -import java.util.BitSet; import org.apache.commons.lang3.NotImplementedException; import org.apache.sysds.runtime.compress.colgroup.IMapToDataGroup; @@ -37,7 +36,7 @@ public class MapToInt extends AMapToData { private final int[] _data; protected MapToInt(int size) { - this(Character.MAX_VALUE + 1, size); + this(Integer.MAX_VALUE, size); } public MapToInt(int unique, int size) { @@ -91,6 +90,13 @@ public void set(int n, int v) { _data[n] = v; } + @Override + public void set(int l, int u, int off, AMapToData tm) { + for(int i = l; i < u; i++, off++) { + set(i, tm.getIndex(off)); + } + } + @Override public int setAndGet(int n, int v) { return _data[n] = v; @@ -182,17 +188,11 @@ public int getUpperBoundValue() { } @Override - public void copyInt(int[] d) { - for(int i = 0; i < _data.length; i++) + public void copyInt(int[] d, int start, int end) { + for(int i = start; i < end; i++) _data[i] = d[i]; } - @Override - public void copyBit(BitSet d) { - for(int i = d.nextSetBit(0); i >= 0; i = d.nextSetBit(i + 1)) - _data[i] = 1; - } - @Override public int[] getCounts(int[] ret) { for(int i = 0; i < _data.length; i++) @@ -219,11 +219,11 @@ public AMapToData resize(int unique) { return new MapToZero(size); else if(unique == 2 && size > 32) ret = new MapToBit(unique, size); - else if(unique <= 127) + else if(unique < 128) ret = new MapToUByte(unique, size); else if(unique < 256) ret = new MapToByte(unique, size); - else if(unique < Character.MAX_VALUE - 1) + else if(unique < Character.MAX_VALUE) ret = new MapToChar(unique, size); else if(unique < MapToCharPByte.max) ret = new MapToCharPByte(unique, size); @@ -279,11 +279,6 @@ public AMapToData appendN(IMapToDataGroup[] d) { return new MapToInt(getUnique(), ret); } - @Override - public int getMaxPossible() { - return Integer.MAX_VALUE; - } - @Override public boolean equals(AMapToData e) { return e instanceof MapToInt && // diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToUByte.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToUByte.java index e8c9b0926a3..97cbfdcde27 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToUByte.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToUByte.java @@ -31,11 +31,11 @@ public class MapToUByte extends MapToByte { private static final long serialVersionUID = -2498505439667351828L; protected MapToUByte(int size) { - this(127, size); + this(128, size); } public MapToUByte(int unique, int size) { - super(Math.min(unique, 127), new byte[size]); + super(Math.min(unique, 128), new byte[size]); } protected MapToUByte(int unique, byte[] data) { @@ -62,10 +62,6 @@ public void fill(int v) { Arrays.fill(_data, (byte) (v % 128)); } - public static long getInMemorySize(int dataLength) { - return MapToByte.getInMemorySize(dataLength); - } - @Override public void write(DataOutput out) throws IOException { out.writeByte(MAP_TYPE.UBYTE.ordinal()); @@ -124,11 +120,6 @@ public int[] getCounts(int[] ret) { return ret; } - @Override - public int getMaxPossible() { - return 128; - } - @Override protected void decompressToRangeNoOffBy8(double[] c, int r, double[] values) { c[r] += values[_data[r]]; diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToZero.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToZero.java index e3797dce3fd..b839fc336c2 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToZero.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToZero.java @@ -22,7 +22,6 @@ import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; -import java.util.BitSet; import org.apache.commons.lang3.NotImplementedException; import org.apache.sysds.runtime.compress.colgroup.IMapToDataGroup; @@ -74,6 +73,11 @@ public void set(int n, int v) { // do nothing } + @Override + public void set(int l, int u, int off, AMapToData tm){ + // do nothing + } + @Override public int setAndGet(int n, int v) { return 0; @@ -127,12 +131,12 @@ public void preAggregateDDC_DDCMultiCol(AMapToData tm, IDictionary td, double[] } @Override - public void copyInt(int[] d) { + public void copyInt(int[] d, int start, int end) { // do nothing } @Override - public void copyBit(BitSet d) { + public void copyBit(MapToBit d) { // do nothing } @@ -178,15 +182,9 @@ public AMapToData appendN(IMapToDataGroup[] d) { return new MapToZero(p); } - @Override - public int getMaxPossible() { - return 1; - } - @Override public boolean equals(AMapToData e) { return e instanceof MapToZero && // - e.getUnique() == getUnique() && // _size == ((MapToZero) e)._size; } } diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/scheme/DDCSchemeMC.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/scheme/DDCSchemeMC.java index 9032dfd0f5d..c8a1b4e3c48 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/scheme/DDCSchemeMC.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/scheme/DDCSchemeMC.java @@ -189,7 +189,7 @@ private Pair tryUpdateAndEncode(MatrixBlock data, ReaderC while((cellVals = reader.nextRow()) != null) { final int row = reader.getCurrentRowIndex(); final int id = map.increment(cellVals); - if(id >= max) + if(id > max) throw new DMLCompressionException("Failed update and encode with " + max + " possible values"); d.set(row, id); } @@ -204,7 +204,7 @@ private Pair tryUpdateAndEncode(MatrixBlock data, ReaderC d.set(r++, emptyIdx.id); } final int id = map.increment(cellVals); - if(id >= max) + if(id > max) throw new DMLCompressionException( "Failed update and encode with " + max + " possible values" + map + " " + map.size()); d.set(row, id); diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/scheme/DDCSchemeSC.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/scheme/DDCSchemeSC.java index 2a5981dea6f..49bf8b52e49 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/scheme/DDCSchemeSC.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/scheme/DDCSchemeSC.java @@ -174,7 +174,7 @@ protected Pair tryUpdateAndEncode(MatrixBlock data, IColI } private void encodeAndUpdate(MatrixBlock data, AMapToData d, int col) { - final int max = d.getMaxPossible(); + final int max = d.getUpperBoundValue(); if(data.isInSparseFormat()) encodeAndUpdateSparse(data, d, col, max); else if(data.getDenseBlock().isContiguous()) @@ -189,7 +189,7 @@ private void encodeAndUpdateSparse(MatrixBlock data, AMapToData d, int col, int for(int i = 0; i < nRow; i++) { int id = map.increment(sb.get(i, col)); - if(id >= max) + if(id > max) throw new DMLCompressionException("Failed update and encode with " + max + " possible values"); d.set(i, id); } @@ -203,7 +203,7 @@ private void encodeAndUpdateDense(final MatrixBlock data, final AMapToData d, fi final int end = nRow * nCol; // guaranteed lower than intend. for(int i = 0, off = col; off < end; i++, off += nCol) { int id = map.increment(vals[off]); - if(id >= max) + if(id > max) throw new DMLCompressionException("Failed update and encode with " + max + " possible values"); d.set(i, id); } @@ -216,7 +216,7 @@ private void encodeAndUpdateGeneric(MatrixBlock data, AMapToData d, int col, int final double[] c = db.values(i); final int off = db.pos(i) + col; int id = map.increment(c[off]); - if(id >= max) + if(id > max) throw new DMLCompressionException("Failed update and encode with " + max + " possible values"); d.set(i, id); } diff --git a/src/test/java/org/apache/sysds/test/component/compress/mapping/CustomMappingTest.java b/src/test/java/org/apache/sysds/test/component/compress/mapping/CustomMappingTest.java index 8ab13cf9f34..e679f4e8b15 100644 --- a/src/test/java/org/apache/sysds/test/component/compress/mapping/CustomMappingTest.java +++ b/src/test/java/org/apache/sysds/test/component/compress/mapping/CustomMappingTest.java @@ -19,14 +19,54 @@ package org.apache.sysds.test.component.compress.mapping; +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotEquals; +import static org.junit.Assert.assertThrows; +import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.when; +import java.util.Arrays; +import java.util.Random; + +import org.apache.commons.lang3.NotImplementedException; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.sysds.runtime.compress.CompressedMatrixBlock; +import org.apache.sysds.runtime.compress.DMLCompressionException; +import org.apache.sysds.runtime.compress.colgroup.IMapToDataGroup; +import org.apache.sysds.runtime.compress.colgroup.dictionary.Dictionary; +import org.apache.sysds.runtime.compress.colgroup.dictionary.IDictionary; +import org.apache.sysds.runtime.compress.colgroup.indexes.ColIndexFactory; +import org.apache.sysds.runtime.compress.colgroup.indexes.IColIndex; +import org.apache.sysds.runtime.compress.colgroup.mapping.AMapToData; +import org.apache.sysds.runtime.compress.colgroup.mapping.MapToBit; +import org.apache.sysds.runtime.compress.colgroup.mapping.MapToByte; +import org.apache.sysds.runtime.compress.colgroup.mapping.MapToChar; +import org.apache.sysds.runtime.compress.colgroup.mapping.MapToCharPByte; import org.apache.sysds.runtime.compress.colgroup.mapping.MapToFactory; +import org.apache.sysds.runtime.compress.colgroup.mapping.MapToFactory.MAP_TYPE; +import org.apache.sysds.runtime.compress.colgroup.mapping.MapToInt; +import org.apache.sysds.runtime.compress.colgroup.mapping.MapToUByte; +import org.apache.sysds.runtime.compress.colgroup.mapping.MapToZero; +import org.apache.sysds.runtime.compress.colgroup.offset.AOffset; +import org.apache.sysds.runtime.compress.colgroup.offset.OffsetFactory; +import org.apache.sysds.runtime.compress.utils.IntArrayList; +import org.apache.sysds.runtime.data.DenseBlock; +import org.apache.sysds.runtime.data.DenseBlockFactory; +import org.apache.sysds.runtime.data.SparseBlock; +import org.apache.sysds.runtime.data.SparseBlockFactory; +import org.apache.sysds.runtime.matrix.data.MatrixBlock; import org.junit.Test; public class CustomMappingTest { + protected static final Log LOG = LogFactory.getLog(CustomMappingTest.class.getName()); + int[] data = new int[] {0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, @@ -49,4 +89,509 @@ public void createBinary() { fail(e.getMessage()); } } + + @Test + public void verifySpy() { + CompressedMatrixBlock.debug = true; + AMapToData d = MapToFactory.create(data, 2); + AMapToData spy = spy(d); + when(spy.getIndex(2)).thenReturn(32); + assertThrows(DMLCompressionException.class, () -> spy.verify()); + } + + @Test + public void equals() { + CompressedMatrixBlock.debug = true; + AMapToData d = MapToFactory.create(data, 2); + AMapToData d2 = MapToFactory.create(data, 2); + assertTrue(d.equals(d)); + assertTrue(d.equals(d2)); + assertFalse(d.equals(MapToFactory.create(new int[] {1, 2, 3}, 4))); + assertFalse(d.equals(Integer.valueOf(23))); + } + + @Test + public void countRuns() { + CompressedMatrixBlock.debug = true; + AMapToData d = MapToFactory.create(new int[] {1, 1, 1, 1, 1, 2, 2, 2, 2, 2}, 3); + AOffset o = OffsetFactory.createOffset(new int[] {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}); + assertEquals(d.countRuns(o), 2); + } + + @Test + public void countRuns2() { + CompressedMatrixBlock.debug = true; + AMapToData d = MapToFactory.create(new int[] {1, 1, 1, 1, 1, 2, 2, 2, 2, 2}, 3); + AOffset o = OffsetFactory.createOffset(new int[] {0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 11}); + assertEquals(d.countRuns(o), 3); + } + + @Test + public void getMax() { + CompressedMatrixBlock.debug = true; + AMapToData d = MapToFactory.create(new int[] {1, 1, 1, 1, 1, 2, 2, 2, 2, 2}, 3); + assertEquals(d.getMax(), 2); + d = MapToFactory.create(new int[] {1, 1, 1, 1, 1, 2, 2, 2, 5, 2}, 10); + assertEquals(d.getMax(), 5); + d = MapToFactory.create(new int[] {1, 1, 1, 9, 1, 2, 2, 2, 2, 2}, 10); + assertEquals(d.getMax(), 9); + } + + @Test + public void copyInt() { + CompressedMatrixBlock.debug = true; + AMapToData d = MapToFactory.create(new int[] {10, 9, 8, 7, 6, 5, 4, 3, 2, 1}, 11); + AMapToData d2 = MapToFactory.create(new int[] {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, Integer.MAX_VALUE - 2); + d.copy(d2); + for(int i = 0; i < 10; i++) { + assertEquals(d.getIndex(i), d2.getIndex(i)); + } + } + + @Test + public void setInteger() { + CompressedMatrixBlock.debug = true; + AMapToData d = MapToFactory.create(new int[] {10, 9, 8, 7, 6, 5, 4, 3, 2, 1}, 11); + + for(int i = 0; i < 10; i++) { + assertEquals(d.getIndex(i), 10 - i); + } + d.set(4, Integer.valueOf(13)); + assertEquals(d.getIndex(4), 13); + } + + @Test(expected = NotImplementedException.class) + public void preAggDenseNonContiguous() { + AMapToData d = MapToFactory.create(new int[] {10, 9, 8, 7, 6, 5, 4, 3, 2, 1}, 11); + MatrixBlock mb = new MatrixBlock(); + MatrixBlock spy = spy(mb); + DenseBlock db = mock(DenseBlock.class); + when(db.isContiguous()).thenReturn(false); + when(spy.getDenseBlock()).thenReturn(db); + + d.preAggregateDense(spy, null, 10, 13, 0, 10); + } + + @Test + public void compareParallelCreate() throws Exception { + int[] input = new int[100]; + Random r = new Random(32); + for(int i = 0; i < 100; i++) { + input[i] = r.nextInt(100); + } + + AMapToData a = MapToFactory.create(101, input, 100, 10); + AMapToData b = MapToFactory.create(101, input, 100); + assertEquals(a, b); + } + + @Test + public void allocateMapToPByte() { + assertEquals(MapToCharPByte.class, MapToFactory.create(10, MapToCharPByte.max).getClass()); + } + + @Test + public void createSpecificType() { + assertEquals(MapToCharPByte.class, MapToFactory.create(10, MAP_TYPE.CHAR_BYTE).getClass()); + assertEquals(MapToZero.class, MapToFactory.create(10, MAP_TYPE.ZERO).getClass()); + assertEquals(MapToBit.class, MapToFactory.create(10, MAP_TYPE.BIT).getClass()); + assertEquals(MapToByte.class, MapToFactory.create(10, MAP_TYPE.BYTE).getClass()); + assertEquals(MapToUByte.class, MapToFactory.create(10, MAP_TYPE.UBYTE).getClass()); + assertEquals(MapToChar.class, MapToFactory.create(10, MAP_TYPE.CHAR).getClass()); + assertEquals(MapToInt.class, MapToFactory.create(10, MAP_TYPE.INT).getClass()); + } + + @Test + public void estimateInMemorySize() { + for(int i = 0; i < 10; i++) { + + assertEquals(MapToFactory.estimateInMemorySize(i, i), MapToFactory.create(i, i).getInMemorySize()); + assertEquals(MapToFactory.estimateInMemorySize(i, 256), MapToFactory.create(i, 256).getInMemorySize()); + assertEquals(MapToFactory.estimateInMemorySize(i, 256 * 256), + MapToFactory.create(i, 256 * 256).getInMemorySize()); + assertEquals(MapToFactory.estimateInMemorySize(i, 256 * 256 * 256), + MapToFactory.create(i, 256 * 256 * 256).getInMemorySize()); + assertEquals(MapToFactory.estimateInMemorySize(i, 256 * 256 * 256 * 256), + MapToFactory.create(i, 256 * 256 * 256 * 256).getInMemorySize()); + assertEquals(MapToFactory.estimateInMemorySize(i, MapToCharPByte.max), + MapToFactory.create(i, MapToCharPByte.max).getInMemorySize()); + assertEquals(MapToFactory.estimateInMemorySize(i, Integer.MAX_VALUE), + MapToFactory.create(i, Integer.MAX_VALUE).getInMemorySize()); + } + } + + @Test + public void createWithIntArrayList() { + AMapToData a = MapToFactory.create(10, new IntArrayList(new int[] {1, 2, 3, 4})); + for(int i = 0; i < 4; i++) { + assertEquals(i + 1, a.getIndex(i)); + } + } + + @Test + public void resize() { + int s = 10; + for(MAP_TYPE m : MapToFactory.MAP_TYPE.values()) { + AMapToData a = MapToFactory.create(s, m); + a.getUpperBoundValue(); + for(int i = 1; i < Integer.MAX_VALUE / 2 && i < a.getUpperBoundValue(); i = i * 2) { + for(int j = 0; j < s; j++) { + a.set(j, (int) Math.max(0L, (long) i - j - 1)); + } + AMapToData b = a.resize(i); + String mm = a.toString() + " vs " + b.toString(); + for(int j = 0; j < s; j++) { + assertEquals(mm, a.getIndex(j), b.getIndex(j)); + } + } + + } + } + + @Test + public void resize2() { + int s = 42; + for(MAP_TYPE m : MapToFactory.MAP_TYPE.values()) { + AMapToData a = MapToFactory.create(s, m); + a.getUpperBoundValue(); + for(int i = 1; i < Integer.MAX_VALUE / 2 && i < a.getUpperBoundValue(); i = i * 2) { + for(int j = 0; j < s; j++) { + a.set(j, (int) Math.max(0L, (long) i - j - 1)); + } + AMapToData b = a.resize(i); + String mm = a.toString() + " vs " + b.toString(); + for(int j = 0; j < s; j++) { + assertEquals(mm, a.getIndex(j), b.getIndex(j)); + } + } + + } + } + + @Test + public void testBitSetFill() { + AMapToData a = MapToFactory.create(100, MAP_TYPE.BIT); + testFill(100, a); + } + + @Test + public void testBitSetFill64() { + int length = 64 * 3; + AMapToData a = MapToFactory.create(length, MAP_TYPE.BIT); + testFill(length, a); + } + + private void testFill(int length, AMapToData a) { + for(int i = 0; i < length; i++) { + assertEquals(0, a.getIndex(i)); + } + + a.fill(1); + for(int i = 0; i < length; i++) { + assertEquals(1, a.getIndex(i)); + } + + a.fill(0); + for(int i = 0; i < length; i++) { + assertEquals(0, a.getIndex(i)); + } + } + + @Test + public void testBitSetNextBitOutOfRange() { + MapToBit m = new MapToBit(2, 100); + assertEquals(-1, m.nextSetBit(0)); + assertEquals(-1, m.nextSetBit(1000)); + m.fill(1); + assertEquals(-1, m.nextSetBit(1000)); + assertEquals(-1, m.nextSetBit(100)); + assertEquals(99, m.nextSetBit(99)); + assertEquals(98, m.nextSetBit(98)); + + } + + @Test + public void testBitSetNextBit() { + MapToBit m = new MapToBit(2, 100); + m.set(1,1); + m.set(98,1); + assertEquals(1, m.nextSetBit(0)); + assertEquals(98, m.nextSetBit(2)); + m.fill(1); + for(int i = 0; i < 100; i++){ + m.set(i, 0); + } + + assertEquals(-1, m.nextSetBit(0)); + + + } + + + @Test + public void decompressToRange() { + double[] values = new double[] {1, 2, 3, 4, 5, 6}; + + for(MAP_TYPE m : MapToFactory.MAP_TYPE.values()) { + AMapToData map = MapToFactory.create(new int[] {0, 1, 2, 3, 4, 5, 1, 2, 3}, m); + int rl = 0; + int ru = map.size(); + int off = 0; + + evalDecompressRange(values, map, rl, ru, off); + } + } + + @Test + public void decompressToRange2() { + double[] values = new double[] {1, 2, 3, 4, 5, 6.3}; + + for(MAP_TYPE m : MapToFactory.MAP_TYPE.values()) { + AMapToData map = MapToFactory.create(new int[] {0, 1, 2, 3, 4, 5, 1, 2, 3}, m); + int rl = 3; + int ru = map.size(); + int off = 0; + + evalDecompressRange(values, map, rl, ru, off); + } + } + + @Test + public void decompressToRange3() { + double[] values = new double[] {1, 2, 3, 4, 5, 6.3}; + + for(MAP_TYPE m : MapToFactory.MAP_TYPE.values()) { + AMapToData map = MapToFactory.create(new int[] {0, 1, 2, 3, 4, 5, 1, 2, 3}, m); + int rl = 1; + int ru = map.size() - 2; + int off = 0; + + evalDecompressRange(values, map, rl, ru, off); + } + } + + @Test + public void decompressToRangeOffset1() { + double[] values = new double[] {1, 2, 3, 4, 5, 6.3}; + + for(MAP_TYPE m : MapToFactory.MAP_TYPE.values()) { + AMapToData map = MapToFactory.create(new int[] {0, 1, 2, 3, 4, 5, 1, 2, 3}, m); + int rl = 1; + int ru = map.size() - 2; + int off = -1; + + evalDecompressRange(values, map, rl, ru, off); + } + } + + @Test + public void decompressToRangeOffset2() { + double[] values = new double[] {1, 2, 3, 4, 5, 6.3}; + + for(MAP_TYPE m : MapToFactory.MAP_TYPE.values()) { + AMapToData map = MapToFactory.create(new int[] {0, 1, 2, 3, 4, 5, 1, 2, 3}, m); + int rl = 1; + int ru = map.size() - 2; + int off = 1; + + evalDecompressRange(values, map, rl, ru, off); + } + } + + private void evalDecompressRange(double[] values, AMapToData map, int rl, int ru, int off) { + double[] ret = new double[map.size() + off]; + map.decompressToRange(ret, rl, ru, off, values); + String r = Arrays.toString(ret); + for(int i = 0; i < ret.length; i++) { + if(i < rl + off || i >= ru + off) + assertEquals(r + "index : " + i, 0, ret[i], 0); + else + assertEquals(r + "index : " + i, values[map.getIndex(i - off)], ret[i], 0); + } + } + + @Test + public void isEmptyBitSet() { + MapToBit m = new MapToBit(2, 1000); + assertTrue(m.isEmpty()); + m.set(134, 1); + assertFalse(m.isEmpty()); + m.set(134, 0); + assertTrue(m.isEmpty()); + m.fill(1); + assertFalse(m.isEmpty()); + m.fill(0); + assertTrue(m.isEmpty()); + } + + @Test(expected = RuntimeException.class) + public void appendNonZero() { + MapToZero m = new MapToZero(10); + IMapToDataGroup g = mock(IMapToDataGroup.class); + when(g.getMapToData()).thenReturn(new MapToBit(2, 10)); + m.appendN(new IMapToDataGroup[] {g}); + } + + @Test + public void getType() { + + for(MAP_TYPE m : MapToFactory.MAP_TYPE.values()) { + assertEquals(m, MapToFactory.create(10, m).getType()); + } + } + + @Test + public void setAndGet() { + Random r = new Random(324); + for(MAP_TYPE m : MapToFactory.MAP_TYPE.values()) { + AMapToData mm = MapToFactory.create(10, m); + int v = MapToFactory.getMaxPossible(m); + + assertEquals(v, mm.setAndGet(1, v)); + if(v != 0){ + for(int i = 0; i < 100; i++){ + int rv = r.nextInt(v); + int ri = r.nextInt(mm.size()); + assertEquals(rv, mm.setAndGet(ri, rv)); + } + } + } + } + + + @Test + public void nothingTestsForMapToZero(){ + MapToZero m = new MapToZero(10); + m.copyBit(null); // do nothing + m.replace(-1,10);// do nothing + m.set(1,1,1,null); // do nothing + assertEquals(0, m.getUpperBoundValue()); + assertEquals(m, new MapToZero(10)); + + } + + @Test + public void mapToZeroSlice(){ + MapToZero m = new MapToZero(10); + AMapToData m2 = m.slice(3,8); // return new. + assertEquals(new MapToZero(8-3),m2); + + } + + + @Test + public void mapToBitEmptySlice(){ + MapToBit m = new MapToBit(2, 20); + AMapToData m2 = m.slice(3,8); // return new. + assertEquals(new MapToZero(8-3),m2); + + } + + @Test + public void mapToZeroEquals(){ + MapToZero m = new MapToZero(10); + assertNotEquals(MapToFactory.create(10, MAP_TYPE.BYTE),m); + + } + + + @Test + public void sparseMM(){ + for(MAP_TYPE t : MAP_TYPE.values()){ + if(t == MAP_TYPE.ZERO) + continue; + AMapToData map = MapToFactory.create(new int[] {0,1,1}, t); + SparseBlock sb = SparseBlockFactory.createIdentityMatrix(3); + DenseBlock ret = DenseBlockFactory.createDenseBlock(new double[3 * 10], 3, 10); + IDictionary dict = Dictionary.create(new double[]{1,1,1,2,2,2,3,3}); + IColIndex cols = ColIndexFactory.create(new int[]{1,4,8}); + + map.lmSparseMatrixRow(sb, 0, ret, cols, dict); + + for(int i = 0; i < cols.size(); i++){ + assertEquals(1, ret.get(0, cols.get(i)), 0); + } + + map.lmSparseMatrixRow(sb, 1, ret, cols, dict); + for(int i = 0; i < cols.size(); i++){ + assertEquals(2, ret.get(1, cols.get(i)), 0); + } + + map.lmSparseMatrixRow(SparseBlockFactory.createSparseBlock(10), 1, ret, cols, dict); + for(int i = 0; i < cols.size(); i++){ + assertEquals(2, ret.get(1, cols.get(i)), 0); + } + + for(int i = 0; i < 10; i++){ + assertEquals(0, ret.get(2, i), 0); + } + + assertEquals(6, ret.countNonZeros()); + } + + } + + + @Test + public void counts(){ + MapToBit m = new MapToBit(2, 134); + m.set(3,1); + m.set(10,1); + m.set(110,1); + int[] counts = m.getCounts(); + for(MAP_TYPE t : MAP_TYPE.values()){ + if(t == MAP_TYPE.ZERO) + continue; + AMapToData d = MapToFactory.resizeForce(m, t); + assertArrayEquals(counts, d.getCounts()); + } + + + } + + @Test + public void countRunsAlternative(){ + MapToBit m = new MapToBit(2, 134); + m.set(3,1); + m.set(10,1); + m.set(110,1); + int counts = m.countRuns(); + for(MAP_TYPE t : MAP_TYPE.values()){ + if(t == MAP_TYPE.ZERO) + continue; + AMapToData d = MapToFactory.resizeForce(m, t); + assertEquals(counts, d.countRuns()); + } + + + } + + + @Test + public void bitToZero(){ + MapToBit m = new MapToBit(2, 10); + m.fill(1); + AMapToData a = m.resize(1); + for(int i = 0; i < 10; i++) + assertEquals(0, a.getIndex(i)); + } + + @Test(expected = RuntimeException.class) + public void invalidJoin(){ + MapToBit a = new MapToBit(2, 100); + MapToBit b = new MapToBit(2, 200); + + a.preAggregateDDC_DDCSingleCol(b, null,null); + } + + @Test + public void equalsMapToZZero(){ + MapToZero m = new MapToZero(10); + assertEquals(m, new MapToZero(10)); + assertNotEquals(m, new MapToZero(11)); + assertNotEquals(m, new MapToZero(1)); + assertNotEquals(m, new MapToBit(2, 1)); + assertNotEquals(m, new MapToBit(2, 10)); + } + } diff --git a/src/test/java/org/apache/sysds/test/component/compress/mapping/MappingPreAggregateTests.java b/src/test/java/org/apache/sysds/test/component/compress/mapping/MappingPreAggregateTests.java index 7f51c8ed027..7e4191a5d0b 100644 --- a/src/test/java/org/apache/sysds/test/component/compress/mapping/MappingPreAggregateTests.java +++ b/src/test/java/org/apache/sysds/test/component/compress/mapping/MappingPreAggregateTests.java @@ -19,6 +19,7 @@ package org.apache.sysds.test.component.compress.mapping; +import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; import static org.junit.Assert.fail; @@ -34,6 +35,7 @@ import org.apache.sysds.runtime.compress.colgroup.mapping.MapToFactory.MAP_TYPE; import org.apache.sysds.runtime.compress.colgroup.offset.AOffset; import org.apache.sysds.runtime.compress.colgroup.offset.OffsetByte; +import org.apache.sysds.runtime.data.SparseBlockFactory; import org.apache.sysds.runtime.matrix.data.MatrixBlock; import org.apache.sysds.test.TestUtils; import org.junit.Test; @@ -258,7 +260,6 @@ public void testPreAggRowsColsRange(int rl, int ru, int cl, int cu) { @Test public void testPreAggregateDenseSingleRowWithIndexes() { switch(type) { - case BIT: case INT: return; default: @@ -289,6 +290,81 @@ public void testPreAggregateSparseSingleRowWithIndexes() { } } + @Test + public void testPreAggregateSparseSingleRow() { + try { + if(!sb.isInSparseFormat()) + return; + double[] pre = new double[m.getUnique()]; + m.preAggregateSparse(sb.getSparseBlock(), pre, 0, 1); + verifyPreaggregate(m, sb, 0, 1, pre); + } + catch(Exception e) { + e.printStackTrace(); + fail(e.toString()); + } + } + + @Test + public void testPreAggregateSparseMultiRow() { + try { + if(!sb.isInSparseFormat()) + return; + double[] pre = new double[m.getUnique() * sb.getNumRows()]; + m.preAggregateSparse(sb.getSparseBlock(), pre, 0, sb.getNumRows()); + verifyPreaggregate(m, sb, 0, sb.getNumRows(), pre); + } + catch(Exception e) { + e.printStackTrace(); + fail(e.toString()); + } + } + + @Test + public void testPreAggregateSparseEmptySingleRow() { + try { + if(!sb.isInSparseFormat()) + return; + double[] pre = new double[m.getUnique()]; + MatrixBlock sb2 = new MatrixBlock(sb.getNumRows(), sb.getNumColumns(), 0, SparseBlockFactory.createSparseBlock(sb.getNumRows())); + m.preAggregateSparse(sb2.getSparseBlock(), pre, 0, 1); + verifyPreaggregate(m, sb2, 0, 1, pre); + } + catch(Exception e) { + e.printStackTrace(); + fail(e.toString()); + } + } + + @Test + public void testPreAggregateSparseEmptyMultiRow() { + try { + if(!sb.isInSparseFormat()) + return; + double[] pre = new double[m.getUnique() * sb.getNumRows()]; + MatrixBlock sb2 = new MatrixBlock(sb.getNumRows(), sb.getNumColumns(), 0, SparseBlockFactory.createSparseBlock(sb.getNumRows())); + m.preAggregateSparse(sb2.getSparseBlock(), pre, 0, sb.getNumRows()); + verifyPreaggregate(m, sb2, 0, sb.getNumRows(), pre); + } + catch(Exception e) { + e.printStackTrace(); + fail(e.toString()); + } + } + + + private void verifyPreaggregate(AMapToData m, MatrixBlock mb, int rl, int ru, double[] ret){ + + double[] verification = new double[ret.length]; + for(int i = rl; i < ru; i++){ + for(int j = 0; j < mb.getNumColumns(); j++){ + verification[m.getIndex(j) + i * m.getUnique()] += mb.get(i,j); + } + } + assertArrayEquals(verification, ret, 0); + } + + private void compareRes(double[] expectedFull, double[] actual, int row) { String error = "\nNot equal elements with " + type + " " + m.getUnique(); int nVal = m.getUnique(); diff --git a/src/test/java/org/apache/sysds/test/component/compress/mapping/MappingTests.java b/src/test/java/org/apache/sysds/test/component/compress/mapping/MappingTests.java index dc64e64f41d..df84878fc25 100644 --- a/src/test/java/org/apache/sysds/test/component/compress/mapping/MappingTests.java +++ b/src/test/java/org/apache/sysds/test/component/compress/mapping/MappingTests.java @@ -20,6 +20,8 @@ package org.apache.sysds.test.component.compress.mapping; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; import java.io.ByteArrayInputStream; @@ -31,6 +33,7 @@ import java.util.Arrays; import java.util.Collection; import java.util.Random; +import java.util.concurrent.ExecutorService; import org.apache.commons.lang3.NotImplementedException; import org.apache.commons.logging.Log; @@ -38,9 +41,12 @@ import org.apache.sysds.runtime.compress.CompressedMatrixBlock; import org.apache.sysds.runtime.compress.colgroup.IMapToDataGroup; import org.apache.sysds.runtime.compress.colgroup.mapping.AMapToData; +import org.apache.sysds.runtime.compress.colgroup.mapping.MapToBit; import org.apache.sysds.runtime.compress.colgroup.mapping.MapToCharPByte; import org.apache.sysds.runtime.compress.colgroup.mapping.MapToFactory; import org.apache.sysds.runtime.compress.colgroup.mapping.MapToFactory.MAP_TYPE; +import org.apache.sysds.runtime.compress.colgroup.mapping.MapToZero; +import org.apache.sysds.runtime.util.CommonThreadPool; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; @@ -93,24 +99,28 @@ public MappingTests(int seed, MAP_TYPE type, int size, boolean fill) { this.seed = seed; this.type = type; this.size = size; - this.max = Math.min(Math.min(MappingTestUtil.getUpperBoundValue(type), fictiveMax) + 1, size); - expected = new int[size]; - m = genMap(MapToFactory.create(size, max), expected, max, fill, seed); + this.max = MappingTestUtil.getUpperBoundValue(type); + this.expected = new int[size]; + m = genMap(MapToFactory.create(size, (int) (Math.min(Integer.MAX_VALUE, (long) max + 1))), expected, max, fill, + seed); } public static AMapToData genMap(AMapToData m, int[] expected, int max, boolean fill, int seed) { - if(max <= 1) + if(max <= 0) return m; Random vals = new Random(seed); int size = m.size(); + + int randUpperBound = (int) (Math.min(Integer.MAX_VALUE, (long) max + 1)); + if(fill) { - int v = vals.nextInt(max); + int v = vals.nextInt(randUpperBound); m.fill(v); Arrays.fill(expected, v); } for(int i = 0; i < size; i++) { - int v = vals.nextInt(max); + int v = vals.nextInt(randUpperBound); if(fill) { if(v > max / 2) continue; @@ -126,7 +136,7 @@ public static AMapToData genMap(AMapToData m, int[] expected, int max, boolean f } // to make sure that the bit set is actually filled. - for(int i = 0; i < max; i++) { + for(int i = 0; i <= max && i < size; i++) { m.set(i, i); expected[i] = i; @@ -154,7 +164,6 @@ public void testSerialization() { DataInputStream fis = new DataInputStream(bis); AMapToData n = MapToFactory.readIn(fis); - compare(m, n); } catch(IOException e) { @@ -166,6 +175,23 @@ public void testSerialization() { } } + @Test + public void equalsTest() { + AMapToData tmp = MapToFactory.create(m.size(), m.getUnique()); + if(m instanceof MapToZero) + assertTrue(m.equals(tmp)); + else + assertFalse(m.equals(tmp)); + tmp.copy(m); + assertTrue(m.equals(tmp)); + } + + @Test + public void countRuns() { + int runs = m.countRuns(); + assertTrue(runs <= m.size()); + } + @Test public void testOnDiskSizeInBytes() { try { @@ -213,7 +239,7 @@ public void resize() { @Test public void resizeToSameSize() { // if we resize to same size return the same object! - AMapToData m_same = m.resize( m.getUnique()); + AMapToData m_same = m.resize(m.getUnique()); assertEquals("Resize did not return the correct same objects", m_same, m); } @@ -226,10 +252,12 @@ protected static void compare(AMapToData a, AMapToData b) { @Test public void replaceMax() { - m.replace(max - 1, 0); + if(m instanceof MapToZero) + return; + m.replace(max, 0); for(int i = 0; i < size; i++) { - expected[i] = expected[i] == max - 1 ? 0 : expected[i]; + expected[i] = expected[i] == max ? 0 : expected[i]; if(expected[i] != m.getIndex(i)) fail("Expected equals " + Arrays.toString(expected) + "\nbut got: " + m); } @@ -240,6 +268,9 @@ public void getCountsNoDefault() { try { int nVal = m.getUnique(); + if(nVal > 1000) + return; + int[] counts = m.getCounts(new int[nVal]); int sum = 0; for(int v : counts) @@ -256,6 +287,8 @@ public void getCountsNoDefault() { @Test public void replaceMin() { + if(m instanceof MapToZero) + return; int max = m.getUpperBoundValue(); m.replace(0, max); @@ -269,14 +302,17 @@ public void replaceMin() { @Test public void getUnique() { int u = m.getUnique(); - if(max != u) + if(m instanceof MapToZero) + return; + + if((int) (Math.min(Integer.MAX_VALUE, (long) max + 1)) != u) fail("incorrect number of unique " + m + "expectedInstances" + max + " got" + u); } @Test public void testInMemorySize() { long inMemorySize = m.getInMemorySize(); - long estimatedSize = MapToFactory.estimateInMemorySize(size, max); + long estimatedSize = MapToFactory.estimateInMemorySize(size, (int) (Math.min(Integer.MAX_VALUE, (long) max + 1))); if(estimatedSize != inMemorySize) fail(" estimated size is not actual size: \nest: " + estimatedSize + " act: " + inMemorySize + "\n" @@ -286,6 +322,8 @@ public void testInMemorySize() { @Test public void testAppend() { int nVal = m.getUnique(); + if(nVal > 10000) + return; int[] counts = m.getCounts(new int[nVal]); AMapToData m2 = m.append(m); @@ -300,6 +338,8 @@ public void testAppend() { @Test public void testAppendN() { int nVal = m.getUnique(); + if(nVal > 10000) + return; int[] counts = m.getCounts(new int[nVal]); try { @@ -350,6 +390,56 @@ public void testAppendNotSame() { LOG.error("Did not throw exception with: " + m); } + @Test + public void splitReshapeParallel() throws Exception { + if(m.size() % 2 == 0) { + + ExecutorService pool = CommonThreadPool.get(); + AMapToData[] ret = m.splitReshapeDDCPushDown(2, pool); + + for(int i = 0; i < m.size(); i++) { + assertEquals(m.getIndex(i), ret[i % 2].getIndex(i / 2)); + } + } + } + + @Test + public void splitReshape2() throws Exception { + if(m.size() % 2 == 0) { + + AMapToData[] ret = m.splitReshapeDDC(2); + + for(int i = 0; i < m.size(); i++) { + assertEquals(m.getIndex(i), ret[i % 2].getIndex(i / 2)); + } + } + } + + @Test + public void splitReshape4() throws Exception { + if(m.size() % 4 == 0) { + + AMapToData[] ret = m.splitReshapeDDC(4); + + for(int i = 0; i < m.size(); i++) { + assertEquals(m.getIndex(i), ret[i % 4].getIndex(i / 4)); + } + } + } + + @Test + public void getCounts() { + if(m.getUnique() > 10000) + return; + int[] counts = m.getCounts(); + int countZeros = 0; + for(int i = 0; i < m.size(); i++) { + if(m.getIndex(i) == 0) + countZeros++; + } + assertEquals(counts[0], countZeros); + } + private static class Holder implements IMapToDataGroup { AMapToData d; @@ -364,4 +454,60 @@ public AMapToData getMapToData() { } } + + @Test + public void slice() { + if(m.size() > 2) { + AMapToData s = m.slice(1, m.size() - 1); + for(int i = 0; i < m.size() - 2; i++) { + assertEquals(m.getIndex(i + 1), s.getIndex(i)); + } + } + } + + @Test + public void setRange() { + AMapToData tmp = MapToFactory.create(m.size(), m.getUnique()); + tmp.copy(m); + + tmp.set(0, m.size(), 0, new MapToZero(size)); + for(int i = 0; i < m.size(); i++) + assertEquals(0, tmp.getIndex(i)); + + if(m.size() > 11) { + tmp.copy(m); + + tmp.set(10, m.size(), 0, new MapToZero(size)); + for(int i = 0; i < 10; i++) + assertEquals(m.getIndex(i), tmp.getIndex(i)); + for(int i = 10; i < m.size(); i++) + assertEquals(0, tmp.getIndex(i)); + + if(m instanceof MapToZero) + return; + tmp.copy(m); + AMapToData tmp2 = new MapToBit(2, size - 10); + tmp2.fill(1); + tmp2.set(0, 0); + tmp.set(10, m.size(), 0, tmp2); + for(int i = 0; i < 10; i++) + assertEquals(m.getIndex(i), tmp.getIndex(i)); + assertEquals(0, tmp.getIndex(10)); + for(int i = 11; i < m.size(); i++) + assertEquals(1, tmp.getIndex(i)); + + for(MAP_TYPE t : MAP_TYPE.values()) { + if(t == MAP_TYPE.ZERO) + continue; + tmp.copy(m); + tmp2 = MapToFactory.resizeForce(tmp2, t); + tmp.set(10, m.size(), 0, tmp2); + for(int i = 0; i < 10; i++) + assertEquals(m.getIndex(i), tmp.getIndex(i)); + assertEquals(0, tmp.getIndex(10)); + for(int i = 11; i < m.size(); i++) + assertEquals(1, tmp.getIndex(i)); + } + } + } } diff --git a/src/test/java/org/apache/sysds/test/component/compress/mapping/MappingTestsResize.java b/src/test/java/org/apache/sysds/test/component/compress/mapping/MappingTestsResize.java deleted file mode 100644 index 3bcbb7ac041..00000000000 --- a/src/test/java/org/apache/sysds/test/component/compress/mapping/MappingTestsResize.java +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.sysds.test.component.compress.mapping; - -import static org.junit.Assert.fail; - -import java.util.ArrayList; -import java.util.Collection; - -import org.apache.sysds.runtime.compress.CompressedMatrixBlock; -import org.apache.sysds.runtime.compress.colgroup.mapping.AMapToData; -import org.apache.sysds.runtime.compress.colgroup.mapping.MapToFactory; -import org.apache.sysds.runtime.compress.colgroup.mapping.MapToFactory.MAP_TYPE; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; -import org.junit.runners.Parameterized.Parameters; - -@RunWith(value = Parameterized.class) -public class MappingTestsResize { - - public final int seed; - public final MAP_TYPE type; - public final int size; - - private AMapToData m; - private int[] expected; - - @Parameters - public static Collection data() { - ArrayList tests = new ArrayList<>(); - for(MAP_TYPE t : MAP_TYPE.values()) { - tests.add(new Object[] {1, t, 13, false}); - tests.add(new Object[] {1, t, 632, false}); - } - return tests; - } - - public MappingTestsResize(int seed, MAP_TYPE type, int size, boolean fill) { - CompressedMatrixBlock.debug = true; - this.seed = seed; - this.type = type; - this.size = size; - try{ - - final int max = Math.min(MappingTestUtil.getUpperBoundValue(type),size); - final int maxSmaller = Math.min(getMaxSmaller(type), size); - expected = new int[size]; - m = MappingTests.genMap(MapToFactory.create(size, max), expected, maxSmaller, fill, seed); - } - catch(Exception e){ - e.printStackTrace(); - fail("Failed creating mapping resize test"); - } - } - - @Test - public void resize() { - MappingTests.compare(m.resize(getMaxSmaller(type)), m); - } - - private int getMaxSmaller(MAP_TYPE type) { - switch(type) { - case BIT: - case UBYTE: - return 1; - case BYTE: - return 127; - case CHAR: - return (int) Math.pow(2, 8) - 1; - default: - return Character.MAX_VALUE; - } - } - -} diff --git a/src/test/java/org/apache/sysds/test/component/compress/mapping/PreAggregateDDC_DDCTest.java b/src/test/java/org/apache/sysds/test/component/compress/mapping/PreAggregateDDC_DDCTest.java index 4837275a32e..20a027ca120 100644 --- a/src/test/java/org/apache/sysds/test/component/compress/mapping/PreAggregateDDC_DDCTest.java +++ b/src/test/java/org/apache/sysds/test/component/compress/mapping/PreAggregateDDC_DDCTest.java @@ -57,6 +57,26 @@ public static Collection data() { final Random r = new Random(2321522); final int sm = Integer.MAX_VALUE; + create(tests, 10, 1, 1, 1, r.nextInt(sm)); + create(tests, 10, 1, 10, 1, r.nextInt(sm)); + create(tests, 10, 10, 1, 1, r.nextInt(sm)); + create(tests, 10, 1, 1, 2, r.nextInt(sm)); + create(tests, 10, 1, 10, 2, r.nextInt(sm)); + + create(tests, 10, 1, 1, 1, r.nextInt(sm)); + create(tests, 10, 1, 2, 1, r.nextInt(sm)); + create(tests, 10, 2, 1, 1, r.nextInt(sm)); + create(tests, 10, 1, 1, 2, r.nextInt(sm)); + create(tests, 10, 1, 2, 2, r.nextInt(sm)); + + create(tests, 66, 1, 1, 1, r.nextInt(sm)); + create(tests, 66, 1, 2, 1, r.nextInt(sm)); + create(tests, 66, 2, 1, 1, r.nextInt(sm)); + create(tests, 66, 1, 1, 2, r.nextInt(sm)); + create(tests, 66, 1, 2, 2, r.nextInt(sm)); + + + create(tests, 10, 10, 1, 2, r.nextInt(sm)); create(tests, 10, 10, 5, 1, r.nextInt(sm)); create(tests, 10, 10, 5, 1, r.nextInt(sm)); create(tests, 100, 10, 5, 1, r.nextInt(sm)); @@ -76,6 +96,8 @@ public static Collection data() { create(tests, 10000, 32, 2, 1, r.nextInt(sm)); create(tests, 10000, 2, 2, 1, r.nextInt(sm)); create(tests, 10000, 2, 2, 10, r.nextInt(sm)); + create(tests, 10005, 2, 2, 1, r.nextInt(sm)); + create(tests, 10005, 2, 2, 10, r.nextInt(sm)); createSkewed(tests, 10000, 2, 2, 10, r.nextInt(sm), 0.1); createSkewed(tests, 10000, 2, 2, 10, r.nextInt(sm), 0.01); diff --git a/src/test/java/org/apache/sysds/test/component/compress/mapping/PreAggregateSDCZ_SDCZTest.java b/src/test/java/org/apache/sysds/test/component/compress/mapping/PreAggregateSDCZ_SDCZTest.java index f2d85c056d1..2ba02c70d16 100644 --- a/src/test/java/org/apache/sysds/test/component/compress/mapping/PreAggregateSDCZ_SDCZTest.java +++ b/src/test/java/org/apache/sysds/test/component/compress/mapping/PreAggregateSDCZ_SDCZTest.java @@ -75,6 +75,10 @@ public static Collection data() { create(tests, 10000, 150, 13, 1, 1000, 100, r.nextInt(sm)); create(tests, 10000, 150, 149, 1, 1000, 100, r.nextInt(sm)); + create(tests, 10000, 32, 200, 1, 100, 1000, r.nextInt(sm)); + create(tests, 10000, 150, 13, 1, 100, 1000, r.nextInt(sm)); + create(tests, 10000, 150, 149, 1, 100, 1000, r.nextInt(sm)); + return tests; }