Skip to content

Commit 3053dc7

Browse files
committed
Merge branch 'master' of github.com:lemire/simdcomp
2 parents c366d46 + 54c5154 commit 3053dc7

File tree

7 files changed

+134
-15
lines changed

7 files changed

+134
-15
lines changed

CHANGELOG

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,3 @@
1+
Version 0.0.2 (6 February 2014)
2+
- added go demo
13
Version 0.0.1 (5 February 2014)

README.md

Lines changed: 32 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,34 @@
11
The SIMDComp library
22
====================
33

4-
A simple C library for compressing lists of integers
4+
A simple C library for compressing lists of integers using binary packing and SIMD instructions.
5+
6+
This library can decode billions of compressed integers per second on most
7+
desktop or laptop processors.
8+
9+
What is it for?
10+
-------------
11+
12+
This is a low-level library for fast integer compression. By design it does not define a compressed
13+
format. It is up to the (sophisticated) user to create a compressed format.
514

615
Requirements
7-
=============
16+
-------------
817

918
- Your processor should support SSE2 (Pentium4 or better)
1019
- C99 compliant compiler (GCC is assumed)
1120
- A Linux-like distribution is assumed by the makefile
1221

1322
Usage
14-
=======
23+
-------
1524

1625
Compression works over blocks of 128 integers.
1726

27+
For a complete working example, see example.c (you can build it and
28+
run it with "make example; ./example").
29+
30+
31+
1832
1) Lists of integers in random order.
1933

2034
const uint32_t b = maxbits(datain);// computes bit width
@@ -33,7 +47,8 @@ We used differential coding: we store the difference between successive integers
3347
simdunpackd1(offset, buffer, backbuffer, b1);//uncompressed
3448

3549
Setup
36-
=======
50+
---------
51+
3752

3853
make
3954
make test
@@ -43,12 +58,23 @@ and if you are daring:
4358
make install
4459

4560
Go
46-
===
61+
--------
4762

4863
If you are a Go user, there is a "go" folder where you will find a simple demo.
4964

65+
Other libraries
66+
----------------
67+
68+
FastPFOR is a C++ research library well suited to compress unsorted arrays:
69+
https://github.com/lemire/FastPFor
70+
71+
SIMDCompressionAndIntersection is a C++ research library well suited for sorted arrays (differential coding)
72+
and computing intersections:
73+
https://github.com/lemire/SIMDCompressionAndIntersection
74+
5075
References
51-
===========
76+
------------
77+
5278

5379
Daniel Lemire and Leonid Boytsov, Decoding billions of integers per second through vectorization, Software: Practice & Experience, 2013.
5480
http://dx.doi.org/10.1002/spe.2203

example.c

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
#include <stdio.h>
2+
#include <time.h>
3+
#include "simdcomp.h"
4+
5+
6+
// Compresses "length" integers from datain into buffer, one block of
// SIMDBlockSize integers at a time, using the differential ("d1") packing
// routines. Each block is written as:
//   - one byte holding the bit width b chosen for that block, followed by
//   - b * sizeof(__m128i) bytes of packed data.
// Only the length / SIMDBlockSize complete blocks are encoded. If length is
// not a multiple of SIMDBlockSize a message is printed, but the function
// still proceeds and the trailing integers are simply not encoded.
// Returns how many bytes were written to buffer.
// NOTE(review): the caller must supply a buffer large enough for the
// output; nothing here checks the capacity.
7+
size_t compress(uint32_t * datain, size_t length, uint8_t * buffer) {
8+
if(length/SIMDBlockSize*SIMDBlockSize != length) { // true when length is not a whole number of blocks
9+
printf("Data length should be a multiple of %i \n",SIMDBlockSize);
10+
}
11+
uint32_t offset = 0; // initial value handed to the d1 routines for the first block
12+
uint8_t * initout = buffer; // remember the start so the byte count can be computed at the end
13+
for(size_t k = 0; k < length / SIMDBlockSize; ++k) {
14+
uint32_t b = simdmaxbitsd1(offset, // bit width for this block, computed relative to offset
15+
datain + k * SIMDBlockSize);
16+
*buffer++ = b; // one-byte header: the bit width the decoder needs for this block
17+
// NOTE(review): after the one-byte header, buffer is in general not
// 16-byte aligned; confirm that the pack routine tolerates an unaligned
// __m128i pointer.
simdpackwithoutmaskd1(offset, datain + k * SIMDBlockSize, (__m128i *) buffer,
18+
b);
19+
offset = datain[k * SIMDBlockSize + SIMDBlockSize - 1]; // last value of this block seeds the next block
20+
buffer += b * sizeof(__m128i); // skip over the b 128-bit vectors just written
21+
}
22+
return buffer - initout; // total bytes written (headers + packed data)
23+
}
24+
25+
26+
// Demo driver: for gap = 1, 3, 9, 27, 81, 243 it fills datain with the
// sequence k * gap, compresses it with compress(), prints the compression
// rate, then decodes the stream block by block with simdunpackd1() while
// timing the decode loop with clock(), and prints the decoding speed.
// Always returns 0.
// NOTE(review): none of the malloc results are checked before use.
// NOTE(review): malloc/free need <stdlib.h>, which this file does not
// include directly; presumably it arrives through simdcomp.h; confirm.
int main() {
27+
int N = 5000 * SIMDBlockSize;//SIMDBlockSize is 128
28+
uint32_t * datain = malloc(N * sizeof(uint32_t)); // input integers
29+
size_t compsize; // number of bytes produced by compress()
30+
clock_t start, end;
31+
32+
uint8_t * buffer = malloc(N * sizeof(uint32_t) + N / SIMDBlockSize); // output buffer: room for N 32-bit integers plus one bit-width byte per block
33+
uint32_t * backbuffer = malloc(SIMDBlockSize * sizeof(uint32_t)); // holds one decoded block
34+
for (int gap = 1; gap <= 243; gap *= 3) {
35+
printf("\n");
36+
printf(" gap = %u \n", gap); // NOTE(review): gap is an int but the format uses %u
37+
for (int k = 0; k < N; ++k)
38+
datain[k] = k * gap; // increasing sequence with a constant difference of gap
39+
uint32_t offset = 0; // decoder starts from the same initial value as compress()
40+
compsize = compress(datain,N,buffer);
41+
printf("compression rate = %f \n", (N * sizeof(uint32_t))/ (compsize * 1.0 )); // uncompressed bytes divided by compressed bytes
42+
uint8_t * decbuffer = buffer; // read cursor into the compressed stream
43+
start = clock();
44+
uint32_t bogus = 0; // accumulates decoded values; presumably keeps the decode loop from being optimized away
45+
for (int k = 0; k * SIMDBlockSize < N; ++k) {
46+
uint8_t b = *decbuffer++; // per-block header: bit width written by compress()
47+
simdunpackd1(offset, (__m128i *) decbuffer, backbuffer, b); // reads b 128-bit vectors, writes SIMDBlockSize integers
48+
// do something here with backbuffer
49+
bogus += backbuffer[3];
50+
decbuffer += b * sizeof(__m128i); // advance past this block's packed data
51+
offset = backbuffer[SIMDBlockSize - 1]; // last decoded value seeds the next block
52+
}
53+
end = clock();
54+
double numberofseconds = (end-start)/(double)CLOCKS_PER_SEC;
55+
printf("decoding speed in million of integers per second %f \n",N/(numberofseconds*1000.0*1000.0));
56+
printf("ignore me %i \n",bogus); // NOTE(review): bogus is a uint32_t but the format uses %i
57+
}
58+
free(buffer);
59+
free(datain);
60+
free(backbuffer);
61+
return 0;
62+
}
63+

include/simdbitpacking.h

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,14 @@
77
#include <emmintrin.h>// SSE2 is required
88
#include <stdint.h> // use a C99-compliant compiler, please
99
#include <string.h> // for memset
10-
//reads 128 values in in, writes bit values from out
10+
11+
//reads 128 values from "in", writes "bit" 128-bit vectors to "out"
1112
void simdpack(const uint32_t * in,__m128i * out, uint32_t bit);
12-
//reads 128 values in in, writes bit values from out
13+
14+
//reads 128 values from "in", writes "bit" 128-bit vectors to "out"
1315
void simdpackwithoutmask(const uint32_t * in,__m128i * out, uint32_t bit);
14-
//reads bit values in in, writes 128 values to out
16+
17+
//reads "bit" 128-bit vectors from "in", writes 128 values to "out"
1518
void simdunpack(const __m128i * in,uint32_t * out, uint32_t bit);
1619

1720

include/simdintegratedbitpacking.h

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,18 @@
99
#include <stdint.h> // use a C99-compliant compiler, please
1010

1111
#include "simdcomputil.h"
12-
//reads 128 values in in, writes bit values from out
12+
13+
//reads 128 values from "in", writes "bit" 128-bit vectors to "out"
14+
// integer values should be in sorted order (for best results)
1315
void simdpackd1(uint32_t initvalue, const uint32_t * in,__m128i * out, uint32_t bit);
14-
//reads 128 values in in, writes bit values from out
16+
17+
18+
//reads 128 values from "in", writes "bit" 128-bit vectors to "out"
19+
// integer values should be in sorted order (for best results)
1520
void simdpackwithoutmaskd1(uint32_t initvalue, const uint32_t * in,__m128i * out, uint32_t bit);
16-
//reads bit values in in, writes 128 values to out
21+
22+
23+
//reads "bit" 128-bit vectors from "in", writes 128 values to "out"
1724
void simdunpackd1(uint32_t initvalue, const __m128i * in,uint32_t * out, uint32_t bit);
1825

1926

makefile

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,9 @@ simdbitpacking.o: ./src/simdbitpacking.c $(HEADERS)
4242
simdintegratedbitpacking.o: ./src/simdintegratedbitpacking.c $(HEADERS)
4343
$(CC) $(CFLAGS) -c ./src/simdintegratedbitpacking.c -Iinclude
4444

45+
example: ./example.c $(HEADERS) $(OBJECTS)
46+
$(CC) $(CFLAGS) -o example ./example.c -Iinclude $(OBJECTS)
47+
4548
unit: ./src/unit.c $(HEADERS) $(OBJECTS)
4649
$(CC) $(CFLAGS) -o unit ./src/unit.c -Iinclude $(OBJECTS)
4750
dynunit: ./src/unit.c $(HEADERS) $(LIBNAME)

src/unit.c

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
#include <stdio.h>
55
#include "simdcomp.h"
66

7+
78
int main() {
89
int N = 5000 * SIMDBlockSize;
910
__m128i * buffer = malloc(SIMDBlockSize * sizeof(uint32_t));
@@ -15,20 +16,34 @@ int main() {
1516
datain[k] = k * gap;
1617
uint32_t offset = 0;
1718
for (int k = 0; k * SIMDBlockSize < N; ++k) {
19+
/////////////////////////////
20+
// First part works for general arrays (sorted or unsorted)
21+
/////////////////////////////
22+
// we compute the bit width
1823
const uint32_t b = maxbits(datain + k * SIMDBlockSize);
19-
simdpackwithoutmask(datain + k * SIMDBlockSize, buffer, b);//compressed
24+
// we read 128 integers at "datain + k * SIMDBlockSize" and
25+
// write b 128-bit vectors at "buffer"
26+
simdpackwithoutmask(datain + k * SIMDBlockSize, buffer, b);
27+
// we read back b 128-bit vectors at "buffer" and write 128 integers at backbuffer
2028
simdunpack(buffer, backbuffer, b);//uncompressed
2129
for (int j = 0; j < SIMDBlockSize; ++j) {
2230
if (backbuffer[j] != datain[k * SIMDBlockSize + j]) {
2331
printf("bug in simdpack\n");
2432
return -2;
2533
}
2634
}
35+
/////////////////////////////
36+
// next part assumes that the data is sorted (uses differential coding)
37+
/////////////////////////////
38+
// we compute the bit width
2739
const uint32_t b1 = simdmaxbitsd1(offset,
2840
datain + k * SIMDBlockSize);
41+
// we read 128 integers at "datain + k * SIMDBlockSize" and
42+
// write b1 128-bit vectors at "buffer"
2943
simdpackwithoutmaskd1(offset, datain + k * SIMDBlockSize, buffer,
30-
b1);//compressed
31-
simdunpackd1(offset, buffer, backbuffer, b1);//uncompressed
44+
b1);
45+
// we read back b1 128-bit vectors at "buffer" and write 128 integers at backbuffer
46+
simdunpackd1(offset, buffer, backbuffer, b1);
3247
for (int j = 0; j < SIMDBlockSize; ++j) {
3348
if (backbuffer[j] != datain[k * SIMDBlockSize + j]) {
3449
printf("bug in simdpack d1\n");

0 commit comments

Comments
 (0)