Skip to content

Commit dde3e49

Browse files
committed
Fixing more overflows
1 parent 1ad101c commit dde3e49

File tree

3 files changed

+170
-79
lines changed

3 files changed

+170
-79
lines changed

src/simdbitpacking.c

Lines changed: 51 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -14261,28 +14261,63 @@ const __m128i *simdunpack_shortlength(const __m128i *in, int length,
1426114261
inwordpointer = 0;
1426214262
P = _mm_loadu_si128((__m128i *)in);
1426314263
++in;
14264-
for (k = 0; k < length / 4; ++k) {
14265-
__m128i answer = _mm_srli_epi32(P, inwordpointer);
14266-
const uint32_t firstpass = sizeof(uint32_t) * 8 - inwordpointer;
14267-
if (bit < firstpass) {
14268-
inwordpointer += bit;
14269-
} else {
14270-
P = _mm_loadu_si128((__m128i *)in);
14271-
++in;
14272-
answer = _mm_or_si128(_mm_slli_epi32(P, firstpass), answer);
14273-
inwordpointer = bit - firstpass;
14264+
if (length % 4 == 0) {
14265+
14266+
for (k = 0; k + 1 < length / 4; ++k) {
14267+
__m128i answer = _mm_srli_epi32(P, inwordpointer);
14268+
const uint32_t firstpass = sizeof(uint32_t) * 8 - inwordpointer;
14269+
if (bit < firstpass) {
14270+
inwordpointer += bit;
14271+
} else {
14272+
P = _mm_loadu_si128((__m128i *)in);
14273+
++in;
14274+
answer = _mm_or_si128(_mm_slli_epi32(P, firstpass), answer);
14275+
inwordpointer = bit - firstpass;
14276+
}
14277+
answer = _mm_and_si128(maskbits, answer);
14278+
_mm_storeu_si128((__m128i *)out, answer);
14279+
out += 4;
14280+
}
14281+
if (k < length / 4) {
14282+
__m128i answer = _mm_srli_epi32(P, inwordpointer);
14283+
const uint32_t firstpass = sizeof(uint32_t) * 8 - inwordpointer;
14284+
if (bit < firstpass) {
14285+
inwordpointer += bit;
14286+
} else if (bit == firstpass) {
14287+
inwordpointer = 0;
14288+
} else {
14289+
P = _mm_loadu_si128((__m128i *)in);
14290+
++in;
14291+
answer = _mm_or_si128(_mm_slli_epi32(P, firstpass), answer);
14292+
inwordpointer = bit - firstpass;
14293+
}
14294+
answer = _mm_and_si128(maskbits, answer);
14295+
_mm_storeu_si128((__m128i *)out, answer);
14296+
out += 4;
14297+
}
14298+
14299+
} else {
14300+
for (k = 0; k < length / 4; ++k) {
14301+
__m128i answer = _mm_srli_epi32(P, inwordpointer);
14302+
const uint32_t firstpass = sizeof(uint32_t) * 8 - inwordpointer;
14303+
if (bit < firstpass) {
14304+
inwordpointer += bit;
14305+
} else {
14306+
P = _mm_loadu_si128((__m128i *)in);
14307+
++in;
14308+
answer = _mm_or_si128(_mm_slli_epi32(P, firstpass), answer);
14309+
inwordpointer = bit - firstpass;
14310+
}
14311+
answer = _mm_and_si128(maskbits, answer);
14312+
_mm_storeu_si128((__m128i *)out, answer);
14313+
out += 4;
1427414314
}
14275-
answer = _mm_and_si128(maskbits, answer);
14276-
_mm_storeu_si128((__m128i *)out, answer);
14277-
out += 4;
14278-
}
14279-
if (length % 4 != 0) {
1428014315
uint32_t buffer[4];
1428114316
__m128i answer = _mm_srli_epi32(P, inwordpointer);
1428214317
const uint32_t firstpass = sizeof(uint32_t) * 8 - inwordpointer;
1428314318
if (bit < firstpass) {
1428414319
inwordpointer += bit;
14285-
} else if(bit == firstpass) {
14320+
} else if (bit == firstpass) {
1428614321
inwordpointer = 0;
1428714322
} else {
1428814323
P = _mm_loadu_si128((__m128i *)in);

src/simdfor.c

Lines changed: 52 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -15076,7 +15076,7 @@ __m128i *simdpackFOR_length(uint32_t initvalue, const uint32_t *in, int length,
1507615076
buffer[k] = in[length / 4 * 4 + k];
1507715077
}
1507815078
for (k = (length % 4); k < 4; ++k) {
15079-
buffer[k] = 0;
15079+
buffer[k] = initvalue;
1508015080
}
1508115081
value = _mm_sub_epi32(_mm_loadu_si128((__m128i *)buffer), offset);
1508215082
P = _mm_or_si128(P, _mm_slli_epi32(value, inwordpointer));
@@ -15120,28 +15120,63 @@ const __m128i *simdunpackFOR_length(uint32_t initvalue, const __m128i *in,
1512015120
inwordpointer = 0;
1512115121
P = _mm_loadu_si128((__m128i *)in);
1512215122
++in;
15123-
for (k = 0; k < length / 4; ++k) {
15124-
__m128i answer = _mm_srli_epi32(P, inwordpointer);
15125-
const uint32_t firstpass = sizeof(uint32_t) * 8 - inwordpointer;
15126-
if (bit < firstpass) {
15127-
inwordpointer += bit;
15128-
} else {
15129-
P = _mm_loadu_si128((__m128i *)in);
15130-
++in;
15131-
answer = _mm_or_si128(_mm_slli_epi32(P, firstpass), answer);
15132-
inwordpointer = bit - firstpass;
15123+
if (length % 4 == 0) {
15124+
15125+
for (k = 0; k + 1 < length / 4; ++k) {
15126+
__m128i answer = _mm_srli_epi32(P, inwordpointer);
15127+
const uint32_t firstpass = sizeof(uint32_t) * 8 - inwordpointer;
15128+
if (bit < firstpass) {
15129+
inwordpointer += bit;
15130+
} else {
15131+
P = _mm_loadu_si128((__m128i *)in);
15132+
++in;
15133+
answer = _mm_or_si128(_mm_slli_epi32(P, firstpass), answer);
15134+
inwordpointer = bit - firstpass;
15135+
}
15136+
answer = _mm_and_si128(maskbits, answer);
15137+
_mm_storeu_si128((__m128i *)out, _mm_add_epi32(answer, offset));
15138+
out += 4;
15139+
}
15140+
if (k < length / 4) {
15141+
__m128i answer = _mm_srli_epi32(P, inwordpointer);
15142+
const uint32_t firstpass = sizeof(uint32_t) * 8 - inwordpointer;
15143+
if (bit < firstpass) {
15144+
inwordpointer += bit;
15145+
} else if (bit == firstpass) {
15146+
inwordpointer = 0;
15147+
} else {
15148+
P = _mm_loadu_si128((__m128i *)in);
15149+
++in;
15150+
answer = _mm_or_si128(_mm_slli_epi32(P, firstpass), answer);
15151+
inwordpointer = bit - firstpass;
15152+
}
15153+
answer = _mm_and_si128(maskbits, answer);
15154+
_mm_storeu_si128((__m128i *)out, _mm_add_epi32(answer, offset));
15155+
out += 4;
15156+
}
15157+
15158+
} else {
15159+
for (k = 0; k < length / 4; ++k) {
15160+
__m128i answer = _mm_srli_epi32(P, inwordpointer);
15161+
const uint32_t firstpass = sizeof(uint32_t) * 8 - inwordpointer;
15162+
if (bit < firstpass) {
15163+
inwordpointer += bit;
15164+
} else {
15165+
P = _mm_loadu_si128((__m128i *)in);
15166+
++in;
15167+
answer = _mm_or_si128(_mm_slli_epi32(P, firstpass), answer);
15168+
inwordpointer = bit - firstpass;
15169+
}
15170+
answer = _mm_and_si128(maskbits, answer);
15171+
_mm_storeu_si128((__m128i *)out, _mm_add_epi32(answer, offset));
15172+
out += 4;
1513315173
}
15134-
answer = _mm_and_si128(maskbits, answer);
15135-
_mm_storeu_si128((__m128i *)out, _mm_add_epi32(answer, offset));
15136-
out += 4;
15137-
}
15138-
if (length % 4 != 0) {
1513915174
uint32_t buffer[4];
1514015175
__m128i answer = _mm_srli_epi32(P, inwordpointer);
1514115176
const uint32_t firstpass = sizeof(uint32_t) * 8 - inwordpointer;
1514215177
if (bit < firstpass) {
1514315178
inwordpointer += bit;
15144-
} else if(bit == firstpass) {
15179+
} else if (bit == firstpass) {
1514515180
inwordpointer = 0;
1514615181
} else {
1514715182
P = _mm_loadu_si128((__m128i *)in);

tests/unit.c

Lines changed: 67 additions & 46 deletions
Original file line number | Diff line number | Diff line change
@@ -7,57 +7,78 @@
77
#include <stdlib.h>
88

99
int issue21() {
10-
size_t sz = 110;
11-
size_t i;
12-
uint32_t *in = malloc(sz * sizeof(uint32_t));
13-
uint32_t *out = malloc(sz * sizeof(uint32_t));
14-
for (i = 0; i < sz; ++i)
15-
in[i] = 255;
16-
uint32_t b = maxbits_length(in, sz);
17-
uint8_t *buf = malloc(simdpack_compressedbytes(sz, b));
18-
__m128i *end = simdpack_length(in, sz, (__m128i *)buf, b);
19-
if((uint8_t *)end - buf != simdpack_compressedbytes(sz, b)) {
20-
printf("bad mem usage\n");
21-
return -1;
22-
}
23-
simdunpack_length((const __m128i *)buf, sz, out, b);
24-
for (i = 0; i < sz; ++i) {
25-
if (in[i] != out[i]) {
26-
printf("bug\n");
27-
return -1;
10+
printf("issue21");
11+
fflush(stdout);
12+
for (uint32_t bw = 0; bw < 30; bw++) {
13+
printf(".");
14+
fflush(stdout);
15+
for (size_t sz = 1; sz < 4096; sz++) {
16+
17+
size_t i;
18+
uint32_t *in = malloc(sz * sizeof(uint32_t));
19+
uint32_t *out = malloc(sz * sizeof(uint32_t));
20+
for (i = 0; i < sz; ++i)
21+
in[i] = (1 << bw) - 1;
22+
uint32_t b = maxbits_length(in, sz);
23+
uint8_t *buf = malloc(simdpack_compressedbytes(sz, b));
24+
__m128i *end = simdpack_length(in, sz, (__m128i *)buf, b);
25+
if ((uint8_t *)end - buf != simdpack_compressedbytes(sz, b)) {
26+
printf("bad mem usage\n");
27+
return -1;
28+
}
29+
simdunpack_length((const __m128i *)buf, sz, out, b);
30+
for (i = 0; i < sz; ++i) {
31+
if (in[i] != out[i]) {
32+
printf("bug\n");
33+
return -1;
34+
}
35+
}
36+
free(in);
37+
free(out);
38+
free(buf);
2839
}
2940
}
30-
free(in);
31-
free(out);
32-
free(buf);
41+
printf("\n");
3342
return 0;
3443
}
3544

3645
int issue21FOR() {
37-
size_t i;
38-
size_t sz = 110;
39-
uint32_t *in = malloc(sz * sizeof(uint32_t));
40-
uint32_t *out = malloc(sz * sizeof(uint32_t));
41-
in[0] = 0;
42-
for (i = 1; i < sz; ++i)
43-
in[i] = 255;
44-
uint32_t b = maxbits_length(in, sz);
45-
uint8_t *buf = malloc(simdpackFOR_compressedbytes(sz, b));
46-
__m128i *end = simdpackFOR_length(0, in, sz, (__m128i *)buf, b);
47-
if((uint8_t *)end - buf != simdpackFOR_compressedbytes(sz, b)) {
48-
printf("bad mem usage\n");
49-
return -1;
50-
}
51-
simdunpackFOR_length(0, (const __m128i *)buf, sz, out, b);
52-
for (i = 0; i < sz; ++i) {
53-
if (in[i] != out[i]) {
54-
printf("bug\n");
55-
return -1;
46+
size_t i, j;
47+
printf("issue21for");
48+
fflush(stdout);
49+
for (uint32_t bw = 0; bw < 30; bw++) {
50+
printf(".");
51+
fflush(stdout);
52+
for (size_t sz = 1; sz < 4096; sz++) {
53+
54+
uint32_t *in = malloc(sz * sizeof(uint32_t));
55+
uint32_t *out = malloc(sz * sizeof(uint32_t));
56+
in[0] = 0;
57+
for (i = 1; i < sz; ++i)
58+
in[i] = (1 << bw) - 1;
59+
uint32_t b = maxbits_length(in, sz);
60+
uint8_t *buf = malloc(simdpackFOR_compressedbytes(sz, b));
61+
__m128i *end = simdpackFOR_length(0, in, sz, (__m128i *)buf, b);
62+
if ((uint8_t *)end - buf != simdpackFOR_compressedbytes(sz, b)) {
63+
printf("bad mem usage\n");
64+
return -1;
65+
}
66+
simdunpackFOR_length(0, (const __m128i *)buf, sz, out, b);
67+
for (i = 0; i < sz; ++i) {
68+
if (in[i] != out[i]) {
69+
for (j = 0; j < sz; ++j) {
70+
printf("%zu : %u %u \n", j, in[j], out[j]);
71+
}
72+
printf("bug\n");
73+
return -1;
74+
}
75+
}
76+
free(in);
77+
free(out);
78+
free(buf);
5679
}
5780
}
58-
free(in);
59-
free(out);
60-
free(buf);
81+
printf("\n");
6182
return 0;
6283
}
6384

@@ -404,7 +425,7 @@ int testavx2() {
404425
int k;
405426
printf(" gap = %u \n", gap);
406427
for (k = 0; k < N; ++k)
407-
datain[k] = (uint32_t)(((uint64_t)k * gap)&0xFFFFFFFF);
428+
datain[k] = (uint32_t)(((uint64_t)k * gap) & 0xFFFFFFFF);
408429
for (k = 0; k * AVXBlockSize < N; ++k) {
409430
/*
410431
First part works for general arrays (sorted or unsorted)
@@ -567,7 +588,7 @@ int test() {
567588
int k;
568589
printf(" gap = %u \n", gap);
569590
for (k = 0; k < N; ++k)
570-
datain[k] = (uint32_t)(((uint64_t)k * gap)&0xFFFFFFFF);
591+
datain[k] = (uint32_t)(((uint64_t)k * gap) & 0xFFFFFFFF);
571592
for (k = 0; k * SIMDBlockSize < N; ++k) {
572593
/*
573594
First part works for general arrays (sorted or unsorted)
@@ -630,7 +651,7 @@ int testFOR() {
630651
int k;
631652
printf(" gap = %u \n", gap);
632653
for (k = 0; k < N; ++k)
633-
datain[k] = (uint32_t)(((uint64_t)k * gap)&0xFFFFFFFF);
654+
datain[k] = (uint32_t)(((uint64_t)k * gap) & 0xFFFFFFFF);
634655
for (k = 0; k * SIMDBlockSize < N; ++k) {
635656
int j;
636657
simdmaxmin_length(datain + k * SIMDBlockSize, SIMDBlockSize, &tmin,

0 commit comments

Comments
 (0)