Commit 0558c35

[perf] use StackVector to save memory & CPU

1 parent fb302a1 commit 0558c35

4 files changed: +61 -25 lines

src/filters/network.rs

Lines changed: 4 additions & 9 deletions

@@ -15,9 +15,7 @@ use crate::filters::abstract_network::{
 use crate::lists::ParseOptions;
 use crate::regex_manager::RegexManager;
 use crate::request;
-use crate::utils::{self, Hash};
-
-pub(crate) const TOKENS_BUFFER_SIZE: usize = 200;
+use crate::utils::{self, Hash, TokensBuffer};
 
 /// For now, only support `$removeparam` with simple alphanumeric/dash/underscore patterns.
 static VALID_PARAM: Lazy<Regex> = Lazy::new(|| Regex::new(r"^[a-zA-Z0-9_\-]+$").unwrap());
@@ -895,7 +893,7 @@ impl NetworkFilter {
     }
 
     pub fn get_tokens_optimized(&self) -> FilterTokens {
-        let mut tokens: Vec<Hash> = Vec::with_capacity(TOKENS_BUFFER_SIZE);
+        let mut tokens = TokensBuffer::default();
 
         // If there is only one domain and no domain negation, we also use this
        // domain as a token.
@@ -905,7 +903,7 @@ impl NetworkFilter {
         {
             if let Some(domains) = self.opt_domains.as_ref() {
                 if let Some(domain) = domains.first() {
-                    tokens.push(*domain)
+                    tokens.push(*domain);
                 }
             }
         }
@@ -963,10 +961,7 @@ impl NetworkFilter {
                 tokens.push(utils::fast_hash("https"));
             }
 
-            // Remake a vector to drop extra capacity.
-            let mut t = Vec::with_capacity(tokens.len());
-            t.extend(tokens);
-            FilterTokens::Other(t)
+            FilterTokens::Other(tokens.into_vec())
         }
     }
 }
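Note: the deleted shrink-copy at the end of `get_tokens_optimized` is subsumed by `StackVector::into_vec`, which allocates its result with capacity equal to the number of elements actually pushed. A minimal in-crate sketch of the new pattern (the demo function and the hashed strings are illustrative, not part of the commit):

    use crate::utils::{self, TokensBuffer};

    fn collect_demo_tokens() -> Vec<utils::Hash> {
        // Tokens accumulate in a fixed 200-slot buffer on the stack;
        // no heap allocation happens while pushing.
        let mut tokens = TokensBuffer::default();
        tokens.push(utils::fast_hash("https"));
        tokens.push(utils::fast_hash("example"));
        // The single heap allocation: a Vec sized to the two pushed
        // elements, so no shrink-copy is needed afterwards.
        tokens.into_vec()
    }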

src/flatbuffers/unsafe_tools.rs

Lines changed: 44 additions & 0 deletions

@@ -1,6 +1,7 @@
 //! Unsafe utility functions for working with flatbuffers and other low-level operations.
 
 use crate::filters::flatbuffer_generated::fb;
+use std::mem::MaybeUninit;
 
 // Minimum alignment for the beginning of the flatbuffer data.
 const MIN_ALIGNMENT: usize = 8;
@@ -97,3 +98,46 @@ impl VerifiedFlatbufferMemory {
         &self.raw_data[self.start..]
     }
 }
+
+/// A simple stack-allocated vector.
+/// It is used to avoid allocations when the vector is small.
+pub(crate) struct StackVector<T, const MAX_SIZE: usize> {
+    data: [MaybeUninit<T>; MAX_SIZE],
+    size: usize,
+}
+
+impl<T, const MAX_SIZE: usize> Default for StackVector<T, MAX_SIZE>
+where
+    T: Default + Copy,
+{
+    fn default() -> Self {
+        Self {
+            data: [MaybeUninit::uninit(); MAX_SIZE],
+            size: 0,
+        }
+    }
+}
+
+impl<T, const MAX_SIZE: usize> StackVector<T, MAX_SIZE> {
+    pub fn push(&mut self, value: T) -> bool {
+        if self.size < MAX_SIZE {
+            self.data[self.size] = MaybeUninit::new(value);
+            self.size += 1;
+            true
+        } else {
+            false
+        }
+    }
+
+    pub fn is_empty(&self) -> bool {
+        self.size == 0
+    }
+
+    pub fn into_vec(self) -> Vec<T> {
+        let mut v = Vec::with_capacity(self.size);
+        for i in 0..self.size {
+            v.push(unsafe { self.data[i].assume_init_read() });
+        }
+        v
+    }
+}
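A usage sketch (not part of the commit) showing the container's semantics, in particular that `push` reports overflow instead of reallocating:

    use crate::flatbuffers::unsafe_tools::StackVector;

    fn stack_vector_demo() {
        // Room for at most four u64s, held entirely on the stack.
        let mut v: StackVector<u64, 4> = StackVector::default();
        assert!(v.is_empty());

        for i in 0..6u64 {
            // `push` returns true while a slot is free; once the four
            // slots are full it returns false and the value is dropped.
            let pushed = v.push(i);
            assert_eq!(pushed, i < 4);
        }

        // The only heap allocation: a Vec sized to the four stored items.
        assert_eq!(v.into_vec(), vec![0, 1, 2, 3]);
    }

Capping instead of spilling to the heap mirrors the tokenizer's previous behavior, which stopped once `TOKENS_MAX` entries were buffered; callers that care can inspect the bool returned by `push`.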

src/request.rs

Lines changed: 2 additions & 2 deletions

@@ -239,11 +239,11 @@ impl Request {
 }
 
 fn calculate_tokens(url_lower_cased: &str) -> Vec<utils::Hash> {
-    let mut tokens = vec![];
+    let mut tokens = utils::TokensBuffer::default();
     utils::tokenize_pooled(url_lower_cased, &mut tokens);
     // Add zero token as a fallback to wildcard rule bucket
     tokens.push(0);
-    tokens
+    tokens.into_vec()
 }
 
 #[cfg(test)]
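An illustrative in-crate sketch (the URL is invented) of what the rewritten `calculate_tokens` returns; behavior is unchanged, only the intermediate storage moved to the stack:

    // Each alphanumeric URL segment is hashed, then 0 is appended as a
    // fallback token for the wildcard rule bucket. For URLs yielding
    // fewer than the buffer's 200 tokens (the common case), that 0 is
    // the final element.
    let tokens = calculate_tokens("https://example.com/banner.png");
    assert_eq!(tokens.last(), Some(&0));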

src/utils.rs

Lines changed: 11 additions & 14 deletions

@@ -6,6 +6,8 @@ use seahash::hash;
 #[cfg(target_pointer_width = "32")]
 use seahash::reference::hash;
 
+use crate::flatbuffers::unsafe_tools::StackVector;
+
 pub type Hash = u64;
 
 // A smaller version of Hash that is used in serialized format.
@@ -27,26 +29,21 @@ fn is_allowed_filter(ch: char) -> bool {
     ch.is_alphanumeric() || ch == '%'
 }
 
-pub(crate) const TOKENS_BUFFER_SIZE: usize = 128;
-pub(crate) const TOKENS_BUFFER_RESERVED: usize = 1;
-const TOKENS_MAX: usize = TOKENS_BUFFER_SIZE - TOKENS_BUFFER_RESERVED;
+pub(crate) type TokensBuffer = StackVector<Hash, 200>;
 
 fn fast_tokenizer_no_regex(
     pattern: &str,
     is_allowed_code: &dyn Fn(char) -> bool,
     skip_first_token: bool,
     skip_last_token: bool,
-    tokens_buffer: &mut Vec<Hash>,
+    tokens_buffer: &mut TokensBuffer,
 ) {
     // let mut tokens_buffer_index = 0;
     let mut inside: bool = false;
     let mut start = 0;
     let mut preceding_ch: Option<char> = None; // Used to check if a '*' is not just before a token
 
     for (i, c) in pattern.char_indices() {
-        if tokens_buffer.len() >= TOKENS_MAX {
-            return;
-        }
         if is_allowed_code(c) {
             if !inside {
                 inside = true;
@@ -75,17 +72,17 @@
     }
 }
 
-pub(crate) fn tokenize_pooled(pattern: &str, tokens_buffer: &mut Vec<Hash>) {
+pub(crate) fn tokenize_pooled(pattern: &str, tokens_buffer: &mut TokensBuffer) {
     fast_tokenizer_no_regex(pattern, &is_allowed_filter, false, false, tokens_buffer);
 }
 
 pub fn tokenize(pattern: &str) -> Vec<Hash> {
-    let mut tokens_buffer: Vec<Hash> = Vec::with_capacity(TOKENS_BUFFER_SIZE);
+    let mut tokens_buffer = TokensBuffer::default();
     tokenize_to(pattern, &mut tokens_buffer);
-    tokens_buffer
+    tokens_buffer.into_vec()
 }
 
-pub(crate) fn tokenize_to(pattern: &str, tokens_buffer: &mut Vec<Hash>) {
+pub(crate) fn tokenize_to(pattern: &str, tokens_buffer: &mut TokensBuffer) {
     fast_tokenizer_no_regex(pattern, &is_allowed_filter, false, false, tokens_buffer);
 }
 
@@ -95,21 +92,21 @@ pub(crate) fn tokenize_filter(
     skip_first_token: bool,
     skip_last_token: bool,
 ) -> Vec<Hash> {
-    let mut tokens_buffer: Vec<Hash> = Vec::with_capacity(TOKENS_BUFFER_SIZE);
+    let mut tokens_buffer = TokensBuffer::default();
     tokenize_filter_to(
         pattern,
         skip_first_token,
         skip_last_token,
         &mut tokens_buffer,
     );
-    tokens_buffer
+    tokens_buffer.into_vec()
 }
 
 pub(crate) fn tokenize_filter_to(
     pattern: &str,
     skip_first_token: bool,
     skip_last_token: bool,
-    tokens_buffer: &mut Vec<Hash>,
+    tokens_buffer: &mut TokensBuffer,
 ) {
     fast_tokenizer_no_regex(
         pattern,
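A sketch of the unchanged public behavior of `tokenize` (the input string is illustrative, and the expected hashes assume `fast_hash` from this module): callers still receive a plain `Vec<Hash>`, so the switch to a stack buffer is invisible outside:

    use crate::utils::{fast_hash, tokenize};

    fn tokenize_demo() {
        // Runs of alphanumeric/'%' characters become seahash tokens;
        // '.' and '/' act as separators.
        let tokens = tokenize("example.com/ads");
        assert_eq!(
            tokens,
            vec![fast_hash("example"), fast_hash("com"), fast_hash("ads")]
        );
    }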
