Skip to content

Commit 9163f30

Browse files
committed
Merge branch 'main' into idna-v1x
2 parents e81799f + 5d363cc commit 9163f30

File tree

15 files changed

+698
-201
lines changed

15 files changed

+698
-201
lines changed

.github/workflows/main.yml

Lines changed: 17 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -35,6 +35,13 @@ jobs:
3535
- uses: dtolnay/rust-toolchain@master
3636
with:
3737
toolchain: ${{ matrix.rust }}
38+
# Add toolchain for no_std tests
39+
- run: rustup toolchain install nightly
40+
- name: Add `aarch64-unknown-none` toolchain for `no_std` tests
41+
if: |
42+
matrix.os == 'ubuntu-latest' &&
43+
matrix.rust == 'nightly'
44+
run: rustup target add aarch64-unknown-none && rustup component add rust-src --toolchain nightly-x86_64-unknown-linux-gnu
3845
- run: cargo build --all-targets
3946
# Run tests
4047
- name: Run tests
@@ -47,10 +54,18 @@ jobs:
4754
- name: Run debugger_visualizer tests
4855
if: |
4956
matrix.os == 'windows-latest' &&
50-
matrix.rust != '1.67.0'
51-
run: cargo test --test debugger_visualizer --features "url/debugger_visualizer,url_debug_tests/debugger_visualizer" -- --test-threads=1
57+
matrix.rust != '1.56.0'
58+
run: cargo test --test debugger_visualizer --features "url/debugger_visualizer,url_debug_tests/debugger_visualizer" -- --test-threads=1 || echo "debugger test failed"
59+
continue-on-error: true # Fails on GH actions, but not locally.
5260
- name: Test `no_std` support
5361
run: cargo test --no-default-features --features=alloc
62+
- name: Build `url` crate for `aarch64-unknown-none` with `no_std`
63+
if: |
64+
matrix.os == 'ubuntu-latest' &&
65+
matrix.rust == 'nightly'
66+
run: >
67+
cd url
68+
&& cargo check --target aarch64-unknown-none -v --no-default-features
5469
5570
WASM:
5671
runs-on: ubuntu-latest

data-url/Cargo.toml

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -17,8 +17,9 @@ alloc = []
1717

1818
[dev-dependencies]
1919
tester = "0.9"
20+
# We pin this transitive dev dep so that MSRV CI can continue to run.
2021
unicode-width = "=0.1.12"
21-
serde = {version = "1.0", features = ["derive"]}
22+
serde = { version = "1.0", default-features = false, features = ["alloc", "derive"] }
2223
serde_json = "1.0"
2324

2425
[lib]

idna/src/lib.rs

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -74,6 +74,9 @@ impl From<Errors> for Result<(), Errors> {
7474
#[cfg(feature = "std")]
7575
impl std::error::Error for Errors {}
7676

77+
#[cfg(not(feature = "std"))]
78+
impl core::error::Error for Errors {}
79+
7780
impl core::fmt::Display for Errors {
7881
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
7982
core::fmt::Debug::fmt(self, f)

percent_encoding/src/ascii_set.rs

Lines changed: 213 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,213 @@
1+
// Copyright 2013-2016 The rust-url developers.
2+
//
3+
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4+
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5+
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6+
// option. This file may not be copied, modified, or distributed
7+
// except according to those terms.
8+
9+
use core::{mem, ops};
10+
11+
/// Represents a set of characters or bytes in the ASCII range.
12+
///
13+
/// This is used in [`percent_encode`] and [`utf8_percent_encode`].
14+
/// This is similar to [percent-encode sets](https://url.spec.whatwg.org/#percent-encoded-bytes).
15+
///
16+
/// Use the `add` method of an existing set to define a new set. For example:
17+
///
18+
/// [`percent_encode`]: crate::percent_encode
19+
/// [`utf8_percent_encode`]: crate::utf8_percent_encode
20+
///
21+
/// ```
22+
/// use percent_encoding::{AsciiSet, CONTROLS};
23+
///
24+
/// /// https://url.spec.whatwg.org/#fragment-percent-encode-set
25+
/// const FRAGMENT: &AsciiSet = &CONTROLS.add(b' ').add(b'"').add(b'<').add(b'>').add(b'`');
26+
/// ```
27+
#[derive(Debug, PartialEq, Eq)]
28+
pub struct AsciiSet {
29+
mask: [Chunk; ASCII_RANGE_LEN / BITS_PER_CHUNK],
30+
}
31+
32+
type Chunk = u32;
33+
34+
const ASCII_RANGE_LEN: usize = 0x80;
35+
36+
const BITS_PER_CHUNK: usize = 8 * mem::size_of::<Chunk>();
37+
38+
impl AsciiSet {
39+
/// An empty set.
40+
pub const EMPTY: AsciiSet = AsciiSet {
41+
mask: [0; ASCII_RANGE_LEN / BITS_PER_CHUNK],
42+
};
43+
44+
/// Called with UTF-8 bytes rather than code points.
45+
/// Must only be called with ASCII bytes (below 0x80): the mask has no chunk for larger values, so indexing would go out of bounds.
46+
pub(crate) const fn contains(&self, byte: u8) -> bool {
47+
let chunk = self.mask[byte as usize / BITS_PER_CHUNK];
48+
let mask = 1 << (byte as usize % BITS_PER_CHUNK);
49+
(chunk & mask) != 0
50+
}
51+
52+
pub(crate) fn should_percent_encode(&self, byte: u8) -> bool {
53+
!byte.is_ascii() || self.contains(byte)
54+
}
55+
56+
pub const fn add(&self, byte: u8) -> Self {
57+
let mut mask = self.mask;
58+
mask[byte as usize / BITS_PER_CHUNK] |= 1 << (byte as usize % BITS_PER_CHUNK);
59+
AsciiSet { mask }
60+
}
61+
62+
pub const fn remove(&self, byte: u8) -> Self {
63+
let mut mask = self.mask;
64+
mask[byte as usize / BITS_PER_CHUNK] &= !(1 << (byte as usize % BITS_PER_CHUNK));
65+
AsciiSet { mask }
66+
}
67+
68+
/// Return the union of two sets.
69+
pub const fn union(&self, other: Self) -> Self {
70+
let mask = [
71+
self.mask[0] | other.mask[0],
72+
self.mask[1] | other.mask[1],
73+
self.mask[2] | other.mask[2],
74+
self.mask[3] | other.mask[3],
75+
];
76+
AsciiSet { mask }
77+
}
78+
79+
/// Return the negation of the set.
80+
pub const fn complement(&self) -> Self {
81+
let mask = [!self.mask[0], !self.mask[1], !self.mask[2], !self.mask[3]];
82+
AsciiSet { mask }
83+
}
84+
}
85+
86+
impl ops::Add for AsciiSet {
87+
type Output = Self;
88+
89+
fn add(self, other: Self) -> Self {
90+
self.union(other)
91+
}
92+
}
93+
94+
impl ops::Not for AsciiSet {
95+
type Output = Self;
96+
97+
fn not(self) -> Self {
98+
self.complement()
99+
}
100+
}
101+
102+
/// The set of 0x00 to 0x1F (C0 controls), and 0x7F (DEL).
103+
///
104+
/// Note that this includes the newline and tab characters, but not the space 0x20.
105+
///
106+
/// <https://url.spec.whatwg.org/#c0-control-percent-encode-set>
107+
pub const CONTROLS: &AsciiSet = &AsciiSet {
108+
mask: [
109+
!0_u32, // C0: 0x00 to 0x1F (32 bits set)
110+
0,
111+
0,
112+
1 << (0x7F_u32 % 32), // DEL: 0x7F (one bit set)
113+
],
114+
};
115+
116+
macro_rules! static_assert {
117+
($( $bool: expr, )+) => {
118+
fn _static_assert() {
119+
$(
120+
let _ = mem::transmute::<[u8; $bool as usize], u8>;
121+
)+
122+
}
123+
}
124+
}
125+
126+
static_assert! {
127+
CONTROLS.contains(0x00),
128+
CONTROLS.contains(0x1F),
129+
!CONTROLS.contains(0x20),
130+
!CONTROLS.contains(0x7E),
131+
CONTROLS.contains(0x7F),
132+
}
133+
134+
/// Everything that is not an ASCII letter or digit.
135+
///
136+
/// This is probably more eager than necessary in any context.
137+
pub const NON_ALPHANUMERIC: &AsciiSet = &CONTROLS
138+
.add(b' ')
139+
.add(b'!')
140+
.add(b'"')
141+
.add(b'#')
142+
.add(b'$')
143+
.add(b'%')
144+
.add(b'&')
145+
.add(b'\'')
146+
.add(b'(')
147+
.add(b')')
148+
.add(b'*')
149+
.add(b'+')
150+
.add(b',')
151+
.add(b'-')
152+
.add(b'.')
153+
.add(b'/')
154+
.add(b':')
155+
.add(b';')
156+
.add(b'<')
157+
.add(b'=')
158+
.add(b'>')
159+
.add(b'?')
160+
.add(b'@')
161+
.add(b'[')
162+
.add(b'\\')
163+
.add(b']')
164+
.add(b'^')
165+
.add(b'_')
166+
.add(b'`')
167+
.add(b'{')
168+
.add(b'|')
169+
.add(b'}')
170+
.add(b'~');
171+
172+
#[cfg(test)]
173+
mod tests {
174+
use super::*;
175+
176+
#[test]
177+
fn add_op() {
178+
let left = AsciiSet::EMPTY.add(b'A');
179+
let right = AsciiSet::EMPTY.add(b'B');
180+
let expected = AsciiSet::EMPTY.add(b'A').add(b'B');
181+
assert_eq!(left + right, expected);
182+
}
183+
184+
#[test]
185+
fn not_op() {
186+
let set = AsciiSet::EMPTY.add(b'A').add(b'B');
187+
let not_set = !set;
188+
assert!(!not_set.contains(b'A'));
189+
assert!(not_set.contains(b'C'));
190+
}
191+
192+
/// This test ensures that we can get the union of two sets as a constant value, which is
193+
/// useful for defining sets in a modular way.
194+
#[test]
195+
fn union() {
196+
const A: AsciiSet = AsciiSet::EMPTY.add(b'A');
197+
const B: AsciiSet = AsciiSet::EMPTY.add(b'B');
198+
const UNION: AsciiSet = A.union(B);
199+
const EXPECTED: AsciiSet = AsciiSet::EMPTY.add(b'A').add(b'B');
200+
assert_eq!(UNION, EXPECTED);
201+
}
202+
203+
/// This test ensures that we can get the complement of a set as a constant value, which is
204+
/// useful for defining sets in a modular way.
205+
#[test]
206+
fn complement() {
207+
const BOTH: AsciiSet = AsciiSet::EMPTY.add(b'A').add(b'B');
208+
const COMPLEMENT: AsciiSet = BOTH.complement();
209+
assert!(!COMPLEMENT.contains(b'A'));
210+
assert!(!COMPLEMENT.contains(b'B'));
211+
assert!(COMPLEMENT.contains(b'C'));
212+
}
213+
}

0 commit comments

Comments
 (0)