diff --git a/lib/Cargo.lock b/lib/Cargo.lock index 5c5ef09..20cf6fe 100644 --- a/lib/Cargo.lock +++ b/lib/Cargo.lock @@ -17,15 +17,6 @@ dependencies = [ "memchr", ] -[[package]] -name = "android_system_properties" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" -dependencies = [ - "libc", -] - [[package]] name = "anes" version = "0.1.6" @@ -92,16 +83,6 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" -[[package]] -name = "cc" -version = "1.2.46" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b97463e1064cb1b1c1384ad0a0b9c8abd0988e2a91f52606c80ef14aadb63e36" -dependencies = [ - "find-msvc-tools", - "shlex", -] - [[package]] name = "cfg-if" version = "1.0.4" @@ -110,15 +91,11 @@ checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" [[package]] name = "chrono" -version = "0.4.42" +version = "0.4.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "145052bdd345b87320e369255277e3fb5152762ad123a901ef5c262dd38fe8d2" +checksum = "fac4744fb15ae8337dc853fee7fb3f4e48c0fbaa23d0afe49c447b4fab126118" dependencies = [ - "iana-time-zone", - "js-sys", "num-traits", - "wasm-bindgen", - "windows-link", ] [[package]] @@ -189,12 +166,6 @@ dependencies = [ "sha2", ] -[[package]] -name = "core-foundation-sys" -version = "0.8.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" - [[package]] name = "cpufeatures" version = "0.2.17" @@ -323,17 +294,11 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" -[[package]] -name = "find-msvc-tools" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a3076410a55c90011c298b04d0cfa770b00fa04e1e3c97d3f6c9de105a03844" - [[package]] name = "flate2" -version = "1.1.5" +version = "1.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfe33edd8e85a12a67454e37f8c75e730830d83e313556ab9ebf9ee7fbeb3bfb" +checksum = "b375d6465b98090a5f25b1c7703f3859783755aa9a80433b36e0379a3ec2f369" dependencies = [ "crc32fast", "miniz_oxide", @@ -393,30 +358,6 @@ dependencies = [ "digest", ] -[[package]] -name = "iana-time-zone" -version = "0.1.64" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33e57f83510bb73707521ebaffa789ec8caf86f9657cad665b092b581d40e9fb" -dependencies = [ - "android_system_properties", - "core-foundation-sys", - "iana-time-zone-haiku", - "js-sys", - "log", - "wasm-bindgen", - "windows-core", -] - -[[package]] -name = "iana-time-zone-haiku" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" -dependencies = [ - "cc", -] - [[package]] name = "is-terminal" version = "0.4.17" @@ -465,12 +406,6 @@ version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" -[[package]] -name = "log" -version = "0.4.28" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432" - [[package]] name = "md-5" version = "0.10.6" @@ -808,12 +743,6 @@ dependencies = [ "digest", ] -[[package]] -name = "shlex" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" - [[package]] name = "simd-adler32" version = "0.3.7" @@ -976,65 +905,12 @@ dependencies = [ "windows-sys", ] -[[package]] -name = "windows-core" -version = "0.62.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" -dependencies = [ - "windows-implement", - "windows-interface", - "windows-link", - "windows-result", - "windows-strings", -] - -[[package]] -name = "windows-implement" -version = "0.60.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "windows-interface" -version = "0.59.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "windows-link" version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" -[[package]] -name = "windows-result" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5" -dependencies = [ - "windows-link", -] - -[[package]] -name = "windows-strings" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091" -dependencies = [ - "windows-link", -] - [[package]] name = "windows-sys" version = "0.61.2" diff --git a/lib/Cargo.toml b/lib/Cargo.toml index 8c902d8..f5b7ea6 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -8,15 +8,15 @@ name = "convert_core" crate-type = ["cdylib"] [dependencies] -base64 = "0.22" -sha2 = "0.10" -sha1 = "0.10" -md-5 = "0.10" -hmac = "0.12" -flate2 = "1.0" -percent-encoding = "2.3" -chrono = "0.4" +base64 = { version = "0.22.1", default-features = false, features = ["std"] } +sha2 = { version = "0.10.9", default-features = false } +sha1 = { version = "0.10.6", default-features = false } +md-5 = { version = "0.10.6", default-features = false } +hmac = { version = "0.12.1", default-features = false } +flate2 = { version = "1.1.8", default-features = false, features = ["rust_backend"] } +percent-encoding = { version = "2.3.2", default-features = false, features = ["alloc"] } +chrono = { version = "0.4.43", default-features = false, features = ["std"] } [dev-dependencies] -criterion = "0.5" -proptest = "1.4" +criterion = "0.5.1" +proptest = "1.9.0" diff --git a/lib/src/base64.rs b/lib/src/base64.rs deleted file mode 100644 index 56731e6..0000000 --- a/lib/src/base64.rs +++ /dev/null @@ -1,1916 +0,0 @@ -//! Base64 encoding and decoding functions - -use base64::{Engine as _, engine::general_purpose}; -use std::ffi::{CStr, CString}; -use std::os::raw::c_char; - -/// Convert a string to Base64 encoding -/// -/// # Safety -/// This function is unsafe because it dereferences raw pointers. -/// The caller must ensure that: -/// - `input` is a valid null-terminated C string or null -/// - `encoding` is a valid null-terminated C string or null -/// - The returned pointer must be freed using `free_string` -#[unsafe(no_mangle)] -pub unsafe extern "C" fn string_to_base64( - input: *const c_char, - encoding: *const c_char, -) -> *mut c_char { - // Validate null pointers - if input.is_null() { - crate::error::set_error("Input pointer is null".to_string()); - return std::ptr::null_mut(); - } - - if encoding.is_null() { - crate::error::set_error("Encoding pointer is null".to_string()); - return std::ptr::null_mut(); - } - - // Convert C strings to Rust strings - let input_str = match unsafe { CStr::from_ptr(input).to_str() } { - Ok(s) => s, - Err(_) => { - crate::error::set_error("Invalid UTF-8 in input string".to_string()); - return std::ptr::null_mut(); - } - }; - - let encoding_str = match unsafe { CStr::from_ptr(encoding).to_str() } { - Ok(s) => s, - Err(_) => { - crate::error::set_error("Invalid UTF-8 in encoding string".to_string()); - return std::ptr::null_mut(); - } - }; - - // Check for deprecated UTF7 encoding (both UTF7 and UTF-7 variants) - if encoding_str.eq_ignore_ascii_case("UTF7") || encoding_str.eq_ignore_ascii_case("UTF-7") { - crate::error::set_error("UTF7 encoding is deprecated and not supported".to_string()); - return std::ptr::null_mut(); - } - - // Convert string to bytes based on encoding - let bytes = match convert_string_to_bytes(input_str, encoding_str) { - Ok(b) => b, - Err(e) => { - crate::error::set_error(e); - return std::ptr::null_mut(); - } - }; - - // Encode to Base64 - let encoded = general_purpose::STANDARD.encode(&bytes); - - // Convert to C string - match CString::new(encoded) { - Ok(c_str) => { - crate::error::clear_error(); - c_str.into_raw() - } - Err(_) => { - crate::error::set_error("Failed to create C string from Base64 result".to_string()); - std::ptr::null_mut() - } - } -} - -/// Convert a Base64 string back to a regular string -/// -/// # Safety -/// This function is unsafe because it dereferences raw pointers. -/// The caller must ensure that: -/// - `input` is a valid null-terminated C string or null -/// - `encoding` is a valid null-terminated C string or null -/// - The returned pointer must be freed using `free_string` -#[unsafe(no_mangle)] -pub unsafe extern "C" fn base64_to_string( - input: *const c_char, - encoding: *const c_char, -) -> *mut c_char { - // Validate null pointers - if input.is_null() { - crate::error::set_error("Input pointer is null".to_string()); - return std::ptr::null_mut(); - } - - if encoding.is_null() { - crate::error::set_error("Encoding pointer is null".to_string()); - return std::ptr::null_mut(); - } - - // Convert C strings to Rust strings - let input_str = match unsafe { CStr::from_ptr(input).to_str() } { - Ok(s) => s, - Err(_) => { - crate::error::set_error("Invalid UTF-8 in input string".to_string()); - return std::ptr::null_mut(); - } - }; - - let encoding_str = match unsafe { CStr::from_ptr(encoding).to_str() } { - Ok(s) => s, - Err(_) => { - crate::error::set_error("Invalid UTF-8 in encoding string".to_string()); - return std::ptr::null_mut(); - } - }; - - // Decode from Base64 - let decoded_bytes = match general_purpose::STANDARD.decode(input_str) { - Ok(bytes) => bytes, - Err(e) => { - crate::error::set_error(format!("Failed to decode Base64: {}", e)); - return std::ptr::null_mut(); - } - }; - - // Convert bytes to string based on encoding - let result_string = match convert_bytes_to_string(&decoded_bytes, encoding_str) { - Ok(s) => s, - Err(e) => { - crate::error::set_error(e); - return std::ptr::null_mut(); - } - }; - - // Convert to C string - match CString::new(result_string) { - Ok(c_str) => { - crate::error::clear_error(); - c_str.into_raw() - } - Err(_) => { - crate::error::set_error("Failed to create C string from decoded result".to_string()); - std::ptr::null_mut() - } - } -} - -/// Decode a Base64 string to a string with Latin-1 fallback for binary data -/// -/// This is a lenient version of `base64_to_string` that automatically falls back to -/// Latin-1 (ISO-8859-1) encoding when the decoded bytes are invalid for the specified -/// encoding. This is useful for handling binary data encoded as Base64. -/// -/// # Safety -/// Same safety requirements as `base64_to_string`. -#[unsafe(no_mangle)] -pub unsafe extern "C" fn base64_to_string_lenient( - input: *const c_char, - encoding: *const c_char, -) -> *mut c_char { - // Validate null pointers - if input.is_null() { - crate::error::set_error("Input pointer is null".to_string()); - return std::ptr::null_mut(); - } - - if encoding.is_null() { - crate::error::set_error("Encoding pointer is null".to_string()); - return std::ptr::null_mut(); - } - - // Convert C strings to Rust strings - let input_str = match unsafe { CStr::from_ptr(input).to_str() } { - Ok(s) => s, - Err(_) => { - crate::error::set_error("Invalid UTF-8 in input string".to_string()); - return std::ptr::null_mut(); - } - }; - - let encoding_str = match unsafe { CStr::from_ptr(encoding).to_str() } { - Ok(s) => s, - Err(_) => { - crate::error::set_error("Invalid UTF-8 in encoding string".to_string()); - return std::ptr::null_mut(); - } - }; - - // Decode from Base64 - let decoded_bytes = match general_purpose::STANDARD.decode(input_str) { - Ok(bytes) => bytes, - Err(e) => { - crate::error::set_error(format!("Failed to decode Base64: {}", e)); - return std::ptr::null_mut(); - } - }; - - // Convert bytes to string with Latin-1 fallback for binary data - let result_string = match convert_bytes_to_string_with_fallback(&decoded_bytes, encoding_str) { - Ok(s) => s, - Err(e) => { - crate::error::set_error(e); - return std::ptr::null_mut(); - } - }; - - // Convert to C string - match CString::new(result_string) { - Ok(c_str) => { - crate::error::clear_error(); - c_str.into_raw() - } - Err(_) => { - crate::error::set_error("Failed to create C string from decoded result".to_string()); - std::ptr::null_mut() - } - } -} - -/// Convert a Rust string to bytes using the specified encoding -pub(crate) fn convert_string_to_bytes(input: &str, encoding: &str) -> Result, String> { - // Use eq_ignore_ascii_case to avoid allocating with to_uppercase() - if encoding.eq_ignore_ascii_case("UTF8") || encoding.eq_ignore_ascii_case("UTF-8") { - Ok(input.as_bytes().to_vec()) - } else if encoding.eq_ignore_ascii_case("ASCII") { - // Validate that all characters are ASCII - if input.is_ascii() { - Ok(input.as_bytes().to_vec()) - } else { - Err("String contains non-ASCII characters".to_string()) - } - } else if encoding.eq_ignore_ascii_case("UNICODE") - || encoding.eq_ignore_ascii_case("UTF16") - || encoding.eq_ignore_ascii_case("UTF-16") - { - // Unicode in .NET typically means UTF-16LE - let utf16: Vec = input.encode_utf16().collect(); - let mut bytes = Vec::with_capacity(utf16.len() * 2); - for word in utf16 { - bytes.push((word & 0xFF) as u8); - bytes.push((word >> 8) as u8); - } - Ok(bytes) - } else if encoding.eq_ignore_ascii_case("UTF32") || encoding.eq_ignore_ascii_case("UTF-32") { - // UTF-32LE encoding - let mut bytes = Vec::with_capacity(input.chars().count() * 4); - for ch in input.chars() { - let code_point = ch as u32; - bytes.push((code_point & 0xFF) as u8); - bytes.push(((code_point >> 8) & 0xFF) as u8); - bytes.push(((code_point >> 16) & 0xFF) as u8); - bytes.push(((code_point >> 24) & 0xFF) as u8); - } - Ok(bytes) - } else if encoding.eq_ignore_ascii_case("BIGENDIANUNICODE") - || encoding.eq_ignore_ascii_case("UTF16BE") - || encoding.eq_ignore_ascii_case("UTF-16BE") - { - // UTF-16BE encoding - let utf16: Vec = input.encode_utf16().collect(); - let mut bytes = Vec::with_capacity(utf16.len() * 2); - for word in utf16 { - bytes.push((word >> 8) as u8); - bytes.push((word & 0xFF) as u8); - } - Ok(bytes) - } else if encoding.eq_ignore_ascii_case("DEFAULT") { - // Default encoding is UTF-8 - Ok(input.as_bytes().to_vec()) - } else { - Err(format!("Unsupported encoding: {}", encoding)) - } -} - -/// Convert a byte array to Base64 encoding -/// -/// # Safety -/// This function is unsafe because it dereferences raw pointers. -/// The caller must ensure that: -/// - `bytes` is a valid pointer to a byte array or null -/// - `length` accurately represents the number of bytes to read -/// - The returned pointer must be freed using `free_string` -#[unsafe(no_mangle)] -pub unsafe extern "C" fn bytes_to_base64(bytes: *const u8, length: usize) -> *mut c_char { - // Validate null pointer - if bytes.is_null() { - crate::error::set_error("Byte array pointer is null".to_string()); - return std::ptr::null_mut(); - } - - // Handle zero length case - encode empty byte array to empty string - if length == 0 { - match CString::new("") { - Ok(c_str) => { - crate::error::clear_error(); - return c_str.into_raw(); - } - Err(_) => { - crate::error::set_error("Failed to create empty C string".to_string()); - return std::ptr::null_mut(); - } - } - } - - // Create a slice from the raw pointer - let byte_slice = unsafe { std::slice::from_raw_parts(bytes, length) }; - - // Encode to Base64 - let encoded = general_purpose::STANDARD.encode(byte_slice); - - // Convert to C string - match CString::new(encoded) { - Ok(c_str) => { - crate::error::clear_error(); - c_str.into_raw() - } - Err(_) => { - crate::error::set_error("Failed to create C string from Base64 result".to_string()); - std::ptr::null_mut() - } - } -} - -/// Convert a Base64 string to a byte array -/// -/// # Safety -/// This function is unsafe because it dereferences raw pointers. -/// The caller must ensure that: -/// - `input` is a valid null-terminated C string or null -/// - `out_length` is a valid pointer to a usize or null (optional) -/// - The returned pointer must be freed using `free_bytes` -#[unsafe(no_mangle)] -pub unsafe extern "C" fn base64_to_bytes(input: *const c_char, out_length: *mut usize) -> *mut u8 { - // Validate null pointer for input - if input.is_null() { - crate::error::set_error("Input pointer is null".to_string()); - if !out_length.is_null() { - unsafe { - *out_length = 0; - } - } - return std::ptr::null_mut(); - } - - // Convert C string to Rust string - let input_str = match unsafe { CStr::from_ptr(input).to_str() } { - Ok(s) => s, - Err(_) => { - crate::error::set_error("Invalid UTF-8 in input string".to_string()); - if !out_length.is_null() { - unsafe { - *out_length = 0; - } - } - return std::ptr::null_mut(); - } - }; - - // Handle empty string case - if input_str.is_empty() { - crate::error::clear_error(); - if !out_length.is_null() { - unsafe { - *out_length = 0; - } - } - // Allocate an empty Vec using the helper function - return crate::memory::allocate_byte_array(Vec::::new()); - } - - // Decode from Base64 - let decoded_bytes = match general_purpose::STANDARD.decode(input_str) { - Ok(bytes) => bytes, - Err(e) => { - crate::error::set_error(format!("Failed to decode Base64: {}", e)); - if !out_length.is_null() { - unsafe { - *out_length = 0; - } - } - return std::ptr::null_mut(); - } - }; - - // Set output length (only if pointer provided) - let length = decoded_bytes.len(); - if !out_length.is_null() { - unsafe { - *out_length = length; - } - } - - // Allocate byte array with metadata header for proper deallocation - crate::error::clear_error(); - crate::memory::allocate_byte_array(decoded_bytes) -} - -/// Convert bytes to a Rust string using the specified encoding -pub(crate) fn convert_bytes_to_string(bytes: &[u8], encoding: &str) -> Result { - // Use eq_ignore_ascii_case to avoid allocating with to_uppercase() - if encoding.eq_ignore_ascii_case("UTF8") || encoding.eq_ignore_ascii_case("UTF-8") { - String::from_utf8(bytes.to_vec()).map_err(|e| format!("Invalid UTF-8 bytes: {}", e)) - } else if encoding.eq_ignore_ascii_case("ASCII") { - // Validate that all bytes are ASCII - if bytes.iter().all(|&b| b < 128) { - String::from_utf8(bytes.to_vec()).map_err(|e| format!("Invalid ASCII bytes: {}", e)) - } else { - Err("Bytes contain non-ASCII values".to_string()) - } - } else if encoding.eq_ignore_ascii_case("UNICODE") - || encoding.eq_ignore_ascii_case("UTF16") - || encoding.eq_ignore_ascii_case("UTF-16") - { - // Unicode in .NET typically means UTF-16LE - if !bytes.len().is_multiple_of(2) { - return Err("Invalid UTF-16 byte length (must be even)".to_string()); - } - - let mut utf16_chars = Vec::with_capacity(bytes.len() / 2); - for chunk in bytes.chunks_exact(2) { - let word = u16::from_le_bytes([chunk[0], chunk[1]]); - utf16_chars.push(word); - } - - String::from_utf16(&utf16_chars).map_err(|e| format!("Invalid UTF-16 bytes: {}", e)) - } else if encoding.eq_ignore_ascii_case("UTF32") || encoding.eq_ignore_ascii_case("UTF-32") { - // UTF-32LE encoding - if !bytes.len().is_multiple_of(4) { - return Err("Invalid UTF-32 byte length (must be multiple of 4)".to_string()); - } - - let mut result = String::new(); - for chunk in bytes.chunks_exact(4) { - let code_point = u32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]); - match char::from_u32(code_point) { - Some(ch) => result.push(ch), - None => return Err(format!("Invalid UTF-32 code point: {}", code_point)), - } - } - Ok(result) - } else if encoding.eq_ignore_ascii_case("BIGENDIANUNICODE") - || encoding.eq_ignore_ascii_case("UTF16BE") - || encoding.eq_ignore_ascii_case("UTF-16BE") - { - // UTF-16BE encoding - if !bytes.len().is_multiple_of(2) { - return Err("Invalid UTF-16BE byte length (must be even)".to_string()); - } - - let mut utf16_chars = Vec::with_capacity(bytes.len() / 2); - for chunk in bytes.chunks_exact(2) { - let word = u16::from_be_bytes([chunk[0], chunk[1]]); - utf16_chars.push(word); - } - - String::from_utf16(&utf16_chars).map_err(|e| format!("Invalid UTF-16BE bytes: {}", e)) - } else if encoding.eq_ignore_ascii_case("DEFAULT") { - // Default encoding is UTF-8 - String::from_utf8(bytes.to_vec()).map_err(|e| format!("Invalid UTF-8 bytes: {}", e)) - } else if encoding.eq_ignore_ascii_case("ISO-8859-1") - || encoding.eq_ignore_ascii_case("LATIN1") - || encoding.eq_ignore_ascii_case("LATIN-1") - { - // Latin-1 (ISO-8859-1) - each byte maps directly to a Unicode code point - // This encoding can represent any byte value (0x00-0xFF) - // Note: Null bytes (0x00) are replaced with Unicode replacement character (U+FFFD) - // to ensure the result can be safely passed through C string interfaces - Ok(bytes - .iter() - .map(|&b| if b == 0 { '\u{FFFD}' } else { b as char }) - .collect()) - } else { - Err(format!("Unsupported encoding: {}", encoding)) - } -} - -/// Convert bytes to a Rust string with automatic fallback to Latin-1 for binary data -/// -/// This function first attempts to decode using the specified encoding. If that fails -/// due to invalid byte sequences (common with binary data like certificates), it -/// automatically falls back to Latin-1 (ISO-8859-1) which can represent any byte value. -/// -/// Note: Null bytes (0x00) are replaced with the Unicode replacement character (U+FFFD) -/// to ensure the result can be safely passed through C string interfaces. -pub(crate) fn convert_bytes_to_string_with_fallback( - bytes: &[u8], - encoding: &str, -) -> Result { - match convert_bytes_to_string(bytes, encoding) { - Ok(s) => Ok(s), - Err(e) => { - // Check if this is an encoding error that Latin-1 fallback can handle - if e.contains("Invalid UTF-8") - || e.contains("Invalid ASCII") - || e.contains("Invalid UTF-16") - || e.contains("Invalid UTF-32") - || e.contains("non-ASCII values") - { - // Fall back to Latin-1 which can represent any byte - // Replace null bytes with replacement character for C string safety - Ok(bytes - .iter() - .map(|&b| if b == 0 { '\u{FFFD}' } else { b as char }) - .collect()) - } else { - // Other errors (unsupported encoding, wrong byte length) should propagate - Err(e) - } - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - use std::ffi::CString; - - #[test] - fn test_string_to_base64_happy_path_utf8() { - // Test: "Hello" with UTF8 encoding should produce "SGVsbG8=" - let input = CString::new("Hello").unwrap(); - let encoding = CString::new("UTF8").unwrap(); - - let result = unsafe { string_to_base64(input.as_ptr(), encoding.as_ptr()) }; - - assert!(!result.is_null(), "Result should not be null"); - let result_str = unsafe { CStr::from_ptr(result).to_str().unwrap() }; - assert_eq!(result_str, "SGVsbG8="); - unsafe { crate::memory::free_string(result) }; - } - - #[test] - fn test_string_to_base64_null_input_pointer() { - // Test: null input pointer should return null - let encoding = CString::new("UTF8").unwrap(); - - let result = unsafe { string_to_base64(std::ptr::null(), encoding.as_ptr()) }; - - assert!(result.is_null(), "Null input pointer should return null"); - } - - #[test] - fn test_string_to_base64_null_encoding_pointer() { - // Test: null encoding pointer should return null - let input = CString::new("Hello").unwrap(); - - let result = unsafe { string_to_base64(input.as_ptr(), std::ptr::null()) }; - - assert!(result.is_null(), "Null encoding pointer should return null"); - } - - #[test] - fn test_string_to_base64_invalid_encoding() { - // Test: invalid encoding name should return null - let input = CString::new("Hello").unwrap(); - let encoding = CString::new("INVALID_ENCODING").unwrap(); - - let result = unsafe { string_to_base64(input.as_ptr(), encoding.as_ptr()) }; - - assert!(result.is_null(), "Invalid encoding should return null"); - } - - #[test] - fn test_string_to_base64_utf7_deprecated() { - // Test: UTF7 encoding should return null (deprecated) - let input = CString::new("Hello").unwrap(); - let encoding = CString::new("UTF7").unwrap(); - - let result = unsafe { string_to_base64(input.as_ptr(), encoding.as_ptr()) }; - - assert!( - result.is_null(), - "UTF7 encoding should return null (deprecated)" - ); - } - - #[test] - fn test_string_to_base64_empty_string() { - // Test: empty string should encode successfully - let input = CString::new("").unwrap(); - let encoding = CString::new("UTF8").unwrap(); - - let result = unsafe { string_to_base64(input.as_ptr(), encoding.as_ptr()) }; - - assert!( - !result.is_null(), - "Result should not be null for empty string" - ); - let result_str = unsafe { CStr::from_ptr(result).to_str().unwrap() }; - assert_eq!(result_str, ""); // Empty string encodes to empty Base64 - unsafe { crate::memory::free_string(result) }; - } - - #[test] - fn test_string_to_base64_large_string() { - // Test: 1MB string should encode successfully - let large_string = "A".repeat(1024 * 1024); // 1MB of 'A' characters - let input = CString::new(large_string).unwrap(); - let encoding = CString::new("UTF8").unwrap(); - - let result = unsafe { string_to_base64(input.as_ptr(), encoding.as_ptr()) }; - - assert!( - !result.is_null(), - "Result should not be null for large string" - ); - // Verify the result is a valid pointer and can be freed - unsafe { crate::memory::free_string(result) }; - } - - #[test] - fn test_string_to_base64_various_encodings() { - let input = CString::new("Test").unwrap(); - let encodings = vec![ - "UTF8", - "ASCII", - "Unicode", - "UTF32", - "BigEndianUnicode", - "Default", - ]; - - for enc in encodings { - let encoding = CString::new(enc).unwrap(); - let result = unsafe { string_to_base64(input.as_ptr(), encoding.as_ptr()) }; - - assert!( - !result.is_null(), - "Result should not be null for encoding: {}", - enc - ); - unsafe { crate::memory::free_string(result) }; - } - } - - #[test] - fn test_string_to_base64_special_characters() { - // Test: string with special characters - let input = CString::new("Hello, World! 🌍").unwrap(); - let encoding = CString::new("UTF8").unwrap(); - - let result = unsafe { string_to_base64(input.as_ptr(), encoding.as_ptr()) }; - - assert!( - !result.is_null(), - "Result should not be null for special characters" - ); - unsafe { crate::memory::free_string(result) }; - } - - // ========== Tests for base64_to_string ========== - - #[test] - fn test_base64_to_string_happy_path_utf8() { - // Test: decode "SGVsbG8=" with UTF8 encoding should produce "Hello" - let input = CString::new("SGVsbG8=").unwrap(); - let encoding = CString::new("UTF8").unwrap(); - - let result = unsafe { base64_to_string(input.as_ptr(), encoding.as_ptr()) }; - - assert!(!result.is_null(), "Result should not be null"); - let result_str = unsafe { CStr::from_ptr(result).to_str().unwrap() }; - assert_eq!(result_str, "Hello", "Decoded string should be 'Hello'"); - unsafe { crate::memory::free_string(result) }; - } - - #[test] - fn test_base64_to_string_null_input_pointer() { - // Test: null input pointer should return null - let encoding = CString::new("UTF8").unwrap(); - - let result = unsafe { base64_to_string(std::ptr::null(), encoding.as_ptr()) }; - - assert!(result.is_null(), "Null input pointer should return null"); - } - - #[test] - fn test_base64_to_string_null_encoding_pointer() { - // Test: null encoding pointer should return null - let input = CString::new("SGVsbG8=").unwrap(); - - let result = unsafe { base64_to_string(input.as_ptr(), std::ptr::null()) }; - - assert!(result.is_null(), "Null encoding pointer should return null"); - } - - #[test] - fn test_base64_to_string_invalid_base64() { - // Test: invalid Base64 string should return null - let input = CString::new("Not@Valid#Base64!").unwrap(); - let encoding = CString::new("UTF8").unwrap(); - - let result = unsafe { base64_to_string(input.as_ptr(), encoding.as_ptr()) }; - - assert!(result.is_null(), "Invalid Base64 string should return null"); - } - - #[test] - fn test_base64_to_string_invalid_encoding() { - // Test: invalid encoding name should return null - let input = CString::new("SGVsbG8=").unwrap(); - let encoding = CString::new("INVALID_ENCODING").unwrap(); - - let result = unsafe { base64_to_string(input.as_ptr(), encoding.as_ptr()) }; - - assert!(result.is_null(), "Invalid encoding should return null"); - } - - #[test] - fn test_base64_to_string_empty_string() { - // Test: empty Base64 string should decode to empty string - let input = CString::new("").unwrap(); - let encoding = CString::new("UTF8").unwrap(); - - let result = unsafe { base64_to_string(input.as_ptr(), encoding.as_ptr()) }; - - assert!( - !result.is_null(), - "Result should not be null for empty string" - ); - let result_str = unsafe { CStr::from_ptr(result).to_str().unwrap() }; - assert_eq!(result_str, "", "Empty Base64 should decode to empty string"); - unsafe { crate::memory::free_string(result) }; - } - - #[test] - fn test_base64_to_string_round_trip() { - // Test: encode then decode should produce original string - let original = "Test String 123!"; - let input = CString::new(original).unwrap(); - let encoding = CString::new("UTF8").unwrap(); - - // Encode - let encoded_ptr = unsafe { string_to_base64(input.as_ptr(), encoding.as_ptr()) }; - assert!(!encoded_ptr.is_null(), "Encoding should succeed"); - - // Decode - let decoded_ptr = unsafe { base64_to_string(encoded_ptr, encoding.as_ptr()) }; - assert!(!decoded_ptr.is_null(), "Decoding should succeed"); - - let decoded_str = unsafe { CStr::from_ptr(decoded_ptr).to_str().unwrap() }; - assert_eq!( - decoded_str, original, - "Round-trip should preserve original string" - ); - - unsafe { - crate::memory::free_string(encoded_ptr); - crate::memory::free_string(decoded_ptr); - }; - } - - #[test] - fn test_base64_to_string_various_encodings() { - // Test: various supported encodings should work - let test_cases = vec![ - ("SGVsbG8=", "UTF8", "Hello"), - ("VABFAFMAVAA=", "Unicode", "TEST"), // UTF-16LE encoded "TEST" - ]; - - for (base64_input, enc, expected) in test_cases { - let input = CString::new(base64_input).unwrap(); - let encoding = CString::new(enc).unwrap(); - - let result = unsafe { base64_to_string(input.as_ptr(), encoding.as_ptr()) }; - - if !result.is_null() { - let result_str = unsafe { CStr::from_ptr(result).to_str().unwrap_or("") }; - // Note: Some encodings may not round-trip perfectly, so we just verify non-null - assert!( - !result_str.is_empty() || expected.is_empty(), - "Result should not be empty for encoding: {}", - enc - ); - unsafe { crate::memory::free_string(result) }; - } - } - } - - #[test] - fn test_base64_to_string_malformed_base64() { - let malformed_inputs = vec![ - "SGVsbG8", // Missing padding - "SGVs bG8=", // Space in middle - "SGVs\nbG8=", // Newline in middle - ]; - - let encoding = CString::new("UTF8").unwrap(); - - for malformed in malformed_inputs { - let input = CString::new(malformed).unwrap(); - let result = unsafe { base64_to_string(input.as_ptr(), encoding.as_ptr()) }; - - // Some Base64 decoders are lenient, so we just verify it doesn't crash - if !result.is_null() { - unsafe { crate::memory::free_string(result) }; - } - } - } - - // ========== Tests for bytes_to_base64 ========== - - #[test] - fn test_bytes_to_base64_happy_path() { - // Test: encode byte array [72, 101, 108, 108, 111] ("Hello") to "SGVsbG8=" - let bytes: Vec = vec![72, 101, 108, 108, 111]; // "Hello" in ASCII - - let result = unsafe { bytes_to_base64(bytes.as_ptr(), bytes.len()) }; - - assert!(!result.is_null(), "Result should not be null"); - let result_str = unsafe { CStr::from_ptr(result).to_str().unwrap() }; - assert_eq!( - result_str, "SGVsbG8=", - "Encoded bytes should produce 'SGVsbG8='" - ); - unsafe { crate::memory::free_string(result) }; - } - - #[test] - fn test_bytes_to_base64_null_pointer() { - // Test: null pointer should return null - let result = unsafe { bytes_to_base64(std::ptr::null(), 10) }; - - assert!(result.is_null(), "Null pointer should return null"); - } - - #[test] - fn test_bytes_to_base64_zero_length() { - // Test: zero length should encode to empty string - let bytes: Vec = vec![1, 2, 3]; // Data exists but length is 0 - - let result = unsafe { bytes_to_base64(bytes.as_ptr(), 0) }; - - assert!( - !result.is_null(), - "Result should not be null for zero length" - ); - let result_str = unsafe { CStr::from_ptr(result).to_str().unwrap() }; - assert_eq!(result_str, "", "Zero length should encode to empty string"); - unsafe { crate::memory::free_string(result) }; - } - - #[test] - fn test_bytes_to_base64_large_byte_array() { - // Test: 1MB byte array should encode successfully - let large_bytes: Vec = vec![65; 1024 * 1024]; // 1MB of 'A' (ASCII 65) - - let result = unsafe { bytes_to_base64(large_bytes.as_ptr(), large_bytes.len()) }; - - assert!( - !result.is_null(), - "Result should not be null for large byte array" - ); - - // Verify the result is a valid pointer and can be freed - let result_str = unsafe { CStr::from_ptr(result).to_str().unwrap() }; - assert!( - !result_str.is_empty(), - "Result should not be empty for 1MB input" - ); - - // Base64 encoding increases size by ~33%, so 1MB should produce ~1.33MB - assert!( - result_str.len() > 1_000_000, - "Encoded result should be larger than 1MB" - ); - - unsafe { crate::memory::free_string(result) }; - } - - #[test] - fn test_bytes_to_base64_various_byte_patterns() { - // Test: various byte patterns encode correctly - let test_cases = vec![ - (vec![0u8], "AA=="), // Single zero byte - (vec![255u8], "/w=="), // Single max byte - (vec![0, 1, 2, 3, 4], "AAECAwQ="), // Sequential bytes - (vec![255, 254, 253, 252], "//79/A=="), // High bytes - ]; - - for (bytes, expected) in test_cases { - let result = unsafe { bytes_to_base64(bytes.as_ptr(), bytes.len()) }; - - assert!( - !result.is_null(), - "Result should not be null for byte pattern: {:?}", - bytes - ); - let result_str = unsafe { CStr::from_ptr(result).to_str().unwrap() }; - assert_eq!( - result_str, expected, - "Byte pattern {:?} should encode to '{}'", - bytes, expected - ); - - unsafe { crate::memory::free_string(result) }; - } - } - - #[test] - fn test_bytes_to_base64_binary_data() { - // Test: arbitrary binary data (not valid UTF-8) - let binary_data: Vec = vec![ - 0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A, // PNG header - ]; - - let result = unsafe { bytes_to_base64(binary_data.as_ptr(), binary_data.len()) }; - - assert!( - !result.is_null(), - "Result should not be null for binary data" - ); - let result_str = unsafe { CStr::from_ptr(result).to_str().unwrap() }; - assert_eq!( - result_str, "iVBORw0KGgo=", - "PNG header should encode correctly" - ); - - unsafe { crate::memory::free_string(result) }; - } - - #[test] - fn test_bytes_to_base64_empty_array() { - // Test: empty byte array (length 0) should encode to empty string - let empty_bytes: Vec = vec![]; - - let result = unsafe { bytes_to_base64(empty_bytes.as_ptr(), 0) }; - - assert!( - !result.is_null(), - "Result should not be null for empty array" - ); - let result_str = unsafe { CStr::from_ptr(result).to_str().unwrap() }; - assert_eq!(result_str, "", "Empty array should encode to empty string"); - - unsafe { crate::memory::free_string(result) }; - } - - #[test] - fn test_bytes_to_base64_round_trip_with_string_to_base64() { - // Test: bytes_to_base64 should produce same result as string_to_base64 for UTF-8 text - let text = "Test String 123!"; - let bytes = text.as_bytes(); - - // Encode using bytes_to_base64 - let result_bytes = unsafe { bytes_to_base64(bytes.as_ptr(), bytes.len()) }; - assert!(!result_bytes.is_null(), "bytes_to_base64 should succeed"); - - // Encode using string_to_base64 - let text_cstring = CString::new(text).unwrap(); - let encoding = CString::new("UTF8").unwrap(); - let result_string = unsafe { string_to_base64(text_cstring.as_ptr(), encoding.as_ptr()) }; - assert!(!result_string.is_null(), "string_to_base64 should succeed"); - - // Compare results - let bytes_result = unsafe { CStr::from_ptr(result_bytes).to_str().unwrap() }; - let string_result = unsafe { CStr::from_ptr(result_string).to_str().unwrap() }; - assert_eq!( - bytes_result, string_result, - "bytes_to_base64 and string_to_base64 should produce identical results for UTF-8 text" - ); - - unsafe { - crate::memory::free_string(result_bytes); - crate::memory::free_string(result_string); - }; - } - - #[test] - fn test_bytes_to_base64_all_byte_values() { - // Test: all possible byte values (0-255) should encode without error - let all_bytes: Vec = (0..=255).collect(); - - let result = unsafe { bytes_to_base64(all_bytes.as_ptr(), all_bytes.len()) }; - - assert!( - !result.is_null(), - "Result should not be null for all byte values" - ); - let result_str = unsafe { CStr::from_ptr(result).to_str().unwrap() }; - assert!(!result_str.is_empty(), "Result should not be empty"); - - // Verify it's valid Base64 (only contains Base64 characters) - let valid_base64_chars = - "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/="; - for ch in result_str.chars() { - assert!( - valid_base64_chars.contains(ch), - "Result should only contain valid Base64 characters, found: {}", - ch - ); - } - - unsafe { crate::memory::free_string(result) }; - } - - // ========== Tests for base64_to_bytes ========== - - #[test] - fn test_base64_to_bytes_happy_path() { - // Test: decode "SGVsbG8=" to byte array [72, 101, 108, 108, 111] ("Hello") - let input = CString::new("SGVsbG8=").unwrap(); - let mut out_length: usize = 0; - - let result = unsafe { base64_to_bytes(input.as_ptr(), &mut out_length as *mut usize) }; - - assert!(!result.is_null(), "Result should not be null"); - assert_eq!(out_length, 5, "Output length should be 5 bytes"); - - // Verify the decoded bytes - let byte_slice = unsafe { std::slice::from_raw_parts(result, out_length) }; - assert_eq!( - byte_slice, - &[72, 101, 108, 108, 111], - "Decoded bytes should be [72, 101, 108, 108, 111] (Hello)" - ); - - unsafe { crate::memory::free_bytes(result) }; - } - - #[test] - fn test_base64_to_bytes_null_pointer() { - // Test: null pointer should return null - let mut out_length: usize = 0; - - let result = unsafe { base64_to_bytes(std::ptr::null(), &mut out_length as *mut usize) }; - - assert!(result.is_null(), "Null pointer should return null"); - assert_eq!(out_length, 0, "Output length should be 0 for null input"); - } - - #[test] - fn test_base64_to_bytes_invalid_base64() { - // Test: invalid Base64 string should return null - let input = CString::new("Not@Valid#Base64!").unwrap(); - let mut out_length: usize = 0; - - let result = unsafe { base64_to_bytes(input.as_ptr(), &mut out_length as *mut usize) }; - - assert!(result.is_null(), "Invalid Base64 string should return null"); - assert_eq!( - out_length, 0, - "Output length should be 0 for invalid Base64" - ); - } - - #[test] - fn test_base64_to_bytes_output_length_parameter() { - // Test: output length parameter should be correctly set - let test_cases = vec![ - ("", 0), // Empty string - ("QQ==", 1), // Single byte 'A' - ("QUJD", 3), // Three bytes 'ABC' - ("SGVsbG8=", 5), // Five bytes 'Hello' - ("VGVzdCBTdHJpbmc=", 11), // Eleven bytes 'Test String' - ]; - - for (base64_input, expected_length) in test_cases { - let input = CString::new(base64_input).unwrap(); - let mut out_length: usize = 0; - - let result = unsafe { base64_to_bytes(input.as_ptr(), &mut out_length as *mut usize) }; - - if expected_length == 0 { - // Empty string case - assert!( - !result.is_null(), - "Result should not be null for empty string" - ); - assert_eq!( - out_length, expected_length, - "Output length should be {} for input '{}'", - expected_length, base64_input - ); - } else { - assert!( - !result.is_null(), - "Result should not be null for input '{}'", - base64_input - ); - assert_eq!( - out_length, expected_length, - "Output length should be {} for input '{}'", - expected_length, base64_input - ); - } - - if !result.is_null() { - unsafe { crate::memory::free_bytes(result) }; - } - } - } - - #[test] - fn test_base64_to_bytes_empty_string() { - // Test: empty Base64 string should decode to empty byte array - let input = CString::new("").unwrap(); - let mut out_length: usize = 0; - - let result = unsafe { base64_to_bytes(input.as_ptr(), &mut out_length as *mut usize) }; - - assert!( - !result.is_null(), - "Result should not be null for empty string" - ); - assert_eq!(out_length, 0, "Output length should be 0 for empty string"); - - unsafe { crate::memory::free_bytes(result) }; - } - - #[test] - fn test_base64_to_bytes_binary_data() { - // Test: decode Base64 to binary data (PNG header) - let input = CString::new("iVBORw0KGgo=").unwrap(); - let mut out_length: usize = 0; - - let result = unsafe { base64_to_bytes(input.as_ptr(), &mut out_length as *mut usize) }; - - assert!( - !result.is_null(), - "Result should not be null for binary data" - ); - assert_eq!( - out_length, 8, - "Output length should be 8 bytes for PNG header" - ); - - let byte_slice = unsafe { std::slice::from_raw_parts(result, out_length) }; - let expected_png_header: Vec = vec![0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A]; - assert_eq!( - byte_slice, - expected_png_header.as_slice(), - "Decoded bytes should match PNG header" - ); - - unsafe { crate::memory::free_bytes(result) }; - } - - #[test] - fn test_base64_to_bytes_round_trip() { - // Test: encode bytes then decode should produce original bytes - let original_bytes: Vec = vec![0, 1, 2, 3, 4, 5, 255, 254, 253]; - - // Encode - let encoded_ptr = unsafe { bytes_to_base64(original_bytes.as_ptr(), original_bytes.len()) }; - assert!(!encoded_ptr.is_null(), "Encoding should succeed"); - - // Decode - let mut out_length: usize = 0; - let decoded_ptr = unsafe { base64_to_bytes(encoded_ptr, &mut out_length as *mut usize) }; - assert!(!decoded_ptr.is_null(), "Decoding should succeed"); - - // Verify round-trip - assert_eq!( - out_length, - original_bytes.len(), - "Decoded length should match original" - ); - let decoded_slice = unsafe { std::slice::from_raw_parts(decoded_ptr, out_length) }; - assert_eq!( - decoded_slice, - original_bytes.as_slice(), - "Round-trip should preserve original bytes" - ); - - unsafe { - crate::memory::free_string(encoded_ptr); - crate::memory::free_bytes(decoded_ptr); - }; - } - - #[test] - fn test_base64_to_bytes_large_data() { - // Test: decode large Base64 string (1MB of data) - let large_bytes: Vec = vec![65; 1024 * 1024]; // 1MB of 'A' (ASCII 65) - - // First encode to Base64 - let encoded_ptr = unsafe { bytes_to_base64(large_bytes.as_ptr(), large_bytes.len()) }; - assert!(!encoded_ptr.is_null(), "Encoding should succeed"); - - // Now decode back - let mut out_length: usize = 0; - let decoded_ptr = unsafe { base64_to_bytes(encoded_ptr, &mut out_length as *mut usize) }; - - assert!( - !decoded_ptr.is_null(), - "Decoding should succeed for large data" - ); - assert_eq!(out_length, 1024 * 1024, "Output length should be 1MB"); - - // Verify first and last bytes - let decoded_slice = unsafe { std::slice::from_raw_parts(decoded_ptr, out_length) }; - assert_eq!(decoded_slice[0], 65, "First byte should be 65"); - assert_eq!(decoded_slice[out_length - 1], 65, "Last byte should be 65"); - - unsafe { - crate::memory::free_string(encoded_ptr); - crate::memory::free_bytes(decoded_ptr); - }; - } - - #[test] - fn test_base64_to_bytes_all_byte_values() { - // Test: decode Base64 containing all possible byte values (0-255) - let all_bytes: Vec = (0..=255).collect(); - - // Encode - let encoded_ptr = unsafe { bytes_to_base64(all_bytes.as_ptr(), all_bytes.len()) }; - assert!(!encoded_ptr.is_null(), "Encoding should succeed"); - - // Decode - let mut out_length: usize = 0; - let decoded_ptr = unsafe { base64_to_bytes(encoded_ptr, &mut out_length as *mut usize) }; - - assert!( - !decoded_ptr.is_null(), - "Decoding should succeed for all byte values" - ); - assert_eq!(out_length, 256, "Output length should be 256 bytes"); - - let decoded_slice = unsafe { std::slice::from_raw_parts(decoded_ptr, out_length) }; - assert_eq!( - decoded_slice, - all_bytes.as_slice(), - "All byte values should round-trip correctly" - ); - - unsafe { - crate::memory::free_string(encoded_ptr); - crate::memory::free_bytes(decoded_ptr); - }; - } - - #[test] - fn test_base64_to_bytes_malformed_base64() { - // Test: malformed Base64 strings should return null - let malformed_inputs = vec![ - "SGVsbG8", // Missing padding (may or may not fail depending on decoder leniency) - "SGVs bG8=", // Space in middle - "SGVs\nbG8=", // Newline in middle - "!!!invalid", // Invalid characters - ]; - - for malformed in malformed_inputs { - let input = CString::new(malformed).unwrap(); - let mut out_length: usize = 0; - - let result = unsafe { base64_to_bytes(input.as_ptr(), &mut out_length as *mut usize) }; - - // Some decoders are lenient, but invalid characters should definitely fail - // We just verify it doesn't crash and handles errors gracefully - if result.is_null() { - assert_eq!(out_length, 0, "Output length should be 0 for failed decode"); - } else { - // If decoder is lenient and succeeds, just free the memory - unsafe { crate::memory::free_bytes(result) }; - } - } - } - - #[test] - fn test_base64_to_bytes_various_lengths() { - // Test: various input lengths decode correctly - let test_cases = vec![ - ("QQ==", vec![65]), // 1 byte - ("QUI=", vec![65, 66]), // 2 bytes - ("QUJD", vec![65, 66, 67]), // 3 bytes - ("QUJDRA==", vec![65, 66, 67, 68]), // 4 bytes - ("QUJDREU=", vec![65, 66, 67, 68, 69]), // 5 bytes - ]; - - for (base64_input, expected_bytes) in test_cases { - let input = CString::new(base64_input).unwrap(); - let mut out_length: usize = 0; - - let result = unsafe { base64_to_bytes(input.as_ptr(), &mut out_length as *mut usize) }; - - assert!( - !result.is_null(), - "Result should not be null for input '{}'", - base64_input - ); - assert_eq!( - out_length, - expected_bytes.len(), - "Output length should be {} for input '{}'", - expected_bytes.len(), - base64_input - ); - - let decoded_slice = unsafe { std::slice::from_raw_parts(result, out_length) }; - assert_eq!( - decoded_slice, - expected_bytes.as_slice(), - "Decoded bytes should match expected for input '{}'", - base64_input - ); - - unsafe { crate::memory::free_bytes(result) }; - } - } - - #[test] - fn test_base64_to_bytes_null_output_length_pointer() { - // Test: null output length pointer should be allowed (optional parameter) - let input = CString::new("SGVsbG8=").unwrap(); - - let result = unsafe { base64_to_bytes(input.as_ptr(), std::ptr::null_mut()) }; - - // Should succeed even with null out_length pointer - assert!( - !result.is_null(), - "Should succeed with null out_length pointer" - ); - - // Verify the data is correct - let data = unsafe { std::slice::from_raw_parts(result, 5) }; - assert_eq!(data, &[72, 101, 108, 108, 111]); - - unsafe { crate::memory::free_bytes(result) }; - } - - #[test] - fn test_encoding_case_insensitivity_performance() { - // Test: verify that encoding names are case-insensitive - // This also documents the performance concern with to_uppercase() - let input = CString::new("Test").unwrap(); - let encoding_variants = vec![ - "utf8", "UTF8", "Utf8", "uTf8", "ascii", "ASCII", "Ascii", "unicode", "UNICODE", - "Unicode", - ]; - - for encoding in encoding_variants { - let enc_cstring = CString::new(encoding).unwrap(); - let result = unsafe { string_to_base64(input.as_ptr(), enc_cstring.as_ptr()) }; - - assert!( - !result.is_null(), - "Encoding '{}' should be recognized (case-insensitive)", - encoding - ); - - unsafe { crate::memory::free_string(result) }; - } - } - - #[test] - fn test_encoding_name_with_hyphens() { - // Test: verify that encoding names with hyphens work - let input = CString::new("Test").unwrap(); - let encoding_variants = vec![ - ("UTF-8", "UTF8"), - ("UTF-16", "UTF16"), - ("UTF-32", "UTF32"), - ("UTF-16BE", "UTF16BE"), - ]; - - for (hyphenated, non_hyphenated) in encoding_variants { - let enc1 = CString::new(hyphenated).unwrap(); - let enc2 = CString::new(non_hyphenated).unwrap(); - - let result1 = unsafe { string_to_base64(input.as_ptr(), enc1.as_ptr()) }; - let result2 = unsafe { string_to_base64(input.as_ptr(), enc2.as_ptr()) }; - - // Both should work and produce the same result - assert!(!result1.is_null(), "Encoding '{}' should work", hyphenated); - assert!( - !result2.is_null(), - "Encoding '{}' should work", - non_hyphenated - ); - - let str1 = unsafe { CStr::from_ptr(result1).to_str().unwrap() }; - let str2 = unsafe { CStr::from_ptr(result2).to_str().unwrap() }; - - assert_eq!( - str1, str2, - "Encodings '{}' and '{}' should produce identical results", - hyphenated, non_hyphenated - ); - - unsafe { - crate::memory::free_string(result1); - crate::memory::free_string(result2); - } - } - } - - #[test] - fn test_utf7_rejection_is_documented() { - // Test: UTF7 should be explicitly rejected with clear error message - let input = CString::new("Hello").unwrap(); - let utf7_variants = vec!["UTF7", "utf7", "Utf7", "UTF-7", "utf-7"]; - - for variant in utf7_variants { - let encoding = CString::new(variant).unwrap(); - let result = unsafe { string_to_base64(input.as_ptr(), encoding.as_ptr()) }; - - assert!( - result.is_null(), - "UTF7 variant '{}' should be rejected", - variant - ); - - // Verify error message is set - let error = unsafe { crate::error::get_last_error() }; - assert!( - !error.is_null(), - "Error message should be set for UTF7 variant '{}'", - variant - ); - - let error_str = unsafe { CStr::from_ptr(error).to_str().unwrap() }; - - // All variants should now be caught by the explicit UTF7 check - assert!( - error_str.contains("UTF7") || error_str.contains("deprecated"), - "Error message for '{}' should mention UTF7 or deprecated, got: {}", - variant, - error_str - ); - - unsafe { crate::memory::free_string(error) }; - } - } - - #[test] - fn test_ascii_encoding_rejects_non_ascii() { - // Test: ASCII encoding should reject strings with non-ASCII characters - let non_ascii_strings = vec![ - "Hello 🌍", // Emoji - "Café", // Accented character - "日本語", // Japanese - "Hello\u{0080}", // First non-ASCII character - ]; - - let encoding = CString::new("ASCII").unwrap(); - - for test_str in non_ascii_strings { - let input = CString::new(test_str).unwrap(); - let result = unsafe { string_to_base64(input.as_ptr(), encoding.as_ptr()) }; - - assert!( - result.is_null(), - "ASCII encoding should reject non-ASCII string: {}", - test_str - ); - - // Verify error message mentions non-ASCII - let error = unsafe { crate::error::get_last_error() }; - assert!(!error.is_null(), "Error should be set for non-ASCII input"); - - let error_str = unsafe { CStr::from_ptr(error).to_str().unwrap() }; - assert!( - error_str.contains("ASCII") || error_str.contains("non-ASCII"), - "Error should mention ASCII issue, got: {}", - error_str - ); - - unsafe { crate::memory::free_string(error) }; - } - } - - #[test] - fn test_ascii_encoding_accepts_valid_ascii() { - // Test: ASCII encoding should accept valid ASCII strings - let ascii_strings = vec![ - "Hello", - "123", - "!@#$%^&*()", - "The quick brown fox", - "\t\n\r", - ]; - - let encoding = CString::new("ASCII").unwrap(); - - for test_str in ascii_strings { - let input = CString::new(test_str).unwrap(); - let result = unsafe { string_to_base64(input.as_ptr(), encoding.as_ptr()) }; - - assert!( - !result.is_null(), - "ASCII encoding should accept valid ASCII string: {}", - test_str - ); - - unsafe { crate::memory::free_string(result) }; - } - } - - #[test] - fn test_utf16_byte_order() { - // Test: verify UTF-16 uses little-endian byte order (UTF-16LE) - let input = CString::new("A").unwrap(); // 'A' = U+0041 - let encoding = CString::new("UTF16").unwrap(); - - let result = unsafe { string_to_base64(input.as_ptr(), encoding.as_ptr()) }; - assert!(!result.is_null(), "UTF16 encoding should succeed"); - - // Decode to verify byte order - let mut out_length: usize = 0; - let bytes_ptr = unsafe { base64_to_bytes(result, &mut out_length as *mut usize) }; - - assert_eq!(out_length, 2, "UTF-16 'A' should be 2 bytes"); - let bytes = unsafe { std::slice::from_raw_parts(bytes_ptr, out_length) }; - - // UTF-16LE: 'A' (U+0041) = [0x41, 0x00] - assert_eq!(bytes[0], 0x41, "First byte should be 0x41 (little-endian)"); - assert_eq!(bytes[1], 0x00, "Second byte should be 0x00"); - - unsafe { - crate::memory::free_string(result); - crate::memory::free_bytes(bytes_ptr); - } - } - - #[test] - fn test_utf16be_byte_order() { - // Test: verify UTF-16BE uses big-endian byte order - let input = CString::new("A").unwrap(); // 'A' = U+0041 - let encoding = CString::new("BigEndianUnicode").unwrap(); - - let result = unsafe { string_to_base64(input.as_ptr(), encoding.as_ptr()) }; - assert!(!result.is_null(), "UTF16BE encoding should succeed"); - - // Decode to verify byte order - let mut out_length: usize = 0; - let bytes_ptr = unsafe { base64_to_bytes(result, &mut out_length as *mut usize) }; - - assert_eq!(out_length, 2, "UTF-16BE 'A' should be 2 bytes"); - let bytes = unsafe { std::slice::from_raw_parts(bytes_ptr, out_length) }; - - // UTF-16BE: 'A' (U+0041) = [0x00, 0x41] - assert_eq!(bytes[0], 0x00, "First byte should be 0x00 (big-endian)"); - assert_eq!(bytes[1], 0x41, "Second byte should be 0x41"); - - unsafe { - crate::memory::free_string(result); - crate::memory::free_bytes(bytes_ptr); - } - } - - #[test] - fn test_utf32_encoding_size() { - // Test: verify UTF-32 uses 4 bytes per character - let input = CString::new("AB").unwrap(); - let encoding = CString::new("UTF32").unwrap(); - - let result = unsafe { string_to_base64(input.as_ptr(), encoding.as_ptr()) }; - assert!(!result.is_null(), "UTF32 encoding should succeed"); - - // Decode to verify size - let mut out_length: usize = 0; - let bytes_ptr = unsafe { base64_to_bytes(result, &mut out_length as *mut usize) }; - - assert_eq!( - out_length, 8, - "UTF-32 'AB' should be 8 bytes (2 chars × 4 bytes)" - ); - - unsafe { - crate::memory::free_string(result); - crate::memory::free_bytes(bytes_ptr); - } - } - - #[test] - fn test_default_encoding_is_utf8() { - // Test: verify that "Default" encoding behaves like UTF-8 - let input = CString::new("Hello 🌍").unwrap(); - - let utf8_enc = CString::new("UTF8").unwrap(); - let default_enc = CString::new("Default").unwrap(); - - let result_utf8 = unsafe { string_to_base64(input.as_ptr(), utf8_enc.as_ptr()) }; - let result_default = unsafe { string_to_base64(input.as_ptr(), default_enc.as_ptr()) }; - - assert!(!result_utf8.is_null(), "UTF8 encoding should succeed"); - assert!(!result_default.is_null(), "Default encoding should succeed"); - - let str_utf8 = unsafe { CStr::from_ptr(result_utf8).to_str().unwrap() }; - let str_default = unsafe { CStr::from_ptr(result_default).to_str().unwrap() }; - - assert_eq!( - str_utf8, str_default, - "Default encoding should produce same result as UTF8" - ); - - unsafe { - crate::memory::free_string(result_utf8); - crate::memory::free_string(result_default); - } - } - - #[test] - fn test_encoding_with_invalid_utf8_in_encoding_name() { - // Test: verify handling of invalid UTF-8 in encoding parameter - // This is a safety test for the encoding parameter validation - let input = CString::new("Hello").unwrap(); - - // Create a CString with invalid UTF-8 (this is tricky, as CString validates) - // Instead, we test that the UTF-8 validation in the function works - // by ensuring valid encodings work - let valid_encodings = vec!["UTF8", "ASCII", "Unicode"]; - - for enc in valid_encodings { - let encoding = CString::new(enc).unwrap(); - let result = unsafe { string_to_base64(input.as_ptr(), encoding.as_ptr()) }; - assert!(!result.is_null(), "Valid encoding '{}' should work", enc); - unsafe { crate::memory::free_string(result) }; - } - } - - #[test] - fn test_base64_to_bytes_null_output_length_allowed() { - // Test: null output length pointer should be allowed (optional parameter) - let input = CString::new("SGVsbG8=").unwrap(); - - // Should work even with null out_length pointer - let result = unsafe { base64_to_bytes(input.as_ptr(), std::ptr::null_mut()) }; - - assert!( - !result.is_null(), - "Should succeed even with null out_length pointer" - ); - - // We can still verify the data is correct by reading it - // (we know "SGVsbG8=" decodes to "Hello" which is 5 bytes) - let data = unsafe { std::slice::from_raw_parts(result, 5) }; - assert_eq!(data, &[72, 101, 108, 108, 111]); - - unsafe { crate::memory::free_bytes(result) }; - } - - #[test] - fn test_concurrent_base64_operations() { - use std::thread; - - // Test: multiple threads using base64 functions concurrently - // Error handling should be thread-safe - let handles: Vec<_> = (0..10) - .map(|i| { - thread::spawn(move || { - let input = CString::new(format!("test{}", i)).unwrap(); - let encoding = CString::new("UTF8").unwrap(); - - // Encode - let result = unsafe { string_to_base64(input.as_ptr(), encoding.as_ptr()) }; - assert!(!result.is_null(), "Encoding should succeed in thread {}", i); - - // Decode - let decoded = unsafe { base64_to_string(result, encoding.as_ptr()) }; - assert!( - !decoded.is_null(), - "Decoding should succeed in thread {}", - i - ); - - let decoded_str = unsafe { CStr::from_ptr(decoded).to_str().unwrap() }; - assert_eq!(decoded_str, format!("test{}", i)); - - unsafe { - crate::memory::free_string(result); - crate::memory::free_string(decoded); - } - }) - }) - .collect(); - - for handle in handles { - handle.join().unwrap(); - } - } - - #[test] - fn test_concurrent_error_isolation() { - use std::sync::Arc; - use std::sync::atomic::{AtomicBool, Ordering}; - use std::thread; - - // Test: errors in one thread don't affect other threads - let success_flag = Arc::new(AtomicBool::new(true)); - - let handles: Vec<_> = (0..5) - .map(|i| { - let flag = Arc::clone(&success_flag); - thread::spawn(move || { - // Thread with even ID will succeed, odd will fail - if i % 2 == 0 { - let input = CString::new("Hello").unwrap(); - let encoding = CString::new("UTF8").unwrap(); - let result = unsafe { string_to_base64(input.as_ptr(), encoding.as_ptr()) }; - - if result.is_null() { - flag.store(false, Ordering::SeqCst); - } else { - unsafe { crate::memory::free_string(result) }; - } - } else { - // Trigger an error - let encoding = CString::new("UTF8").unwrap(); - let result = - unsafe { string_to_base64(std::ptr::null(), encoding.as_ptr()) }; - - // Should be null due to error - if !result.is_null() { - flag.store(false, Ordering::SeqCst); - } - - // Check that error is set for THIS thread - let error = unsafe { crate::error::get_last_error() }; - if error.is_null() { - flag.store(false, Ordering::SeqCst); - } else { - unsafe { crate::memory::free_string(error) }; - } - } - }) - }) - .collect(); - - for handle in handles { - handle.join().unwrap(); - } - - assert!( - success_flag.load(Ordering::SeqCst), - "All threads should handle errors correctly" - ); - } - - // ========== Tests for Latin-1 encoding and fallback ========== - - #[test] - fn test_latin1_encoding_direct() { - // Test: Latin-1 encoding should handle any byte value (0x00-0xFF) - // Note: Null byte (0x00) is replaced with replacement character for C string safety - let all_bytes: Vec = (0..=255).collect(); - let result = convert_bytes_to_string(&all_bytes, "ISO-8859-1"); - - assert!(result.is_ok(), "Latin-1 should accept any byte value"); - let s = result.unwrap(); - - // The string has 256 Unicode code points (chars), but may have more bytes - // because characters > 127 are multi-byte in UTF-8 - assert_eq!(s.chars().count(), 256, "Result should have 256 characters"); - - // Verify mapping: each byte should map to its corresponding Unicode code point - // except for null byte (0x00) which maps to replacement character (U+FFFD) - for (i, ch) in s.chars().enumerate() { - if i == 0 { - assert_eq!( - ch, '\u{FFFD}', - "Null byte should map to replacement character" - ); - } else { - assert_eq!( - ch as u32, i as u32, - "Byte {} should map to Unicode code point {}", - i, i - ); - } - } - } - - #[test] - fn test_latin1_encoding_variants() { - // Test: all Latin-1 encoding name variants should work - let test_bytes: Vec = vec![0xA1, 0xC0, 0xFF]; - let variants = vec!["ISO-8859-1", "LATIN1", "Latin-1", "latin1", "iso-8859-1"]; - - for variant in variants { - let result = convert_bytes_to_string(&test_bytes, variant); - assert!( - result.is_ok(), - "Latin-1 variant '{}' should be recognized", - variant - ); - } - } - - #[test] - fn test_convert_bytes_to_string_with_fallback_utf8_success() { - // Test: valid UTF-8 should succeed without fallback - let utf8_bytes = "Hello".as_bytes().to_vec(); - let result = convert_bytes_to_string_with_fallback(&utf8_bytes, "UTF8"); - - assert!(result.is_ok(), "Valid UTF-8 should succeed"); - assert_eq!(result.unwrap(), "Hello"); - } - - #[test] - fn test_convert_bytes_to_string_with_fallback_invalid_utf8() { - // Test: invalid UTF-8 should fall back to Latin-1 - // Note: null bytes (0x00) are replaced with replacement character - let invalid_utf8: Vec = vec![0xA1, 0x59, 0xC0, 0xA5, 0xE4, 0x94, 0xFF, 0x80]; - let result = convert_bytes_to_string_with_fallback(&invalid_utf8, "UTF8"); - - assert!( - result.is_ok(), - "Invalid UTF-8 should fall back to Latin-1 and succeed" - ); - let s = result.unwrap(); - - // Verify the string can be converted back to the original bytes via Latin-1 - let round_trip: Vec = s.chars().map(|c| c as u8).collect(); - assert_eq!( - round_trip, invalid_utf8, - "Latin-1 fallback should preserve original bytes" - ); - } - - #[test] - fn test_convert_bytes_to_string_with_fallback_non_ascii() { - // Test: non-ASCII bytes with ASCII encoding should fall back to Latin-1 - let non_ascii: Vec = vec![72, 200, 111]; // 'H', 0xC8, 'o' - let result = convert_bytes_to_string_with_fallback(&non_ascii, "ASCII"); - - assert!( - result.is_ok(), - "Non-ASCII bytes should fall back to Latin-1" - ); - } - - #[test] - fn test_convert_bytes_to_string_with_fallback_structural_errors_fallback() { - // Test: structural errors (wrong byte length for UTF-16) should also fall back to Latin-1 - // This is the desired behavior - binary data may have any length - // Note: null bytes are replaced with replacement character, so we use non-null bytes - let odd_bytes: Vec = vec![72, 65, 101]; // Odd length for UTF-16, no null bytes - let result = convert_bytes_to_string_with_fallback(&odd_bytes, "Unicode"); - - assert!( - result.is_ok(), - "Odd-length bytes should fall back to Latin-1 for UTF-16" - ); - - // Verify round-trip - let s = result.unwrap(); - let round_trip: Vec = s.chars().map(|c| c as u8).collect(); - assert_eq!( - round_trip, odd_bytes, - "Should preserve original bytes via Latin-1" - ); - } - - #[test] - fn test_convert_bytes_to_string_with_fallback_unsupported_encoding() { - // Test: unsupported encoding should propagate error - let bytes: Vec = vec![72, 101, 108, 108, 111]; - let result = convert_bytes_to_string_with_fallback(&bytes, "INVALID_ENCODING"); - - assert!( - result.is_err(), - "Unsupported encoding should propagate error" - ); - assert!( - result.unwrap_err().contains("Unsupported encoding"), - "Error should mention unsupported encoding" - ); - } - - #[test] - fn test_convert_bytes_to_string_with_fallback_binary_data_round_trip() { - // Test: binary data (like certificate bytes) should round-trip through Latin-1 fallback - // This simulates the Get-SecureBootUEFI scenario from issue #24 - // Note: null bytes (0x00) are replaced with replacement character, so we exclude them - let binary_data: Vec = vec![ - 0x89, 0x50, 0x4E, 0x47, // PNG-like header - 0x0D, 0x0A, 0x1A, 0x0A, 0xFF, 0xFE, 0x80, // Various binary bytes (no null) - ]; - - let result = convert_bytes_to_string_with_fallback(&binary_data, "UTF8"); - assert!( - result.is_ok(), - "Binary data should succeed via Latin-1 fallback" - ); - - let s = result.unwrap(); - let round_trip: Vec = s.chars().map(|c| c as u8).collect(); - assert_eq!( - round_trip, binary_data, - "Binary data should round-trip correctly" - ); - } - - #[test] - fn test_convert_bytes_to_string_with_fallback_null_bytes_replaced() { - // Test: null bytes are replaced with Unicode replacement character - let data_with_null: Vec = vec![0xA1, 0x00, 0xC0]; - - let result = convert_bytes_to_string_with_fallback(&data_with_null, "UTF8"); - assert!(result.is_ok(), "Data with null should succeed"); - - let s = result.unwrap(); - assert_eq!(s.chars().count(), 3, "Should have 3 characters"); - assert_eq!( - s.chars().next().unwrap(), - '\u{00A1}', - "First char should be Latin-1 0xA1" - ); - assert_eq!( - s.chars().nth(1).unwrap(), - '\u{FFFD}', - "Null byte should be replacement char" - ); - assert_eq!( - s.chars().nth(2).unwrap(), - '\u{00C0}', - "Third char should be Latin-1 0xC0" - ); - } -} diff --git a/lib/src/base64/bytes_ops.rs b/lib/src/base64/bytes_ops.rs new file mode 100644 index 0000000..5a85761 --- /dev/null +++ b/lib/src/base64/bytes_ops.rs @@ -0,0 +1,184 @@ +//! Byte array-based Base64 encoding and decoding functions + +use base64::{Engine as _, engine::general_purpose}; +use std::ffi::{CStr, CString}; +use std::os::raw::c_char; + +/// Convert a byte array to Base64 encoding +/// +/// # Safety +/// This function is unsafe because it dereferences raw pointers. +/// The caller must ensure that: +/// - `bytes` is a valid pointer to a byte array or null +/// - `length` accurately represents the number of bytes to read +/// - The returned pointer must be freed using `free_string` +#[unsafe(no_mangle)] +pub unsafe extern "C" fn bytes_to_base64(bytes: *const u8, length: usize) -> *mut c_char { + if bytes.is_null() { + crate::error::set_error("Byte array pointer is null".to_string()); + return std::ptr::null_mut(); + } + + if length == 0 { + match CString::new("") { + Ok(c_str) => { + crate::error::clear_error(); + return c_str.into_raw(); + } + Err(_) => { + crate::error::set_error("Failed to create empty C string".to_string()); + return std::ptr::null_mut(); + } + } + } + + let byte_slice = unsafe { std::slice::from_raw_parts(bytes, length) }; + let encoded = general_purpose::STANDARD.encode(byte_slice); + + match CString::new(encoded) { + Ok(c_str) => { + crate::error::clear_error(); + c_str.into_raw() + } + Err(_) => { + crate::error::set_error("Failed to create C string from Base64 result".to_string()); + std::ptr::null_mut() + } + } +} + +/// Convert a Base64 string to a byte array +/// +/// # Safety +/// This function is unsafe because it dereferences raw pointers. +/// The caller must ensure that: +/// - `input` is a valid null-terminated C string or null +/// - `out_length` is a valid pointer to a usize or null (optional) +/// - The returned pointer must be freed using `free_bytes` +#[unsafe(no_mangle)] +pub unsafe extern "C" fn base64_to_bytes(input: *const c_char, out_length: *mut usize) -> *mut u8 { + if input.is_null() { + crate::error::set_error("Input pointer is null".to_string()); + if !out_length.is_null() { + unsafe { + *out_length = 0; + } + } + return std::ptr::null_mut(); + } + + let input_str = match unsafe { CStr::from_ptr(input).to_str() } { + Ok(s) => s, + Err(_) => { + crate::error::set_error("Invalid UTF-8 in input string".to_string()); + if !out_length.is_null() { + unsafe { + *out_length = 0; + } + } + return std::ptr::null_mut(); + } + }; + + if input_str.is_empty() { + crate::error::clear_error(); + if !out_length.is_null() { + unsafe { + *out_length = 0; + } + } + return crate::memory::allocate_byte_array(Vec::::new()); + } + + let decoded_bytes = match general_purpose::STANDARD.decode(input_str) { + Ok(bytes) => bytes, + Err(e) => { + crate::error::set_error(format!("Failed to decode Base64: {}", e)); + if !out_length.is_null() { + unsafe { + *out_length = 0; + } + } + return std::ptr::null_mut(); + } + }; + + let length = decoded_bytes.len(); + if !out_length.is_null() { + unsafe { + *out_length = length; + } + } + + crate::error::clear_error(); + crate::memory::allocate_byte_array(decoded_bytes) +} + +#[cfg(test)] +mod tests { + use super::*; + use std::ffi::CString; + + #[test] + fn test_bytes_to_base64_happy_path() { + let bytes: Vec = vec![72, 101, 108, 108, 111]; + let result = unsafe { bytes_to_base64(bytes.as_ptr(), bytes.len()) }; + assert!(!result.is_null()); + let result_str = unsafe { CStr::from_ptr(result).to_str().unwrap() }; + assert_eq!(result_str, "SGVsbG8="); + unsafe { crate::memory::free_string(result) }; + } + + #[test] + fn test_bytes_to_base64_null_pointer() { + let result = unsafe { bytes_to_base64(std::ptr::null(), 10) }; + assert!(result.is_null()); + } + + #[test] + fn test_bytes_to_base64_zero_length() { + let bytes: Vec = vec![1, 2, 3]; + let result = unsafe { bytes_to_base64(bytes.as_ptr(), 0) }; + assert!(!result.is_null()); + let result_str = unsafe { CStr::from_ptr(result).to_str().unwrap() }; + assert_eq!(result_str, ""); + unsafe { crate::memory::free_string(result) }; + } + + #[test] + fn test_base64_to_bytes_happy_path() { + let input = CString::new("SGVsbG8=").unwrap(); + let mut out_length: usize = 0; + let result = unsafe { base64_to_bytes(input.as_ptr(), &mut out_length as *mut usize) }; + assert!(!result.is_null()); + assert_eq!(out_length, 5); + let byte_slice = unsafe { std::slice::from_raw_parts(result, out_length) }; + assert_eq!(byte_slice, &[72, 101, 108, 108, 111]); + unsafe { crate::memory::free_bytes(result) }; + } + + #[test] + fn test_base64_to_bytes_null_pointer() { + let mut out_length: usize = 0; + let result = unsafe { base64_to_bytes(std::ptr::null(), &mut out_length as *mut usize) }; + assert!(result.is_null()); + assert_eq!(out_length, 0); + } + + #[test] + fn test_base64_to_bytes_round_trip() { + let original_bytes: Vec = vec![0, 1, 2, 3, 4, 5, 255, 254, 253]; + let encoded_ptr = unsafe { bytes_to_base64(original_bytes.as_ptr(), original_bytes.len()) }; + assert!(!encoded_ptr.is_null()); + let mut out_length: usize = 0; + let decoded_ptr = unsafe { base64_to_bytes(encoded_ptr, &mut out_length as *mut usize) }; + assert!(!decoded_ptr.is_null()); + assert_eq!(out_length, original_bytes.len()); + let decoded_slice = unsafe { std::slice::from_raw_parts(decoded_ptr, out_length) }; + assert_eq!(decoded_slice, original_bytes.as_slice()); + unsafe { + crate::memory::free_string(encoded_ptr); + crate::memory::free_bytes(decoded_ptr); + }; + } +} diff --git a/lib/src/base64/encoding.rs b/lib/src/base64/encoding.rs new file mode 100644 index 0000000..e1e73f2 --- /dev/null +++ b/lib/src/base64/encoding.rs @@ -0,0 +1,334 @@ +//! Encoding conversion helper functions + +/// Convert a Rust string to bytes using the specified encoding +pub(crate) fn convert_string_to_bytes(input: &str, encoding: &str) -> Result, String> { + // Use eq_ignore_ascii_case to avoid allocating with to_uppercase() + if encoding.eq_ignore_ascii_case("UTF8") || encoding.eq_ignore_ascii_case("UTF-8") { + Ok(input.as_bytes().to_vec()) + } else if encoding.eq_ignore_ascii_case("ASCII") { + // Validate that all characters are ASCII + if input.is_ascii() { + Ok(input.as_bytes().to_vec()) + } else { + Err("String contains non-ASCII characters".to_string()) + } + } else if encoding.eq_ignore_ascii_case("UNICODE") + || encoding.eq_ignore_ascii_case("UTF16") + || encoding.eq_ignore_ascii_case("UTF-16") + { + // Unicode in .NET typically means UTF-16LE + let utf16: Vec = input.encode_utf16().collect(); + let mut bytes = Vec::with_capacity(utf16.len() * 2); + for word in utf16 { + bytes.push((word & 0xFF) as u8); + bytes.push((word >> 8) as u8); + } + Ok(bytes) + } else if encoding.eq_ignore_ascii_case("UTF32") || encoding.eq_ignore_ascii_case("UTF-32") { + // UTF-32LE encoding + let mut bytes = Vec::with_capacity(input.chars().count() * 4); + for ch in input.chars() { + let code_point = ch as u32; + bytes.push((code_point & 0xFF) as u8); + bytes.push(((code_point >> 8) & 0xFF) as u8); + bytes.push(((code_point >> 16) & 0xFF) as u8); + bytes.push(((code_point >> 24) & 0xFF) as u8); + } + Ok(bytes) + } else if encoding.eq_ignore_ascii_case("BIGENDIANUNICODE") + || encoding.eq_ignore_ascii_case("UTF16BE") + || encoding.eq_ignore_ascii_case("UTF-16BE") + { + // UTF-16BE encoding + let utf16: Vec = input.encode_utf16().collect(); + let mut bytes = Vec::with_capacity(utf16.len() * 2); + for word in utf16 { + bytes.push((word >> 8) as u8); + bytes.push((word & 0xFF) as u8); + } + Ok(bytes) + } else if encoding.eq_ignore_ascii_case("DEFAULT") { + // Default encoding is UTF-8 + Ok(input.as_bytes().to_vec()) + } else { + Err(format!("Unsupported encoding: {}", encoding)) + } +} + +/// Convert bytes to a Rust string using the specified encoding +pub(crate) fn convert_bytes_to_string(bytes: &[u8], encoding: &str) -> Result { + // Use eq_ignore_ascii_case to avoid allocating with to_uppercase() + if encoding.eq_ignore_ascii_case("UTF8") || encoding.eq_ignore_ascii_case("UTF-8") { + String::from_utf8(bytes.to_vec()).map_err(|e| format!("Invalid UTF-8 bytes: {}", e)) + } else if encoding.eq_ignore_ascii_case("ASCII") { + // Validate that all bytes are ASCII + if bytes.iter().all(|&b| b < 128) { + String::from_utf8(bytes.to_vec()).map_err(|e| format!("Invalid ASCII bytes: {}", e)) + } else { + Err("Bytes contain non-ASCII values".to_string()) + } + } else if encoding.eq_ignore_ascii_case("UNICODE") + || encoding.eq_ignore_ascii_case("UTF16") + || encoding.eq_ignore_ascii_case("UTF-16") + { + // Unicode in .NET typically means UTF-16LE + if !bytes.len().is_multiple_of(2) { + return Err("Invalid UTF-16 byte length (must be even)".to_string()); + } + + let mut utf16_chars = Vec::with_capacity(bytes.len() / 2); + for chunk in bytes.chunks_exact(2) { + let word = u16::from_le_bytes([chunk[0], chunk[1]]); + utf16_chars.push(word); + } + + String::from_utf16(&utf16_chars).map_err(|e| format!("Invalid UTF-16 bytes: {}", e)) + } else if encoding.eq_ignore_ascii_case("UTF32") || encoding.eq_ignore_ascii_case("UTF-32") { + // UTF-32LE encoding + if !bytes.len().is_multiple_of(4) { + return Err("Invalid UTF-32 byte length (must be multiple of 4)".to_string()); + } + + let mut result = String::new(); + for chunk in bytes.chunks_exact(4) { + let code_point = u32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]); + match char::from_u32(code_point) { + Some(ch) => result.push(ch), + None => return Err(format!("Invalid UTF-32 code point: {}", code_point)), + } + } + Ok(result) + } else if encoding.eq_ignore_ascii_case("BIGENDIANUNICODE") + || encoding.eq_ignore_ascii_case("UTF16BE") + || encoding.eq_ignore_ascii_case("UTF-16BE") + { + // UTF-16BE encoding + if !bytes.len().is_multiple_of(2) { + return Err("Invalid UTF-16BE byte length (must be even)".to_string()); + } + + let mut utf16_chars = Vec::with_capacity(bytes.len() / 2); + for chunk in bytes.chunks_exact(2) { + let word = u16::from_be_bytes([chunk[0], chunk[1]]); + utf16_chars.push(word); + } + + String::from_utf16(&utf16_chars).map_err(|e| format!("Invalid UTF-16BE bytes: {}", e)) + } else if encoding.eq_ignore_ascii_case("DEFAULT") { + // Default encoding is UTF-8 + String::from_utf8(bytes.to_vec()).map_err(|e| format!("Invalid UTF-8 bytes: {}", e)) + } else if encoding.eq_ignore_ascii_case("ISO-8859-1") + || encoding.eq_ignore_ascii_case("LATIN1") + || encoding.eq_ignore_ascii_case("LATIN-1") + { + // Latin-1 (ISO-8859-1) - each byte maps directly to a Unicode code point + // This encoding can represent any byte value (0x00-0xFF) + // Note: Null bytes (0x00) are replaced with Unicode replacement character (U+FFFD) + // to ensure the result can be safely passed through C string interfaces + Ok(bytes + .iter() + .map(|&b| if b == 0 { '\u{FFFD}' } else { b as char }) + .collect()) + } else { + Err(format!("Unsupported encoding: {}", encoding)) + } +} + +/// Convert bytes to a Rust string with automatic fallback to Latin-1 for binary data +/// +/// This function first attempts to decode using the specified encoding. If that fails +/// due to invalid byte sequences (common with binary data like certificates), it +/// automatically falls back to Latin-1 (ISO-8859-1) which can represent any byte value. +/// +/// Note: Null bytes (0x00) are replaced with the Unicode replacement character (U+FFFD) +/// to ensure the result can be safely passed through C string interfaces. +pub(crate) fn convert_bytes_to_string_with_fallback( + bytes: &[u8], + encoding: &str, +) -> Result { + match convert_bytes_to_string(bytes, encoding) { + Ok(s) => Ok(s), + Err(e) => { + // Check if this is an encoding error that Latin-1 fallback can handle + if e.contains("Invalid UTF-8") + || e.contains("Invalid ASCII") + || e.contains("Invalid UTF-16") + || e.contains("Invalid UTF-32") + || e.contains("non-ASCII values") + { + // Fall back to Latin-1 which can represent any byte + // Replace null bytes with replacement character for C string safety + Ok(bytes + .iter() + .map(|&b| if b == 0 { '\u{FFFD}' } else { b as char }) + .collect()) + } else { + // Other errors (unsupported encoding, wrong byte length) should propagate + Err(e) + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_latin1_encoding_direct() { + let all_bytes: Vec = (0..=255).collect(); + let result = convert_bytes_to_string(&all_bytes, "ISO-8859-1"); + + assert!(result.is_ok(), "Latin-1 should accept any byte value"); + let s = result.unwrap(); + + assert_eq!(s.chars().count(), 256, "Result should have 256 characters"); + + for (i, ch) in s.chars().enumerate() { + if i == 0 { + assert_eq!( + ch, '\u{FFFD}', + "Null byte should map to replacement character" + ); + } else { + assert_eq!( + ch as u32, i as u32, + "Byte {} should map to Unicode code point {}", + i, i + ); + } + } + } + + #[test] + fn test_latin1_encoding_variants() { + let test_bytes: Vec = vec![0xA1, 0xC0, 0xFF]; + let variants = vec!["ISO-8859-1", "LATIN1", "Latin-1", "latin1", "iso-8859-1"]; + + for variant in variants { + let result = convert_bytes_to_string(&test_bytes, variant); + assert!( + result.is_ok(), + "Latin-1 variant '{}' should be recognized", + variant + ); + } + } + + #[test] + fn test_convert_bytes_to_string_with_fallback_utf8_success() { + let utf8_bytes = "Hello".as_bytes().to_vec(); + let result = convert_bytes_to_string_with_fallback(&utf8_bytes, "UTF8"); + + assert!(result.is_ok(), "Valid UTF-8 should succeed"); + assert_eq!(result.unwrap(), "Hello"); + } + + #[test] + fn test_convert_bytes_to_string_with_fallback_invalid_utf8() { + let invalid_utf8: Vec = vec![0xA1, 0x59, 0xC0, 0xA5, 0xE4, 0x94, 0xFF, 0x80]; + let result = convert_bytes_to_string_with_fallback(&invalid_utf8, "UTF8"); + + assert!( + result.is_ok(), + "Invalid UTF-8 should fall back to Latin-1 and succeed" + ); + let s = result.unwrap(); + + let round_trip: Vec = s.chars().map(|c| c as u8).collect(); + assert_eq!( + round_trip, invalid_utf8, + "Latin-1 fallback should preserve original bytes" + ); + } + + #[test] + fn test_convert_bytes_to_string_with_fallback_non_ascii() { + let non_ascii: Vec = vec![72, 200, 111]; + let result = convert_bytes_to_string_with_fallback(&non_ascii, "ASCII"); + + assert!( + result.is_ok(), + "Non-ASCII bytes should fall back to Latin-1" + ); + } + + #[test] + fn test_convert_bytes_to_string_with_fallback_structural_errors_fallback() { + let odd_bytes: Vec = vec![72, 65, 101]; + let result = convert_bytes_to_string_with_fallback(&odd_bytes, "Unicode"); + + assert!( + result.is_ok(), + "Odd-length bytes should fall back to Latin-1 for UTF-16" + ); + + let s = result.unwrap(); + let round_trip: Vec = s.chars().map(|c| c as u8).collect(); + assert_eq!( + round_trip, odd_bytes, + "Should preserve original bytes via Latin-1" + ); + } + + #[test] + fn test_convert_bytes_to_string_with_fallback_unsupported_encoding() { + let bytes: Vec = vec![72, 101, 108, 108, 111]; + let result = convert_bytes_to_string_with_fallback(&bytes, "INVALID_ENCODING"); + + assert!( + result.is_err(), + "Unsupported encoding should propagate error" + ); + assert!( + result.unwrap_err().contains("Unsupported encoding"), + "Error should mention unsupported encoding" + ); + } + + #[test] + fn test_convert_bytes_to_string_with_fallback_binary_data_round_trip() { + let binary_data: Vec = vec![ + 0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A, 0xFF, 0xFE, 0x80, + ]; + + let result = convert_bytes_to_string_with_fallback(&binary_data, "UTF8"); + assert!( + result.is_ok(), + "Binary data should succeed via Latin-1 fallback" + ); + + let s = result.unwrap(); + let round_trip: Vec = s.chars().map(|c| c as u8).collect(); + assert_eq!( + round_trip, binary_data, + "Binary data should round-trip correctly" + ); + } + + #[test] + fn test_convert_bytes_to_string_with_fallback_null_bytes_replaced() { + let data_with_null: Vec = vec![0xA1, 0x00, 0xC0]; + + let result = convert_bytes_to_string_with_fallback(&data_with_null, "UTF8"); + assert!(result.is_ok(), "Data with null should succeed"); + + let s = result.unwrap(); + assert_eq!(s.chars().count(), 3, "Should have 3 characters"); + assert_eq!( + s.chars().next().unwrap(), + '\u{00A1}', + "First char should be Latin-1 0xA1" + ); + assert_eq!( + s.chars().nth(1).unwrap(), + '\u{FFFD}', + "Null byte should be replacement char" + ); + assert_eq!( + s.chars().nth(2).unwrap(), + '\u{00C0}', + "Third char should be Latin-1 0xC0" + ); + } +} diff --git a/lib/src/base64/mod.rs b/lib/src/base64/mod.rs new file mode 100644 index 0000000..7223c26 --- /dev/null +++ b/lib/src/base64/mod.rs @@ -0,0 +1,14 @@ +//! Base64 encoding and decoding functions + +mod bytes_ops; +mod encoding; +mod string_ops; + +// Re-export public FFI functions +pub use bytes_ops::{base64_to_bytes, bytes_to_base64}; +pub use string_ops::{base64_to_string, base64_to_string_lenient, string_to_base64}; + +// Re-export encoding helpers for use by other modules +pub(crate) use encoding::{ + convert_bytes_to_string, convert_bytes_to_string_with_fallback, convert_string_to_bytes, +}; diff --git a/lib/src/base64/string_ops.rs b/lib/src/base64/string_ops.rs new file mode 100644 index 0000000..9f0dc7f --- /dev/null +++ b/lib/src/base64/string_ops.rs @@ -0,0 +1,299 @@ +//! String-based Base64 encoding and decoding functions + +use super::encoding::{ + convert_bytes_to_string, convert_bytes_to_string_with_fallback, convert_string_to_bytes, +}; +use base64::{Engine as _, engine::general_purpose}; +use std::ffi::{CStr, CString}; +use std::os::raw::c_char; + +/// Convert a string to Base64 encoding +/// +/// # Safety +/// This function is unsafe because it dereferences raw pointers. +/// The caller must ensure that: +/// - `input` is a valid null-terminated C string or null +/// - `encoding` is a valid null-terminated C string or null +/// - The returned pointer must be freed using `free_string` +#[unsafe(no_mangle)] +pub unsafe extern "C" fn string_to_base64( + input: *const c_char, + encoding: *const c_char, +) -> *mut c_char { + // Validate null pointers + if input.is_null() { + crate::error::set_error("Input pointer is null".to_string()); + return std::ptr::null_mut(); + } + + if encoding.is_null() { + crate::error::set_error("Encoding pointer is null".to_string()); + return std::ptr::null_mut(); + } + + // Convert C strings to Rust strings + let input_str = match unsafe { CStr::from_ptr(input).to_str() } { + Ok(s) => s, + Err(_) => { + crate::error::set_error("Invalid UTF-8 in input string".to_string()); + return std::ptr::null_mut(); + } + }; + + let encoding_str = match unsafe { CStr::from_ptr(encoding).to_str() } { + Ok(s) => s, + Err(_) => { + crate::error::set_error("Invalid UTF-8 in encoding string".to_string()); + return std::ptr::null_mut(); + } + }; + + // Check for deprecated UTF7 encoding (both UTF7 and UTF-7 variants) + if encoding_str.eq_ignore_ascii_case("UTF7") || encoding_str.eq_ignore_ascii_case("UTF-7") { + crate::error::set_error("UTF7 encoding is deprecated and not supported".to_string()); + return std::ptr::null_mut(); + } + + let bytes = match convert_string_to_bytes(input_str, encoding_str) { + Ok(b) => b, + Err(e) => { + crate::error::set_error(e); + return std::ptr::null_mut(); + } + }; + + let encoded = general_purpose::STANDARD.encode(&bytes); + + match CString::new(encoded) { + Ok(c_str) => { + crate::error::clear_error(); + c_str.into_raw() + } + Err(_) => { + crate::error::set_error("Failed to create C string from Base64 result".to_string()); + std::ptr::null_mut() + } + } +} + +/// Convert a Base64 string back to a regular string +/// +/// # Safety +/// This function is unsafe because it dereferences raw pointers. +/// The caller must ensure that: +/// - `input` is a valid null-terminated C string or null +/// - `encoding` is a valid null-terminated C string or null +/// - The returned pointer must be freed using `free_string` +#[unsafe(no_mangle)] +pub unsafe extern "C" fn base64_to_string( + input: *const c_char, + encoding: *const c_char, +) -> *mut c_char { + if input.is_null() { + crate::error::set_error("Input pointer is null".to_string()); + return std::ptr::null_mut(); + } + + if encoding.is_null() { + crate::error::set_error("Encoding pointer is null".to_string()); + return std::ptr::null_mut(); + } + + let input_str = match unsafe { CStr::from_ptr(input).to_str() } { + Ok(s) => s, + Err(_) => { + crate::error::set_error("Invalid UTF-8 in input string".to_string()); + return std::ptr::null_mut(); + } + }; + + let encoding_str = match unsafe { CStr::from_ptr(encoding).to_str() } { + Ok(s) => s, + Err(_) => { + crate::error::set_error("Invalid UTF-8 in encoding string".to_string()); + return std::ptr::null_mut(); + } + }; + + let decoded_bytes = match general_purpose::STANDARD.decode(input_str) { + Ok(bytes) => bytes, + Err(e) => { + crate::error::set_error(format!("Failed to decode Base64: {}", e)); + return std::ptr::null_mut(); + } + }; + + let result_string = match convert_bytes_to_string(&decoded_bytes, encoding_str) { + Ok(s) => s, + Err(e) => { + crate::error::set_error(e); + return std::ptr::null_mut(); + } + }; + + match CString::new(result_string) { + Ok(c_str) => { + crate::error::clear_error(); + c_str.into_raw() + } + Err(_) => { + crate::error::set_error("Failed to create C string from decoded result".to_string()); + std::ptr::null_mut() + } + } +} + +/// Decode a Base64 string to a string with Latin-1 fallback for binary data +/// +/// Lenient version that automatically falls back to Latin-1 (ISO-8859-1) encoding +/// when the decoded bytes are invalid for the specified encoding. +/// +/// # Safety +/// Same safety requirements as `base64_to_string`. +#[unsafe(no_mangle)] +pub unsafe extern "C" fn base64_to_string_lenient( + input: *const c_char, + encoding: *const c_char, +) -> *mut c_char { + if input.is_null() { + crate::error::set_error("Input pointer is null".to_string()); + return std::ptr::null_mut(); + } + + if encoding.is_null() { + crate::error::set_error("Encoding pointer is null".to_string()); + return std::ptr::null_mut(); + } + + let input_str = match unsafe { CStr::from_ptr(input).to_str() } { + Ok(s) => s, + Err(_) => { + crate::error::set_error("Invalid UTF-8 in input string".to_string()); + return std::ptr::null_mut(); + } + }; + + let encoding_str = match unsafe { CStr::from_ptr(encoding).to_str() } { + Ok(s) => s, + Err(_) => { + crate::error::set_error("Invalid UTF-8 in encoding string".to_string()); + return std::ptr::null_mut(); + } + }; + + let decoded_bytes = match general_purpose::STANDARD.decode(input_str) { + Ok(bytes) => bytes, + Err(e) => { + crate::error::set_error(format!("Failed to decode Base64: {}", e)); + return std::ptr::null_mut(); + } + }; + + let result_string = match convert_bytes_to_string_with_fallback(&decoded_bytes, encoding_str) { + Ok(s) => s, + Err(e) => { + crate::error::set_error(e); + return std::ptr::null_mut(); + } + }; + + match CString::new(result_string) { + Ok(c_str) => { + crate::error::clear_error(); + c_str.into_raw() + } + Err(_) => { + crate::error::set_error("Failed to create C string from decoded result".to_string()); + std::ptr::null_mut() + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::ffi::CString; + + #[test] + fn test_string_to_base64_happy_path_utf8() { + let input = CString::new("Hello").unwrap(); + let encoding = CString::new("UTF8").unwrap(); + + let result = unsafe { string_to_base64(input.as_ptr(), encoding.as_ptr()) }; + + assert!(!result.is_null()); + let result_str = unsafe { CStr::from_ptr(result).to_str().unwrap() }; + assert_eq!(result_str, "SGVsbG8="); + unsafe { crate::memory::free_string(result) }; + } + + #[test] + fn test_string_to_base64_null_input_pointer() { + let encoding = CString::new("UTF8").unwrap(); + let result = unsafe { string_to_base64(std::ptr::null(), encoding.as_ptr()) }; + assert!(result.is_null()); + } + + #[test] + fn test_string_to_base64_null_encoding_pointer() { + let input = CString::new("Hello").unwrap(); + let result = unsafe { string_to_base64(input.as_ptr(), std::ptr::null()) }; + assert!(result.is_null()); + } + + #[test] + fn test_string_to_base64_invalid_encoding() { + let input = CString::new("Hello").unwrap(); + let encoding = CString::new("INVALID_ENCODING").unwrap(); + let result = unsafe { string_to_base64(input.as_ptr(), encoding.as_ptr()) }; + assert!(result.is_null()); + } + + #[test] + fn test_string_to_base64_utf7_deprecated() { + let input = CString::new("Hello").unwrap(); + let encoding = CString::new("UTF7").unwrap(); + let result = unsafe { string_to_base64(input.as_ptr(), encoding.as_ptr()) }; + assert!(result.is_null()); + } + + #[test] + fn test_string_to_base64_empty_string() { + let input = CString::new("").unwrap(); + let encoding = CString::new("UTF8").unwrap(); + let result = unsafe { string_to_base64(input.as_ptr(), encoding.as_ptr()) }; + assert!(!result.is_null()); + let result_str = unsafe { CStr::from_ptr(result).to_str().unwrap() }; + assert_eq!(result_str, ""); + unsafe { crate::memory::free_string(result) }; + } + + #[test] + fn test_string_to_base64_large_string() { + let large_string = "A".repeat(1024 * 1024); + let input = CString::new(large_string).unwrap(); + let encoding = CString::new("UTF8").unwrap(); + let result = unsafe { string_to_base64(input.as_ptr(), encoding.as_ptr()) }; + assert!(!result.is_null()); + unsafe { crate::memory::free_string(result) }; + } + + #[test] + fn test_string_to_base64_various_encodings() { + let input = CString::new("Test").unwrap(); + let encodings = vec![ + "UTF8", + "ASCII", + "Unicode", + "UTF32", + "BigEndianUnicode", + "Default", + ]; + for enc in encodings { + let encoding = CString::new(enc).unwrap(); + let result = unsafe { string_to_base64(input.as_ptr(), encoding.as_ptr()) }; + assert!(!result.is_null()); + unsafe { crate::memory::free_string(result) }; + } + } +} diff --git a/lib/src/compression.rs b/lib/src/compression.rs deleted file mode 100644 index 5ad48ea..0000000 --- a/lib/src/compression.rs +++ /dev/null @@ -1,877 +0,0 @@ -//! Compression and decompression functions using Gzip -//! -//! This module provides compression and decompression using the `flate2` crate -//! with default compression settings (level 6). The implementation uses streaming -//! compression/decompression for memory efficiency, making it suitable for both -//! small and large payloads. - -use base64::Engine as _; -use flate2::Compression; -use flate2::read::GzDecoder; -use flate2::write::GzEncoder; -use std::ffi::{CStr, CString}; -use std::io::{Read, Write}; -use std::os::raw::c_char; - -/// Compress a string using Gzip compression -/// -/// Converts the input string to bytes using the specified encoding, then compresses -/// the bytes using Gzip compression. The compressed data is returned as a byte array -/// with metadata header for proper deallocation. -/// -/// # Safety -/// This function is unsafe because it dereferences raw pointers. -/// The caller must ensure that: -/// - `input` is a valid null-terminated C string or null -/// - `encoding` is a valid null-terminated C string or null -/// - `out_length` is a valid pointer to a usize -/// - The returned pointer must be freed using `free_bytes` -/// -/// # Arguments -/// * `input` - The string to compress -/// * `encoding` - The character encoding to use (UTF8, ASCII, Unicode, UTF32, BigEndianUnicode, Default) -/// * `out_length` - Pointer to store the length of compressed data -/// -/// # Returns -/// Pointer to compressed byte array, or null on error -#[unsafe(no_mangle)] -pub unsafe extern "C" fn compress_string( - input: *const c_char, - encoding: *const c_char, - out_length: *mut usize, -) -> *mut u8 { - // Validate null pointers - if input.is_null() { - crate::error::set_error("Input pointer is null".to_string()); - if !out_length.is_null() { - unsafe { - *out_length = 0; - } - } - return std::ptr::null_mut(); - } - - if encoding.is_null() { - crate::error::set_error("Encoding pointer is null".to_string()); - if !out_length.is_null() { - unsafe { - *out_length = 0; - } - } - return std::ptr::null_mut(); - } - - // Convert C strings to Rust strings - let input_str = match unsafe { CStr::from_ptr(input).to_str() } { - Ok(s) => s, - Err(_) => { - crate::error::set_error("Invalid UTF-8 in input string".to_string()); - if !out_length.is_null() { - unsafe { - *out_length = 0; - } - } - return std::ptr::null_mut(); - } - }; - - let encoding_str = match unsafe { CStr::from_ptr(encoding).to_str() } { - Ok(s) => s, - Err(_) => { - crate::error::set_error("Invalid UTF-8 in encoding string".to_string()); - if !out_length.is_null() { - unsafe { - *out_length = 0; - } - } - return std::ptr::null_mut(); - } - }; - - // Convert string to bytes based on encoding - let bytes = match crate::base64::convert_string_to_bytes(input_str, encoding_str) { - Ok(b) => b, - Err(e) => { - crate::error::set_error(e); - if !out_length.is_null() { - unsafe { - *out_length = 0; - } - } - return std::ptr::null_mut(); - } - }; - - // Compress using Gzip - let mut encoder = GzEncoder::new(Vec::new(), Compression::default()); - if let Err(e) = encoder.write_all(&bytes) { - crate::error::set_error(format!("Compression write failed: {}", e)); - if !out_length.is_null() { - unsafe { - *out_length = 0; - } - } - return std::ptr::null_mut(); - } - - let compressed = match encoder.finish() { - Ok(data) => data, - Err(e) => { - crate::error::set_error(format!("Compression finish failed: {}", e)); - if !out_length.is_null() { - unsafe { - *out_length = 0; - } - } - return std::ptr::null_mut(); - } - }; - - // Set output length - let length = compressed.len(); - if !out_length.is_null() { - unsafe { - *out_length = length; - } - } - - // Allocate byte array with metadata header for proper deallocation - crate::error::clear_error(); - crate::memory::allocate_byte_array(compressed) -} - -/// Decompress a Gzip-compressed byte array to a string -/// -/// Decompresses the input byte array using Gzip, then converts the decompressed -/// bytes to a string using the specified encoding. Handles special characters, -/// Unicode, and various encodings correctly. -/// -/// # Safety -/// This function is unsafe because it dereferences raw pointers. -/// The caller must ensure that: -/// - `bytes` is a valid pointer to a byte array or null -/// - `length` accurately represents the number of bytes to read -/// - `encoding` is a valid null-terminated C string or null -/// - The returned pointer must be freed using `free_string` -/// -/// # Arguments -/// * `bytes` - Pointer to compressed byte array -/// * `length` - Length of compressed data -/// * `encoding` - The character encoding to use for the output string (UTF8, ASCII, Unicode, UTF32, BigEndianUnicode, Default) -/// -/// # Returns -/// Pointer to decompressed string, or null on error -#[unsafe(no_mangle)] -pub unsafe extern "C" fn decompress_string( - bytes: *const u8, - length: usize, - encoding: *const c_char, -) -> *mut c_char { - // Validate null pointers - if bytes.is_null() { - crate::error::set_error("Byte array pointer is null".to_string()); - return std::ptr::null_mut(); - } - - if encoding.is_null() { - crate::error::set_error("Encoding pointer is null".to_string()); - return std::ptr::null_mut(); - } - - // Convert encoding C string to Rust string - let encoding_str = match unsafe { CStr::from_ptr(encoding).to_str() } { - Ok(s) => s, - Err(_) => { - crate::error::set_error("Invalid UTF-8 in encoding string".to_string()); - return std::ptr::null_mut(); - } - }; - - // Create a slice from the raw pointer - let compressed_slice = unsafe { std::slice::from_raw_parts(bytes, length) }; - - // Decompress using Gzip - let mut decoder = GzDecoder::new(compressed_slice); - let mut decompressed = Vec::new(); - - if let Err(e) = decoder.read_to_end(&mut decompressed) { - crate::error::set_error(format!("Decompression failed: {}", e)); - return std::ptr::null_mut(); - } - - // Convert bytes to string based on encoding - let result_string = match crate::base64::convert_bytes_to_string(&decompressed, encoding_str) { - Ok(s) => s, - Err(e) => { - crate::error::set_error(e); - return std::ptr::null_mut(); - } - }; - - // Convert to C string - match CString::new(result_string) { - Ok(c_str) => { - crate::error::clear_error(); - c_str.into_raw() - } - Err(_) => { - crate::error::set_error( - "Failed to create C string from decompressed result".to_string(), - ); - std::ptr::null_mut() - } - } -} - -/// Decompress a Gzip-compressed byte array to a string with Latin-1 fallback -/// -/// This is a lenient version of `decompress_string` that automatically falls back to -/// Latin-1 (ISO-8859-1) encoding when the decompressed byte sequence is invalid for -/// the specified encoding. This is useful for handling binary data (like certificates) -/// that may not be valid text in any standard encoding. -/// -/// Use this function when you want best-effort conversion without errors. -/// Use `decompress_string` when you want strict validation of the encoding. -/// -/// # Safety -/// Same safety requirements as `decompress_string`. -#[unsafe(no_mangle)] -pub unsafe extern "C" fn decompress_string_lenient( - bytes: *const u8, - length: usize, - encoding: *const c_char, -) -> *mut c_char { - // Validate null pointers - if bytes.is_null() { - crate::error::set_error("Byte array pointer is null".to_string()); - return std::ptr::null_mut(); - } - - if encoding.is_null() { - crate::error::set_error("Encoding pointer is null".to_string()); - return std::ptr::null_mut(); - } - - // Convert encoding C string to Rust string - let encoding_str = match unsafe { CStr::from_ptr(encoding).to_str() } { - Ok(s) => s, - Err(_) => { - crate::error::set_error("Invalid UTF-8 in encoding string".to_string()); - return std::ptr::null_mut(); - } - }; - - // Create a slice from the raw pointer - let compressed_slice = unsafe { std::slice::from_raw_parts(bytes, length) }; - - // Decompress using Gzip - let mut decoder = GzDecoder::new(compressed_slice); - let mut decompressed = Vec::new(); - - if let Err(e) = decoder.read_to_end(&mut decompressed) { - crate::error::set_error(format!("Decompression failed: {}", e)); - return std::ptr::null_mut(); - } - - // Convert bytes to string with Latin-1 fallback for binary data - let result_string = - match crate::base64::convert_bytes_to_string_with_fallback(&decompressed, encoding_str) { - Ok(s) => s, - Err(e) => { - crate::error::set_error(e); - return std::ptr::null_mut(); - } - }; - - // Convert to C string - match CString::new(result_string) { - Ok(c_str) => { - crate::error::clear_error(); - c_str.into_raw() - } - Err(_) => { - crate::error::set_error( - "Failed to create C string from decompressed result".to_string(), - ); - std::ptr::null_mut() - } - } -} - -/// Decode a Base64 string, decompress it, and convert to a string in one operation -/// -/// This function combines Base64 decoding, Gzip decompression, and string conversion -/// into a single FFI call, reducing the overhead of multiple round-trips between -/// PowerShell and Rust. -/// -/// # Safety -/// This function is unsafe because it dereferences raw pointers. -/// The caller must ensure that: -/// - `input` is a valid null-terminated C string or null -/// - `encoding` is a valid null-terminated C string or null -/// - The returned pointer must be freed using `free_string` -/// -/// # Arguments -/// * `input` - Base64 encoded compressed string -/// * `encoding` - The character encoding to use for the output string -/// -/// # Returns -/// Pointer to decompressed string, or null on error -#[unsafe(no_mangle)] -pub unsafe extern "C" fn base64_to_decompressed_string( - input: *const c_char, - encoding: *const c_char, -) -> *mut c_char { - // Validate null pointers - if input.is_null() { - crate::error::set_error("Input pointer is null".to_string()); - return std::ptr::null_mut(); - } - - if encoding.is_null() { - crate::error::set_error("Encoding pointer is null".to_string()); - return std::ptr::null_mut(); - } - - // Convert C strings to Rust strings - let input_str = match unsafe { CStr::from_ptr(input).to_str() } { - Ok(s) => s, - Err(_) => { - crate::error::set_error("Invalid UTF-8 in input string".to_string()); - return std::ptr::null_mut(); - } - }; - - let encoding_str = match unsafe { CStr::from_ptr(encoding).to_str() } { - Ok(s) => s, - Err(_) => { - crate::error::set_error("Invalid UTF-8 in encoding string".to_string()); - return std::ptr::null_mut(); - } - }; - - // Decode from Base64 - let compressed_bytes = match base64::engine::general_purpose::STANDARD.decode(input_str) { - Ok(bytes) => bytes, - Err(e) => { - crate::error::set_error(format!("Failed to decode Base64: {}", e)); - return std::ptr::null_mut(); - } - }; - - // Decompress using Gzip - let mut decoder = GzDecoder::new(compressed_bytes.as_slice()); - let mut decompressed = Vec::new(); - - if let Err(e) = decoder.read_to_end(&mut decompressed) { - crate::error::set_error(format!("Decompression failed: {}", e)); - return std::ptr::null_mut(); - } - - // Convert bytes to string based on encoding (strict mode) - let result_string = match crate::base64::convert_bytes_to_string(&decompressed, encoding_str) { - Ok(s) => s, - Err(e) => { - crate::error::set_error(e); - return std::ptr::null_mut(); - } - }; - - // Convert to C string - match CString::new(result_string) { - Ok(c_str) => { - crate::error::clear_error(); - c_str.into_raw() - } - Err(_) => { - crate::error::set_error( - "Failed to create C string from decompressed result".to_string(), - ); - std::ptr::null_mut() - } - } -} - -/// Decode a Base64 string, decompress it, and convert to a string with Latin-1 fallback -/// -/// This is a lenient version of `base64_to_decompressed_string` that automatically -/// falls back to Latin-1 (ISO-8859-1) encoding when the decompressed bytes are invalid -/// for the specified encoding. -/// -/// # Safety -/// Same safety requirements as `base64_to_decompressed_string`. -#[unsafe(no_mangle)] -pub unsafe extern "C" fn base64_to_decompressed_string_lenient( - input: *const c_char, - encoding: *const c_char, -) -> *mut c_char { - // Validate null pointers - if input.is_null() { - crate::error::set_error("Input pointer is null".to_string()); - return std::ptr::null_mut(); - } - - if encoding.is_null() { - crate::error::set_error("Encoding pointer is null".to_string()); - return std::ptr::null_mut(); - } - - // Convert C strings to Rust strings - let input_str = match unsafe { CStr::from_ptr(input).to_str() } { - Ok(s) => s, - Err(_) => { - crate::error::set_error("Invalid UTF-8 in input string".to_string()); - return std::ptr::null_mut(); - } - }; - - let encoding_str = match unsafe { CStr::from_ptr(encoding).to_str() } { - Ok(s) => s, - Err(_) => { - crate::error::set_error("Invalid UTF-8 in encoding string".to_string()); - return std::ptr::null_mut(); - } - }; - - // Decode from Base64 - let compressed_bytes = match base64::engine::general_purpose::STANDARD.decode(input_str) { - Ok(bytes) => bytes, - Err(e) => { - crate::error::set_error(format!("Failed to decode Base64: {}", e)); - return std::ptr::null_mut(); - } - }; - - // Decompress using Gzip - let mut decoder = GzDecoder::new(compressed_bytes.as_slice()); - let mut decompressed = Vec::new(); - - if let Err(e) = decoder.read_to_end(&mut decompressed) { - crate::error::set_error(format!("Decompression failed: {}", e)); - return std::ptr::null_mut(); - } - - // Convert bytes to string with Latin-1 fallback for binary data - let result_string = - match crate::base64::convert_bytes_to_string_with_fallback(&decompressed, encoding_str) { - Ok(s) => s, - Err(e) => { - crate::error::set_error(e); - return std::ptr::null_mut(); - } - }; - - // Convert to C string - match CString::new(result_string) { - Ok(c_str) => { - crate::error::clear_error(); - c_str.into_raw() - } - Err(_) => { - crate::error::set_error( - "Failed to create C string from decompressed result".to_string(), - ); - std::ptr::null_mut() - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - use std::ffi::CString; - - // ===== Test Helpers ===== - - /// RAII guard for automatic cleanup of compressed byte arrays - struct CompressedBytes { - ptr: *mut u8, - length: usize, - } - - impl CompressedBytes { - fn new(ptr: *mut u8, length: usize) -> Self { - Self { ptr, length } - } - - fn as_ptr(&self) -> *const u8 { - self.ptr - } - - fn len(&self) -> usize { - self.length - } - - fn is_null(&self) -> bool { - self.ptr.is_null() - } - } - - impl Drop for CompressedBytes { - fn drop(&mut self) { - if !self.ptr.is_null() { - unsafe { crate::memory::free_bytes(self.ptr) }; - } - } - } - - /// RAII guard for automatic cleanup of decompressed strings - struct DecompressedString { - ptr: *mut c_char, - } - - impl DecompressedString { - fn new(ptr: *mut c_char) -> Self { - Self { ptr } - } - - fn is_null(&self) -> bool { - self.ptr.is_null() - } - - fn to_str(&self) -> Result<&str, std::str::Utf8Error> { - if self.ptr.is_null() { - panic!("Cannot convert null pointer to string"); - } - unsafe { CStr::from_ptr(self.ptr).to_str() } - } - } - - impl Drop for DecompressedString { - fn drop(&mut self) { - if !self.ptr.is_null() { - unsafe { crate::memory::free_string(self.ptr) }; - } - } - } - - /// Helper to compress a string with automatic cleanup - fn compress_with_encoding(input: &str, encoding: &str) -> CompressedBytes { - let input_cstr = CString::new(input).unwrap(); - let encoding_cstr = CString::new(encoding).unwrap(); - let mut out_length: usize = 0; - - let ptr = unsafe { - compress_string( - input_cstr.as_ptr(), - encoding_cstr.as_ptr(), - &mut out_length as *mut usize, - ) - }; - - CompressedBytes::new(ptr, out_length) - } - - /// Helper to decompress bytes with automatic cleanup - fn decompress_with_encoding(bytes: &CompressedBytes, encoding: &str) -> DecompressedString { - let encoding_cstr = CString::new(encoding).unwrap(); - - let ptr = unsafe { decompress_string(bytes.as_ptr(), bytes.len(), encoding_cstr.as_ptr()) }; - - DecompressedString::new(ptr) - } - - /// Helper to perform a full round-trip compression/decompression - fn round_trip(input: &str, encoding: &str) -> String { - let compressed = compress_with_encoding(input, encoding); - assert!(!compressed.is_null(), "Compression failed for: {}", input); - - let decompressed = decompress_with_encoding(&compressed, encoding); - assert!( - !decompressed.is_null(), - "Decompression failed for: {}", - input - ); - - decompressed.to_str().unwrap().to_string() - } - - // ===== Tests for compress_string ===== - - #[test] - fn test_compress_string_happy_path_utf8() { - // Test: compress "test string" with UTF8 encoding - let compressed = compress_with_encoding("test string", "UTF8"); - - assert!( - !compressed.is_null(), - "Result should not be null for valid input" - ); - assert!( - compressed.len() > 0, - "Output length should be greater than 0" - ); - - // Verify we can read the compressed data - let compressed_data = - unsafe { std::slice::from_raw_parts(compressed.as_ptr(), compressed.len()) }; - assert!( - !compressed_data.is_empty(), - "Compressed data should not be empty" - ); - } - - #[test] - fn test_compress_string_null_input_pointer() { - // Test: null input pointer should return null - let encoding = CString::new("UTF8").unwrap(); - let mut out_length: usize = 0; - - let result = unsafe { - compress_string( - std::ptr::null(), - encoding.as_ptr(), - &mut out_length as *mut usize, - ) - }; - - assert!( - result.is_null(), - "Result should be null for null input pointer" - ); - assert_eq!(out_length, 0, "Output length should be 0 for null input"); - } - - #[test] - fn test_compress_string_null_encoding_pointer() { - // Test: null encoding pointer should return null - let input = CString::new("test string").unwrap(); - let mut out_length: usize = 0; - - let result = unsafe { - compress_string( - input.as_ptr(), - std::ptr::null(), - &mut out_length as *mut usize, - ) - }; - - assert!( - result.is_null(), - "Result should be null for null encoding pointer" - ); - assert_eq!(out_length, 0, "Output length should be 0 for null encoding"); - } - - #[test] - fn test_compress_string_invalid_encoding() { - // Test: invalid encoding should return null - let input = CString::new("test string").unwrap(); - let encoding = CString::new("INVALID_ENCODING").unwrap(); - let mut out_length: usize = 0; - - let result = unsafe { - compress_string( - input.as_ptr(), - encoding.as_ptr(), - &mut out_length as *mut usize, - ) - }; - - assert!( - result.is_null(), - "Result should be null for invalid encoding" - ); - assert_eq!( - out_length, 0, - "Output length should be 0 for invalid encoding" - ); - } - - #[test] - fn test_compress_string_empty_string() { - // Test: empty string should compress successfully - let compressed = compress_with_encoding("", "UTF8"); - - assert!( - !compressed.is_null(), - "Result should not be null for empty string" - ); - assert!( - compressed.len() > 0, - "Gzip header should produce non-zero output even for empty input" - ); - } - - #[test] - fn test_compress_string_large_string_1mb() { - // Test: large string (1MB) should compress successfully - let large_string = "A".repeat(1024 * 1024); - let compressed = compress_with_encoding(&large_string, "UTF8"); - - assert!( - !compressed.is_null(), - "Result should not be null for large string" - ); - assert!( - compressed.len() > 0, - "Output length should be greater than 0" - ); - } - - #[test] - fn test_compress_string_output_smaller_than_input() { - // Test: verify compressed output is smaller than input for repetitive data - let repetitive_string = "AAAAAAAAAA".repeat(1000); // 10,000 bytes of 'A' - let original_size = repetitive_string.len(); - let compressed = compress_with_encoding(&repetitive_string, "UTF8"); - - assert!(!compressed.is_null(), "Result should not be null"); - - // Compressed size should be significantly smaller than original - assert!( - compressed.len() < original_size, - "Compressed size ({}) should be smaller than original size ({})", - compressed.len(), - original_size - ); - - // For highly repetitive data, compression should be very effective - assert!( - compressed.len() < original_size / 10, - "Compressed size ({}) should be less than 10% of original size ({})", - compressed.len(), - original_size - ); - } - - #[test] - fn test_compress_string_various_encodings() { - // Test: compress with various supported encodings - let test_string = "Hello World"; - let encodings = vec!["UTF8", "ASCII", "Unicode"]; - - for encoding_name in encodings { - let compressed = compress_with_encoding(test_string, encoding_name); - - assert!( - !compressed.is_null(), - "Result should not be null for encoding: {}", - encoding_name - ); - assert!( - compressed.len() > 0, - "Output length should be greater than 0 for encoding: {}", - encoding_name - ); - } - } - - // ===== Tests for decompress_string ===== - - #[test] - fn test_decompress_string_happy_path() { - // Test: decompress to original string - let original = "test string for decompression"; - let result = round_trip(original, "UTF8"); - - assert_eq!( - result, original, - "Decompressed string should match original" - ); - } - - #[test] - fn test_decompress_string_round_trip() { - // Test: compress/decompress round-trip preserves data - let repetitive_data = "A".repeat(1000); - let test_cases = vec![ - "Simple text", - "Text with numbers 12345", - "Special chars: !@#$%^&*()", - "Unicode: Hello 世界 🌍", - repetitive_data.as_str(), - ]; - - for original in test_cases { - let result = round_trip(original, "UTF8"); - assert_eq!( - result, original, - "Round-trip should preserve data for: {}", - original - ); - } - } - - #[test] - fn test_decompress_string_null_pointer() { - // Test: null pointer should return null - let encoding = CString::new("UTF8").unwrap(); - - let result = unsafe { decompress_string(std::ptr::null(), 0, encoding.as_ptr()) }; - - assert!(result.is_null(), "Result should be null for null pointer"); - } - - #[test] - fn test_decompress_string_invalid_compressed_data() { - // Test: invalid compressed data should return null - let invalid_data = [0xFF, 0xFE, 0xFD, 0xFC]; // Not valid Gzip data - let encoding = CString::new("UTF8").unwrap(); - - let result = unsafe { - decompress_string(invalid_data.as_ptr(), invalid_data.len(), encoding.as_ptr()) - }; - - assert!( - result.is_null(), - "Result should be null for invalid compressed data" - ); - } - - #[test] - fn test_decompress_string_various_encodings() { - // Test: decompress with various encodings - let original = "Test String"; - let encodings = vec!["UTF8", "ASCII"]; - - for encoding_name in encodings { - let result = round_trip(original, encoding_name); - assert_eq!( - result, original, - "Round-trip should preserve data for encoding: {}", - encoding_name - ); - } - } - - #[test] - fn test_decompress_string_null_encoding_pointer() { - // Test: null encoding pointer should return null - let data = [0x1F, 0x8B]; // Gzip magic number - - let result = unsafe { decompress_string(data.as_ptr(), data.len(), std::ptr::null()) }; - - assert!( - result.is_null(), - "Result should be null for null encoding pointer" - ); - } - - #[test] - fn test_decompress_string_empty_compressed_data() { - // Test: empty compressed data should return null or handle gracefully - let encoding = CString::new("UTF8").unwrap(); - - let result = unsafe { decompress_string(std::ptr::null(), 0, encoding.as_ptr()) }; - - assert!( - result.is_null(), - "Result should be null for empty compressed data" - ); - } - - #[test] - fn test_decompress_string_emoji() { - // Test: emoji characters should round-trip correctly - let original = "Hello 👋 World 🌍"; - let result = round_trip(original, "UTF8"); - - assert_eq!(result, original, "Emoji should round-trip correctly"); - - // Verify the bytes are correct - let original_bytes = original.as_bytes(); - let result_bytes = result.as_bytes(); - assert_eq!(result_bytes, original_bytes, "Bytes should match exactly"); - } -} diff --git a/lib/src/compression/base64_decompress.rs b/lib/src/compression/base64_decompress.rs new file mode 100644 index 0000000..2112172 --- /dev/null +++ b/lib/src/compression/base64_decompress.rs @@ -0,0 +1,166 @@ +//! Base64 decode and decompress functions + +use base64::Engine as _; +use flate2::read::GzDecoder; +use std::ffi::{CStr, CString}; +use std::io::Read; +use std::os::raw::c_char; + +/// Decode a Base64 string, decompress it, and convert to a string in one operation +/// +/// This function combines Base64 decoding, Gzip decompression, and string conversion +/// into a single FFI call, reducing the overhead of multiple round-trips between +/// PowerShell and Rust. +/// +/// # Safety +/// This function is unsafe because it dereferences raw pointers. +/// The caller must ensure that: +/// - `input` is a valid null-terminated C string or null +/// - `encoding` is a valid null-terminated C string or null +/// - The returned pointer must be freed using `free_string` +#[unsafe(no_mangle)] +pub unsafe extern "C" fn base64_to_decompressed_string( + input: *const c_char, + encoding: *const c_char, +) -> *mut c_char { + if input.is_null() { + crate::error::set_error("Input pointer is null".to_string()); + return std::ptr::null_mut(); + } + + if encoding.is_null() { + crate::error::set_error("Encoding pointer is null".to_string()); + return std::ptr::null_mut(); + } + + let input_str = match unsafe { CStr::from_ptr(input).to_str() } { + Ok(s) => s, + Err(_) => { + crate::error::set_error("Invalid UTF-8 in input string".to_string()); + return std::ptr::null_mut(); + } + }; + + let encoding_str = match unsafe { CStr::from_ptr(encoding).to_str() } { + Ok(s) => s, + Err(_) => { + crate::error::set_error("Invalid UTF-8 in encoding string".to_string()); + return std::ptr::null_mut(); + } + }; + + let compressed_bytes = match base64::engine::general_purpose::STANDARD.decode(input_str) { + Ok(bytes) => bytes, + Err(e) => { + crate::error::set_error(format!("Failed to decode Base64: {}", e)); + return std::ptr::null_mut(); + } + }; + + let mut decoder = GzDecoder::new(compressed_bytes.as_slice()); + let mut decompressed = Vec::new(); + + if let Err(e) = decoder.read_to_end(&mut decompressed) { + crate::error::set_error(format!("Decompression failed: {}", e)); + return std::ptr::null_mut(); + } + + let result_string = match crate::base64::convert_bytes_to_string(&decompressed, encoding_str) { + Ok(s) => s, + Err(e) => { + crate::error::set_error(e); + return std::ptr::null_mut(); + } + }; + + match CString::new(result_string) { + Ok(c_str) => { + crate::error::clear_error(); + c_str.into_raw() + } + Err(_) => { + crate::error::set_error( + "Failed to create C string from decompressed result".to_string(), + ); + std::ptr::null_mut() + } + } +} + +/// Decode a Base64 string, decompress it, and convert to a string with Latin-1 fallback +/// +/// This is a lenient version of `base64_to_decompressed_string` that automatically +/// falls back to Latin-1 (ISO-8859-1) encoding when the decompressed bytes are invalid +/// for the specified encoding. +/// +/// # Safety +/// Same safety requirements as `base64_to_decompressed_string`. +#[unsafe(no_mangle)] +pub unsafe extern "C" fn base64_to_decompressed_string_lenient( + input: *const c_char, + encoding: *const c_char, +) -> *mut c_char { + if input.is_null() { + crate::error::set_error("Input pointer is null".to_string()); + return std::ptr::null_mut(); + } + + if encoding.is_null() { + crate::error::set_error("Encoding pointer is null".to_string()); + return std::ptr::null_mut(); + } + + let input_str = match unsafe { CStr::from_ptr(input).to_str() } { + Ok(s) => s, + Err(_) => { + crate::error::set_error("Invalid UTF-8 in input string".to_string()); + return std::ptr::null_mut(); + } + }; + + let encoding_str = match unsafe { CStr::from_ptr(encoding).to_str() } { + Ok(s) => s, + Err(_) => { + crate::error::set_error("Invalid UTF-8 in encoding string".to_string()); + return std::ptr::null_mut(); + } + }; + + let compressed_bytes = match base64::engine::general_purpose::STANDARD.decode(input_str) { + Ok(bytes) => bytes, + Err(e) => { + crate::error::set_error(format!("Failed to decode Base64: {}", e)); + return std::ptr::null_mut(); + } + }; + + let mut decoder = GzDecoder::new(compressed_bytes.as_slice()); + let mut decompressed = Vec::new(); + + if let Err(e) = decoder.read_to_end(&mut decompressed) { + crate::error::set_error(format!("Decompression failed: {}", e)); + return std::ptr::null_mut(); + } + + let result_string = + match crate::base64::convert_bytes_to_string_with_fallback(&decompressed, encoding_str) { + Ok(s) => s, + Err(e) => { + crate::error::set_error(e); + return std::ptr::null_mut(); + } + }; + + match CString::new(result_string) { + Ok(c_str) => { + crate::error::clear_error(); + c_str.into_raw() + } + Err(_) => { + crate::error::set_error( + "Failed to create C string from decompressed result".to_string(), + ); + std::ptr::null_mut() + } + } +} diff --git a/lib/src/compression/compress.rs b/lib/src/compression/compress.rs new file mode 100644 index 0000000..c785b64 --- /dev/null +++ b/lib/src/compression/compress.rs @@ -0,0 +1,331 @@ +//! String compression functions + +use flate2::Compression; +use flate2::write::GzEncoder; +use std::ffi::CStr; +use std::io::Write; +use std::os::raw::c_char; + +/// Compress a string using Gzip compression +/// +/// Converts the input string to bytes using the specified encoding, then compresses +/// the bytes using Gzip compression. The compressed data is returned as a byte array +/// with metadata header for proper deallocation. +/// +/// # Safety +/// This function is unsafe because it dereferences raw pointers. +/// The caller must ensure that: +/// - `input` is a valid null-terminated C string or null +/// - `encoding` is a valid null-terminated C string or null +/// - `out_length` is a valid pointer to a usize +/// - The returned pointer must be freed using `free_bytes` +#[unsafe(no_mangle)] +pub unsafe extern "C" fn compress_string( + input: *const c_char, + encoding: *const c_char, + out_length: *mut usize, +) -> *mut u8 { + if input.is_null() { + crate::error::set_error("Input pointer is null".to_string()); + if !out_length.is_null() { + unsafe { + *out_length = 0; + } + } + return std::ptr::null_mut(); + } + + if encoding.is_null() { + crate::error::set_error("Encoding pointer is null".to_string()); + if !out_length.is_null() { + unsafe { + *out_length = 0; + } + } + return std::ptr::null_mut(); + } + + let input_str = match unsafe { CStr::from_ptr(input).to_str() } { + Ok(s) => s, + Err(_) => { + crate::error::set_error("Invalid UTF-8 in input string".to_string()); + if !out_length.is_null() { + unsafe { + *out_length = 0; + } + } + return std::ptr::null_mut(); + } + }; + + let encoding_str = match unsafe { CStr::from_ptr(encoding).to_str() } { + Ok(s) => s, + Err(_) => { + crate::error::set_error("Invalid UTF-8 in encoding string".to_string()); + if !out_length.is_null() { + unsafe { + *out_length = 0; + } + } + return std::ptr::null_mut(); + } + }; + + let bytes = match crate::base64::convert_string_to_bytes(input_str, encoding_str) { + Ok(b) => b, + Err(e) => { + crate::error::set_error(e); + if !out_length.is_null() { + unsafe { + *out_length = 0; + } + } + return std::ptr::null_mut(); + } + }; + + let mut encoder = GzEncoder::new(Vec::new(), Compression::default()); + if let Err(e) = encoder.write_all(&bytes) { + crate::error::set_error(format!("Compression write failed: {}", e)); + if !out_length.is_null() { + unsafe { + *out_length = 0; + } + } + return std::ptr::null_mut(); + } + + let compressed = match encoder.finish() { + Ok(data) => data, + Err(e) => { + crate::error::set_error(format!("Compression finish failed: {}", e)); + if !out_length.is_null() { + unsafe { + *out_length = 0; + } + } + return std::ptr::null_mut(); + } + }; + + let length = compressed.len(); + if !out_length.is_null() { + unsafe { + *out_length = length; + } + } + + crate::error::clear_error(); + crate::memory::allocate_byte_array(compressed) +} + +#[cfg(test)] +mod tests { + use super::*; + use std::ffi::CString; + + struct CompressedBytes { + ptr: *mut u8, + length: usize, + } + + impl CompressedBytes { + fn new(ptr: *mut u8, length: usize) -> Self { + Self { ptr, length } + } + + fn is_null(&self) -> bool { + self.ptr.is_null() + } + + fn len(&self) -> usize { + self.length + } + + fn as_ptr(&self) -> *const u8 { + self.ptr + } + } + + impl Drop for CompressedBytes { + fn drop(&mut self) { + if !self.ptr.is_null() { + unsafe { crate::memory::free_bytes(self.ptr) }; + } + } + } + + fn compress_with_encoding(input: &str, encoding: &str) -> CompressedBytes { + let input_cstr = CString::new(input).unwrap(); + let encoding_cstr = CString::new(encoding).unwrap(); + let mut out_length: usize = 0; + + let ptr = unsafe { + compress_string( + input_cstr.as_ptr(), + encoding_cstr.as_ptr(), + &mut out_length as *mut usize, + ) + }; + + CompressedBytes::new(ptr, out_length) + } + + #[test] + fn test_compress_string_happy_path_utf8() { + let compressed = compress_with_encoding("test string", "UTF8"); + + assert!( + !compressed.is_null(), + "Result should not be null for valid input" + ); + assert!( + compressed.len() > 0, + "Output length should be greater than 0" + ); + + let compressed_data = + unsafe { std::slice::from_raw_parts(compressed.as_ptr(), compressed.len()) }; + assert!( + !compressed_data.is_empty(), + "Compressed data should not be empty" + ); + } + + #[test] + fn test_compress_string_null_input_pointer() { + let encoding = CString::new("UTF8").unwrap(); + let mut out_length: usize = 0; + + let result = unsafe { + compress_string( + std::ptr::null(), + encoding.as_ptr(), + &mut out_length as *mut usize, + ) + }; + + assert!( + result.is_null(), + "Result should be null for null input pointer" + ); + assert_eq!(out_length, 0, "Output length should be 0 for null input"); + } + + #[test] + fn test_compress_string_null_encoding_pointer() { + let input = CString::new("test string").unwrap(); + let mut out_length: usize = 0; + + let result = unsafe { + compress_string( + input.as_ptr(), + std::ptr::null(), + &mut out_length as *mut usize, + ) + }; + + assert!( + result.is_null(), + "Result should be null for null encoding pointer" + ); + assert_eq!(out_length, 0, "Output length should be 0 for null encoding"); + } + + #[test] + fn test_compress_string_invalid_encoding() { + let input = CString::new("test string").unwrap(); + let encoding = CString::new("INVALID_ENCODING").unwrap(); + let mut out_length: usize = 0; + + let result = unsafe { + compress_string( + input.as_ptr(), + encoding.as_ptr(), + &mut out_length as *mut usize, + ) + }; + + assert!( + result.is_null(), + "Result should be null for invalid encoding" + ); + assert_eq!( + out_length, 0, + "Output length should be 0 for invalid encoding" + ); + } + + #[test] + fn test_compress_string_empty_string() { + let compressed = compress_with_encoding("", "UTF8"); + + assert!( + !compressed.is_null(), + "Result should not be null for empty string" + ); + assert!( + compressed.len() > 0, + "Gzip header should produce non-zero output even for empty input" + ); + } + + #[test] + fn test_compress_string_large_string_1mb() { + let large_string = "A".repeat(1024 * 1024); + let compressed = compress_with_encoding(&large_string, "UTF8"); + + assert!( + !compressed.is_null(), + "Result should not be null for large string" + ); + assert!( + compressed.len() > 0, + "Output length should be greater than 0" + ); + } + + #[test] + fn test_compress_string_output_smaller_than_input() { + let repetitive_string = "AAAAAAAAAA".repeat(1000); + let original_size = repetitive_string.len(); + let compressed = compress_with_encoding(&repetitive_string, "UTF8"); + + assert!(!compressed.is_null(), "Result should not be null"); + + assert!( + compressed.len() < original_size, + "Compressed size ({}) should be smaller than original size ({})", + compressed.len(), + original_size + ); + + assert!( + compressed.len() < original_size / 10, + "Compressed size ({}) should be less than 10% of original size ({})", + compressed.len(), + original_size + ); + } + + #[test] + fn test_compress_string_various_encodings() { + let test_string = "Hello World"; + let encodings = vec!["UTF8", "ASCII", "Unicode"]; + + for encoding_name in encodings { + let compressed = compress_with_encoding(test_string, encoding_name); + + assert!( + !compressed.is_null(), + "Result should not be null for encoding: {}", + encoding_name + ); + assert!( + compressed.len() > 0, + "Output length should be greater than 0 for encoding: {}", + encoding_name + ); + } + } +} diff --git a/lib/src/compression/decompress.rs b/lib/src/compression/decompress.rs new file mode 100644 index 0000000..4e59bd6 --- /dev/null +++ b/lib/src/compression/decompress.rs @@ -0,0 +1,355 @@ +//! Gzip decompression functions + +use flate2::read::GzDecoder; +use std::ffi::{CStr, CString}; +use std::io::Read; +use std::os::raw::c_char; + +/// Decompress a Gzip-compressed byte array to a string +/// +/// Decompresses the input byte array using Gzip, then converts the decompressed +/// bytes to a string using the specified encoding. Handles special characters, +/// Unicode, and various encodings correctly. +/// +/// # Safety +/// This function is unsafe because it dereferences raw pointers. +/// The caller must ensure that: +/// - `bytes` is a valid pointer to a byte array or null +/// - `length` accurately represents the number of bytes to read +/// - `encoding` is a valid null-terminated C string or null +/// - The returned pointer must be freed using `free_string` +#[unsafe(no_mangle)] +pub unsafe extern "C" fn decompress_string( + bytes: *const u8, + length: usize, + encoding: *const c_char, +) -> *mut c_char { + if bytes.is_null() { + crate::error::set_error("Byte array pointer is null".to_string()); + return std::ptr::null_mut(); + } + + if encoding.is_null() { + crate::error::set_error("Encoding pointer is null".to_string()); + return std::ptr::null_mut(); + } + + let encoding_str = match unsafe { CStr::from_ptr(encoding).to_str() } { + Ok(s) => s, + Err(_) => { + crate::error::set_error("Invalid UTF-8 in encoding string".to_string()); + return std::ptr::null_mut(); + } + }; + + let compressed_slice = unsafe { std::slice::from_raw_parts(bytes, length) }; + + let mut decoder = GzDecoder::new(compressed_slice); + let mut decompressed = Vec::new(); + + if let Err(e) = decoder.read_to_end(&mut decompressed) { + crate::error::set_error(format!("Decompression failed: {}", e)); + return std::ptr::null_mut(); + } + + let result_string = match crate::base64::convert_bytes_to_string(&decompressed, encoding_str) { + Ok(s) => s, + Err(e) => { + crate::error::set_error(e); + return std::ptr::null_mut(); + } + }; + + match CString::new(result_string) { + Ok(c_str) => { + crate::error::clear_error(); + c_str.into_raw() + } + Err(_) => { + crate::error::set_error( + "Failed to create C string from decompressed result".to_string(), + ); + std::ptr::null_mut() + } + } +} + +/// Decompress a Gzip-compressed byte array to a string with Latin-1 fallback +/// +/// This is a lenient version of `decompress_string` that automatically falls back to +/// Latin-1 (ISO-8859-1) encoding when the decompressed byte sequence is invalid for +/// the specified encoding. This is useful for handling binary data (like certificates) +/// that may not be valid text in any standard encoding. +/// +/// Use this function when you want best-effort conversion without errors. +/// Use `decompress_string` when you want strict validation of the encoding. +/// +/// # Safety +/// Same safety requirements as `decompress_string`. +#[unsafe(no_mangle)] +pub unsafe extern "C" fn decompress_string_lenient( + bytes: *const u8, + length: usize, + encoding: *const c_char, +) -> *mut c_char { + if bytes.is_null() { + crate::error::set_error("Byte array pointer is null".to_string()); + return std::ptr::null_mut(); + } + + if encoding.is_null() { + crate::error::set_error("Encoding pointer is null".to_string()); + return std::ptr::null_mut(); + } + + let encoding_str = match unsafe { CStr::from_ptr(encoding).to_str() } { + Ok(s) => s, + Err(_) => { + crate::error::set_error("Invalid UTF-8 in encoding string".to_string()); + return std::ptr::null_mut(); + } + }; + + let compressed_slice = unsafe { std::slice::from_raw_parts(bytes, length) }; + + let mut decoder = GzDecoder::new(compressed_slice); + let mut decompressed = Vec::new(); + + if let Err(e) = decoder.read_to_end(&mut decompressed) { + crate::error::set_error(format!("Decompression failed: {}", e)); + return std::ptr::null_mut(); + } + + let result_string = + match crate::base64::convert_bytes_to_string_with_fallback(&decompressed, encoding_str) { + Ok(s) => s, + Err(e) => { + crate::error::set_error(e); + return std::ptr::null_mut(); + } + }; + + match CString::new(result_string) { + Ok(c_str) => { + crate::error::clear_error(); + c_str.into_raw() + } + Err(_) => { + crate::error::set_error( + "Failed to create C string from decompressed result".to_string(), + ); + std::ptr::null_mut() + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::ffi::CString; + + struct CompressedBytes { + ptr: *mut u8, + length: usize, + } + + impl CompressedBytes { + fn new(ptr: *mut u8, length: usize) -> Self { + Self { ptr, length } + } + + fn as_ptr(&self) -> *const u8 { + self.ptr + } + + fn len(&self) -> usize { + self.length + } + + fn is_null(&self) -> bool { + self.ptr.is_null() + } + } + + impl Drop for CompressedBytes { + fn drop(&mut self) { + if !self.ptr.is_null() { + unsafe { crate::memory::free_bytes(self.ptr) }; + } + } + } + + struct DecompressedString { + ptr: *mut c_char, + } + + impl DecompressedString { + fn new(ptr: *mut c_char) -> Self { + Self { ptr } + } + + fn is_null(&self) -> bool { + self.ptr.is_null() + } + + fn to_str(&self) -> Result<&str, std::str::Utf8Error> { + if self.ptr.is_null() { + panic!("Cannot convert null pointer to string"); + } + unsafe { CStr::from_ptr(self.ptr).to_str() } + } + } + + impl Drop for DecompressedString { + fn drop(&mut self) { + if !self.ptr.is_null() { + unsafe { crate::memory::free_string(self.ptr) }; + } + } + } + + fn compress_with_encoding(input: &str, encoding: &str) -> CompressedBytes { + let input_cstr = CString::new(input).unwrap(); + let encoding_cstr = CString::new(encoding).unwrap(); + let mut out_length: usize = 0; + + let ptr = unsafe { + crate::compression::compress_string( + input_cstr.as_ptr(), + encoding_cstr.as_ptr(), + &mut out_length as *mut usize, + ) + }; + + CompressedBytes::new(ptr, out_length) + } + + fn decompress_with_encoding(bytes: &CompressedBytes, encoding: &str) -> DecompressedString { + let encoding_cstr = CString::new(encoding).unwrap(); + + let ptr = unsafe { decompress_string(bytes.as_ptr(), bytes.len(), encoding_cstr.as_ptr()) }; + + DecompressedString::new(ptr) + } + + fn round_trip(input: &str, encoding: &str) -> String { + let compressed = compress_with_encoding(input, encoding); + assert!(!compressed.is_null(), "Compression failed for: {}", input); + + let decompressed = decompress_with_encoding(&compressed, encoding); + assert!( + !decompressed.is_null(), + "Decompression failed for: {}", + input + ); + + decompressed.to_str().unwrap().to_string() + } + + #[test] + fn test_decompress_string_happy_path() { + let original = "test string for decompression"; + let result = round_trip(original, "UTF8"); + + assert_eq!( + result, original, + "Decompressed string should match original" + ); + } + + #[test] + fn test_decompress_string_round_trip() { + let repetitive_data = "A".repeat(1000); + let test_cases = vec![ + "Simple text", + "Text with numbers 12345", + "Special chars: !@#$%^&*()", + "Unicode: Hello 世界 🌍", + repetitive_data.as_str(), + ]; + + for original in test_cases { + let result = round_trip(original, "UTF8"); + assert_eq!( + result, original, + "Round-trip should preserve data for: {}", + original + ); + } + } + + #[test] + fn test_decompress_string_null_pointer() { + let encoding = CString::new("UTF8").unwrap(); + + let result = unsafe { decompress_string(std::ptr::null(), 0, encoding.as_ptr()) }; + + assert!(result.is_null(), "Result should be null for null pointer"); + } + + #[test] + fn test_decompress_string_invalid_compressed_data() { + let invalid_data = [0xFF, 0xFE, 0xFD, 0xFC]; + let encoding = CString::new("UTF8").unwrap(); + + let result = unsafe { + decompress_string(invalid_data.as_ptr(), invalid_data.len(), encoding.as_ptr()) + }; + + assert!( + result.is_null(), + "Result should be null for invalid compressed data" + ); + } + + #[test] + fn test_decompress_string_various_encodings() { + let original = "Test String"; + let encodings = vec!["UTF8", "ASCII"]; + + for encoding_name in encodings { + let result = round_trip(original, encoding_name); + assert_eq!( + result, original, + "Round-trip should preserve data for encoding: {}", + encoding_name + ); + } + } + + #[test] + fn test_decompress_string_null_encoding_pointer() { + let data = [0x1F, 0x8B]; + + let result = unsafe { decompress_string(data.as_ptr(), data.len(), std::ptr::null()) }; + + assert!( + result.is_null(), + "Result should be null for null encoding pointer" + ); + } + + #[test] + fn test_decompress_string_empty_compressed_data() { + let encoding = CString::new("UTF8").unwrap(); + + let result = unsafe { decompress_string(std::ptr::null(), 0, encoding.as_ptr()) }; + + assert!( + result.is_null(), + "Result should be null for empty compressed data" + ); + } + + #[test] + fn test_decompress_string_emoji() { + let original = "Hello 👋 World 🌍"; + let result = round_trip(original, "UTF8"); + + assert_eq!(result, original, "Emoji should round-trip correctly"); + + let original_bytes = original.as_bytes(); + let result_bytes = result.as_bytes(); + assert_eq!(result_bytes, original_bytes, "Bytes should match exactly"); + } +} diff --git a/lib/src/compression/mod.rs b/lib/src/compression/mod.rs new file mode 100644 index 0000000..3f87457 --- /dev/null +++ b/lib/src/compression/mod.rs @@ -0,0 +1,9 @@ +//! Compression and decompression functions using Gzip + +mod base64_decompress; +mod compress; +mod decompress; + +pub use base64_decompress::{base64_to_decompressed_string, base64_to_decompressed_string_lenient}; +pub use compress::compress_string; +pub use decompress::{decompress_string, decompress_string_lenient}; diff --git a/lib/src/encoding.rs b/lib/src/encoding.rs deleted file mode 100644 index 0840225..0000000 --- a/lib/src/encoding.rs +++ /dev/null @@ -1,1261 +0,0 @@ -//! String to byte array encoding functions -//! -//! This module provides functions to convert strings to byte arrays using various -//! text encodings (UTF-8, ASCII, Unicode/UTF-16, UTF-32, BigEndianUnicode). -//! The encoding conversion logic is shared with the base64 module to ensure -//! consistent behavior across the library. - -use std::ffi::CStr; -use std::os::raw::c_char; - -/// Convert a string to a byte array using the specified encoding -/// -/// Supports UTF-8, ASCII, Unicode (UTF-16LE), UTF-32, BigEndianUnicode (UTF-16BE), -/// and Default (UTF-8) encodings. The encoding name is case-insensitive and supports -/// both hyphenated (UTF-8) and non-hyphenated (UTF8) variants. -/// -/// # Arguments -/// * `input` - Null-terminated C string to convert -/// * `encoding` - Null-terminated C string specifying the encoding -/// * `out_length` - Optional pointer to store the byte array length -/// -/// # Returns -/// Pointer to allocated byte array, or null on error. The caller must free the -/// returned pointer using `free_bytes`. -/// -/// # Safety -/// This function is unsafe because it dereferences raw pointers. -/// The caller must ensure that: -/// - `input` is a valid null-terminated C string or null -/// - `encoding` is a valid null-terminated C string or null -/// - `out_length` is a valid pointer to a usize or null (optional) -/// - The returned pointer must be freed using `free_bytes` -/// -/// # Error Handling -/// Returns null pointer and sets error message via `set_error` if: -/// - Input or encoding pointer is null -/// - Input or encoding contains invalid UTF-8 -/// - Encoding name is not supported -/// - ASCII encoding is used with non-ASCII characters -#[unsafe(no_mangle)] -pub unsafe extern "C" fn string_to_bytes( - input: *const c_char, - encoding: *const c_char, - out_length: *mut usize, -) -> *mut u8 { - // Validate null pointers - if input.is_null() { - crate::error::set_error("Input pointer is null".to_string()); - set_output_length_zero(out_length); - return std::ptr::null_mut(); - } - - if encoding.is_null() { - crate::error::set_error("Encoding pointer is null".to_string()); - set_output_length_zero(out_length); - return std::ptr::null_mut(); - } - - // SAFETY: Pointers are validated as non-null above - // Convert C strings to Rust strings - let input_str = match unsafe { CStr::from_ptr(input).to_str() } { - Ok(s) => s, - Err(_) => { - crate::error::set_error("Invalid UTF-8 in input string".to_string()); - set_output_length_zero(out_length); - return std::ptr::null_mut(); - } - }; - - let encoding_str = match unsafe { CStr::from_ptr(encoding).to_str() } { - Ok(s) => s, - Err(_) => { - crate::error::set_error("Invalid UTF-8 in encoding string".to_string()); - set_output_length_zero(out_length); - return std::ptr::null_mut(); - } - }; - - // Check for deprecated UTF7 encoding (both UTF7 and UTF-7 variants) - if encoding_str.eq_ignore_ascii_case("UTF7") || encoding_str.eq_ignore_ascii_case("UTF-7") { - crate::error::set_error("UTF7 encoding is deprecated and not supported".to_string()); - set_output_length_zero(out_length); - return std::ptr::null_mut(); - } - - // Convert string to bytes using shared encoding logic - let bytes = match crate::base64::convert_string_to_bytes(input_str, encoding_str) { - Ok(b) => b, - Err(e) => { - crate::error::set_error(e); - set_output_length_zero(out_length); - return std::ptr::null_mut(); - } - }; - - // Set output length (only if pointer provided) - let length = bytes.len(); - if !out_length.is_null() { - // SAFETY: out_length is validated as non-null - unsafe { - *out_length = length; - } - } - - // Allocate byte array with metadata header for proper deallocation - crate::error::clear_error(); - crate::memory::allocate_byte_array(bytes) -} - -/// Helper function to set output length to zero -/// -/// Safely sets the output length parameter to zero if the pointer is non-null. -/// This is used in error paths to ensure consistent behavior. -#[inline] -fn set_output_length_zero(out_length: *mut usize) { - if !out_length.is_null() { - // SAFETY: Pointer is validated as non-null - unsafe { - *out_length = 0; - } - } -} - -/// Convert a byte array to a string using the specified encoding -/// -/// Supports UTF-8, ASCII, Unicode (UTF-16LE), UTF-32, BigEndianUnicode (UTF-16BE), -/// and Default (UTF-8) encodings. The encoding name is case-insensitive and supports -/// both hyphenated (UTF-8) and non-hyphenated (UTF8) variants. -/// -/// # Arguments -/// * `bytes` - Pointer to byte array to convert -/// * `length` - Length of the byte array -/// * `encoding` - Null-terminated C string specifying the encoding -/// -/// # Returns -/// Pointer to allocated null-terminated C string, or null on error. The caller must -/// free the returned pointer using `free_string`. -/// -/// # Safety -/// This function is unsafe because it dereferences raw pointers. -/// The caller must ensure that: -/// - `bytes` is a valid pointer to a byte array of at least `length` bytes, or null if length is 0 -/// - `encoding` is a valid null-terminated C string or null -/// - The returned pointer must be freed using `free_string` -/// -/// # Error Handling -/// Returns null pointer and sets error message via `set_error` if: -/// - Encoding pointer is null -/// - Encoding contains invalid UTF-8 -/// - Encoding name is not supported -/// - Byte sequence is invalid for the specified encoding -#[unsafe(no_mangle)] -pub unsafe extern "C" fn bytes_to_string( - bytes: *const u8, - length: usize, - encoding: *const c_char, -) -> *mut c_char { - // Validate encoding pointer first (consistent with string_to_bytes) - if encoding.is_null() { - crate::error::set_error("Encoding pointer is null".to_string()); - return std::ptr::null_mut(); - } - - // SAFETY: encoding pointer is validated as non-null above - let encoding_str = match unsafe { CStr::from_ptr(encoding).to_str() } { - Ok(s) => s, - Err(_) => { - crate::error::set_error("Invalid UTF-8 in encoding string".to_string()); - return std::ptr::null_mut(); - } - }; - - // Handle empty byte array case - if length == 0 { - crate::error::clear_error(); - let empty = std::ffi::CString::new("").unwrap(); - return empty.into_raw(); - } - - // Validate bytes pointer (only needed when length > 0) - if bytes.is_null() { - crate::error::set_error("Bytes pointer is null".to_string()); - return std::ptr::null_mut(); - } - - // SAFETY: bytes pointer is validated as non-null and length is provided by caller - let byte_slice = unsafe { std::slice::from_raw_parts(bytes, length) }; - - // Check for deprecated UTF7 encoding (both UTF7 and UTF-7 variants) - if encoding_str.eq_ignore_ascii_case("UTF7") || encoding_str.eq_ignore_ascii_case("UTF-7") { - crate::error::set_error("UTF7 encoding is deprecated and not supported".to_string()); - return std::ptr::null_mut(); - } - - // Convert bytes to string using shared encoding logic (strict - no fallback) - let result_string = match crate::base64::convert_bytes_to_string(byte_slice, encoding_str) { - Ok(s) => s, - Err(e) => { - crate::error::set_error(e); - return std::ptr::null_mut(); - } - }; - - // Convert Rust string to C string - match std::ffi::CString::new(result_string) { - Ok(c_string) => { - crate::error::clear_error(); - c_string.into_raw() - } - Err(_) => { - crate::error::set_error("Result string contains null byte".to_string()); - std::ptr::null_mut() - } - } -} - -/// Convert a byte array to a string using the specified encoding with Latin-1 fallback -/// -/// This is a lenient version of `bytes_to_string` that automatically falls back to -/// Latin-1 (ISO-8859-1) encoding when the byte sequence is invalid for the specified -/// encoding. This is useful for handling binary data (like certificates) that may not -/// be valid text in any standard encoding. -/// -/// Use this function when you want best-effort conversion without errors. -/// Use `bytes_to_string` when you want strict validation of the encoding. -/// -/// # Arguments -/// * `bytes` - Pointer to byte array to convert -/// * `length` - Length of the byte array -/// * `encoding` - Null-terminated C string specifying the encoding -/// -/// # Returns -/// Pointer to allocated null-terminated C string, or null on error. The caller must -/// free the returned pointer using `free_string`. -/// -/// # Safety -/// Same safety requirements as `bytes_to_string`. -#[unsafe(no_mangle)] -pub unsafe extern "C" fn bytes_to_string_lenient( - bytes: *const u8, - length: usize, - encoding: *const c_char, -) -> *mut c_char { - // Validate encoding pointer first - if encoding.is_null() { - crate::error::set_error("Encoding pointer is null".to_string()); - return std::ptr::null_mut(); - } - - // SAFETY: encoding pointer is validated as non-null above - let encoding_str = match unsafe { CStr::from_ptr(encoding).to_str() } { - Ok(s) => s, - Err(_) => { - crate::error::set_error("Invalid UTF-8 in encoding string".to_string()); - return std::ptr::null_mut(); - } - }; - - // Handle empty byte array case - if length == 0 { - crate::error::clear_error(); - let empty = std::ffi::CString::new("").unwrap(); - return empty.into_raw(); - } - - // Validate bytes pointer (only needed when length > 0) - if bytes.is_null() { - crate::error::set_error("Bytes pointer is null".to_string()); - return std::ptr::null_mut(); - } - - // SAFETY: bytes pointer is validated as non-null and length is provided by caller - let byte_slice = unsafe { std::slice::from_raw_parts(bytes, length) }; - - // Check for deprecated UTF7 encoding - if encoding_str.eq_ignore_ascii_case("UTF7") || encoding_str.eq_ignore_ascii_case("UTF-7") { - crate::error::set_error("UTF7 encoding is deprecated and not supported".to_string()); - return std::ptr::null_mut(); - } - - // Convert bytes to string with Latin-1 fallback for binary data - let result_string = - match crate::base64::convert_bytes_to_string_with_fallback(byte_slice, encoding_str) { - Ok(s) => s, - Err(e) => { - crate::error::set_error(e); - return std::ptr::null_mut(); - } - }; - - // Convert Rust string to C string - match std::ffi::CString::new(result_string) { - Ok(c_string) => { - crate::error::clear_error(); - c_string.into_raw() - } - Err(_) => { - crate::error::set_error("Result string contains null byte".to_string()); - std::ptr::null_mut() - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - use std::ffi::CString; - - // ========== Tests for string_to_bytes ========== - - #[test] - fn test_string_to_bytes_happy_path_utf8() { - // Test: convert "Hello" with UTF8 encoding to byte array [72, 101, 108, 108, 111] - let input = CString::new("Hello").unwrap(); - let encoding = CString::new("UTF8").unwrap(); - let mut out_length: usize = 0; - - let result = unsafe { - string_to_bytes( - input.as_ptr(), - encoding.as_ptr(), - &mut out_length as *mut usize, - ) - }; - - assert!(!result.is_null(), "Result should not be null"); - assert_eq!(out_length, 5, "Output length should be 5 bytes"); - - let byte_slice = unsafe { std::slice::from_raw_parts(result, out_length) }; - assert_eq!( - byte_slice, - &[72, 101, 108, 108, 111], - "Bytes should be [72, 101, 108, 108, 111] (Hello)" - ); - - unsafe { crate::memory::free_bytes(result) }; - } - - #[test] - fn test_string_to_bytes_null_input_pointer() { - // Test: null input pointer should return null - let encoding = CString::new("UTF8").unwrap(); - let mut out_length: usize = 0; - - let result = unsafe { - string_to_bytes( - std::ptr::null(), - encoding.as_ptr(), - &mut out_length as *mut usize, - ) - }; - - assert!(result.is_null(), "Null input pointer should return null"); - assert_eq!(out_length, 0, "Output length should be 0 for null input"); - } - - #[test] - fn test_string_to_bytes_null_encoding_pointer() { - // Test: null encoding pointer should return null - let input = CString::new("Hello").unwrap(); - let mut out_length: usize = 0; - - let result = unsafe { - string_to_bytes( - input.as_ptr(), - std::ptr::null(), - &mut out_length as *mut usize, - ) - }; - - assert!(result.is_null(), "Null encoding pointer should return null"); - assert_eq!(out_length, 0, "Output length should be 0 for null encoding"); - } - - #[test] - fn test_string_to_bytes_all_supported_encodings() { - // Test: all supported encodings should work - let input = CString::new("Test").unwrap(); - let encodings = vec![ - "UTF8", - "ASCII", - "Unicode", - "UTF32", - "BigEndianUnicode", - "Default", - ]; - - for enc in encodings { - let encoding = CString::new(enc).unwrap(); - let mut out_length: usize = 0; - - let result = unsafe { - string_to_bytes( - input.as_ptr(), - encoding.as_ptr(), - &mut out_length as *mut usize, - ) - }; - - assert!( - !result.is_null(), - "Result should not be null for encoding: {}", - enc - ); - assert!( - out_length > 0, - "Output length should be > 0 for encoding: {}", - enc - ); - - unsafe { crate::memory::free_bytes(result) }; - } - } - - #[test] - fn test_string_to_bytes_utf7_deprecated() { - // Test: UTF7 encoding should return null (deprecated) - let input = CString::new("Hello").unwrap(); - let encoding = CString::new("UTF7").unwrap(); - let mut out_length: usize = 0; - - let result = unsafe { - string_to_bytes( - input.as_ptr(), - encoding.as_ptr(), - &mut out_length as *mut usize, - ) - }; - - assert!( - result.is_null(), - "UTF7 encoding should return null (deprecated)" - ); - assert_eq!(out_length, 0, "Output length should be 0 for UTF7"); - } - - #[test] - fn test_string_to_bytes_invalid_encoding() { - // Test: invalid encoding name should return null - let input = CString::new("Hello").unwrap(); - let encoding = CString::new("INVALID_ENCODING").unwrap(); - let mut out_length: usize = 0; - - let result = unsafe { - string_to_bytes( - input.as_ptr(), - encoding.as_ptr(), - &mut out_length as *mut usize, - ) - }; - - assert!(result.is_null(), "Invalid encoding should return null"); - assert_eq!( - out_length, 0, - "Output length should be 0 for invalid encoding" - ); - } - - #[test] - fn test_string_to_bytes_empty_string() { - // Test: empty string should encode successfully - let input = CString::new("").unwrap(); - let encoding = CString::new("UTF8").unwrap(); - let mut out_length: usize = 0; - - let result = unsafe { - string_to_bytes( - input.as_ptr(), - encoding.as_ptr(), - &mut out_length as *mut usize, - ) - }; - - assert!( - !result.is_null(), - "Result should not be null for empty string" - ); - assert_eq!(out_length, 0, "Output length should be 0 for empty string"); - - unsafe { crate::memory::free_bytes(result) }; - } - - #[test] - fn test_string_to_bytes_large_string() { - // Test: 1MB string should encode successfully - let large_string = "A".repeat(1024 * 1024); // 1MB of 'A' characters - let input = CString::new(large_string).unwrap(); - let encoding = CString::new("UTF8").unwrap(); - let mut out_length: usize = 0; - - let result = unsafe { - string_to_bytes( - input.as_ptr(), - encoding.as_ptr(), - &mut out_length as *mut usize, - ) - }; - - assert!( - !result.is_null(), - "Result should not be null for large string" - ); - assert_eq!(out_length, 1024 * 1024, "Output length should be 1MB"); - - unsafe { crate::memory::free_bytes(result) }; - } - - #[test] - fn test_string_to_bytes_utf8_encoding() { - // Test: UTF8 encoding produces correct bytes - let input = CString::new("Hello").unwrap(); - let encoding = CString::new("UTF8").unwrap(); - let mut out_length: usize = 0; - - let result = unsafe { - string_to_bytes( - input.as_ptr(), - encoding.as_ptr(), - &mut out_length as *mut usize, - ) - }; - - assert!(!result.is_null(), "Result should not be null"); - assert_eq!(out_length, 5, "UTF8 'Hello' should be 5 bytes"); - - let bytes = unsafe { std::slice::from_raw_parts(result, out_length) }; - assert_eq!(bytes, &[72, 101, 108, 108, 111]); - - unsafe { crate::memory::free_bytes(result) }; - } - - #[test] - fn test_string_to_bytes_ascii_encoding() { - // Test: ASCII encoding produces correct bytes - let input = CString::new("ABC").unwrap(); - let encoding = CString::new("ASCII").unwrap(); - let mut out_length: usize = 0; - - let result = unsafe { - string_to_bytes( - input.as_ptr(), - encoding.as_ptr(), - &mut out_length as *mut usize, - ) - }; - - assert!(!result.is_null(), "Result should not be null"); - assert_eq!(out_length, 3, "ASCII 'ABC' should be 3 bytes"); - - let bytes = unsafe { std::slice::from_raw_parts(result, out_length) }; - assert_eq!(bytes, &[65, 66, 67]); - - unsafe { crate::memory::free_bytes(result) }; - } - - #[test] - fn test_string_to_bytes_unicode_encoding() { - // Test: Unicode (UTF-16LE) encoding produces correct bytes - let input = CString::new("A").unwrap(); // 'A' = U+0041 - let encoding = CString::new("Unicode").unwrap(); - let mut out_length: usize = 0; - - let result = unsafe { - string_to_bytes( - input.as_ptr(), - encoding.as_ptr(), - &mut out_length as *mut usize, - ) - }; - - assert!(!result.is_null(), "Result should not be null"); - assert_eq!(out_length, 2, "Unicode 'A' should be 2 bytes"); - - let bytes = unsafe { std::slice::from_raw_parts(result, out_length) }; - // UTF-16LE: 'A' (U+0041) = [0x41, 0x00] - assert_eq!(bytes, &[0x41, 0x00]); - - unsafe { crate::memory::free_bytes(result) }; - } - - #[test] - fn test_string_to_bytes_utf32_encoding() { - // Test: UTF32 encoding produces correct bytes - let input = CString::new("A").unwrap(); // 'A' = U+0041 - let encoding = CString::new("UTF32").unwrap(); - let mut out_length: usize = 0; - - let result = unsafe { - string_to_bytes( - input.as_ptr(), - encoding.as_ptr(), - &mut out_length as *mut usize, - ) - }; - - assert!(!result.is_null(), "Result should not be null"); - assert_eq!(out_length, 4, "UTF32 'A' should be 4 bytes"); - - let bytes = unsafe { std::slice::from_raw_parts(result, out_length) }; - // UTF-32LE: 'A' (U+0041) = [0x41, 0x00, 0x00, 0x00] - assert_eq!(bytes, &[0x41, 0x00, 0x00, 0x00]); - - unsafe { crate::memory::free_bytes(result) }; - } - - #[test] - fn test_string_to_bytes_bigendian_unicode_encoding() { - // Test: BigEndianUnicode (UTF-16BE) encoding produces correct bytes - let input = CString::new("A").unwrap(); // 'A' = U+0041 - let encoding = CString::new("BigEndianUnicode").unwrap(); - let mut out_length: usize = 0; - - let result = unsafe { - string_to_bytes( - input.as_ptr(), - encoding.as_ptr(), - &mut out_length as *mut usize, - ) - }; - - assert!(!result.is_null(), "Result should not be null"); - assert_eq!(out_length, 2, "BigEndianUnicode 'A' should be 2 bytes"); - - let bytes = unsafe { std::slice::from_raw_parts(result, out_length) }; - // UTF-16BE: 'A' (U+0041) = [0x00, 0x41] - assert_eq!(bytes, &[0x00, 0x41]); - - unsafe { crate::memory::free_bytes(result) }; - } - - #[test] - fn test_string_to_bytes_default_encoding() { - // Test: Default encoding should behave like UTF8 - let input = CString::new("Test").unwrap(); - let encoding = CString::new("Default").unwrap(); - let mut out_length: usize = 0; - - let result = unsafe { - string_to_bytes( - input.as_ptr(), - encoding.as_ptr(), - &mut out_length as *mut usize, - ) - }; - - assert!(!result.is_null(), "Result should not be null"); - assert_eq!(out_length, 4, "Default 'Test' should be 4 bytes (UTF8)"); - - unsafe { crate::memory::free_bytes(result) }; - } - - #[test] - fn test_string_to_bytes_special_characters() { - // Test: string with special characters - let input = CString::new("Hello, World!").unwrap(); - let encoding = CString::new("UTF8").unwrap(); - let mut out_length: usize = 0; - - let result = unsafe { - string_to_bytes( - input.as_ptr(), - encoding.as_ptr(), - &mut out_length as *mut usize, - ) - }; - - assert!( - !result.is_null(), - "Result should not be null for special characters" - ); - assert_eq!(out_length, 13, "UTF8 'Hello, World!' should be 13 bytes"); - - unsafe { crate::memory::free_bytes(result) }; - } - - #[test] - fn test_string_to_bytes_unicode_characters() { - // Test: string with Unicode characters (emoji) - let input = CString::new("Hello 🌍").unwrap(); - let encoding = CString::new("UTF8").unwrap(); - let mut out_length: usize = 0; - - let result = unsafe { - string_to_bytes( - input.as_ptr(), - encoding.as_ptr(), - &mut out_length as *mut usize, - ) - }; - - assert!( - !result.is_null(), - "Result should not be null for Unicode characters" - ); - // "Hello " = 6 bytes, 🌍 = 4 bytes in UTF8 - assert_eq!(out_length, 10, "UTF8 'Hello 🌍' should be 10 bytes"); - - unsafe { crate::memory::free_bytes(result) }; - } - - #[test] - fn test_string_to_bytes_null_output_length_pointer() { - // Test: null output length pointer should be allowed (optional parameter) - let input = CString::new("Hello").unwrap(); - let encoding = CString::new("UTF8").unwrap(); - - let result = - unsafe { string_to_bytes(input.as_ptr(), encoding.as_ptr(), std::ptr::null_mut()) }; - - assert!( - !result.is_null(), - "Should succeed with null out_length pointer" - ); - - // Verify the data is correct - let data = unsafe { std::slice::from_raw_parts(result, 5) }; - assert_eq!(data, &[72, 101, 108, 108, 111]); - - unsafe { crate::memory::free_bytes(result) }; - } - - #[test] - fn test_string_to_bytes_case_insensitive_encoding() { - // Test: encoding names should be case-insensitive - let input = CString::new("Test").unwrap(); - let encoding_variants = vec!["utf8", "UTF8", "Utf8", "ascii", "ASCII"]; - - for enc in encoding_variants { - let encoding = CString::new(enc).unwrap(); - let mut out_length: usize = 0; - - let result = unsafe { - string_to_bytes( - input.as_ptr(), - encoding.as_ptr(), - &mut out_length as *mut usize, - ) - }; - - assert!( - !result.is_null(), - "Encoding '{}' should be recognized (case-insensitive)", - enc - ); - - unsafe { crate::memory::free_bytes(result) }; - } - } - - #[test] - fn test_string_to_bytes_encoding_with_hyphens() { - // Test: encoding names with hyphens should work - let input = CString::new("Test").unwrap(); - let encoding_variants = vec!["UTF-8", "UTF-16", "UTF-32"]; - - for enc in encoding_variants { - let encoding = CString::new(enc).unwrap(); - let mut out_length: usize = 0; - - let result = unsafe { - string_to_bytes( - input.as_ptr(), - encoding.as_ptr(), - &mut out_length as *mut usize, - ) - }; - - assert!(!result.is_null(), "Encoding '{}' should work", enc); - - unsafe { crate::memory::free_bytes(result) }; - } - } - - #[test] - fn test_string_to_bytes_ascii_rejects_non_ascii() { - // Test: ASCII encoding should reject strings with non-ASCII characters - let input = CString::new("Café").unwrap(); // Contains accented character - let encoding = CString::new("ASCII").unwrap(); - let mut out_length: usize = 0; - - let result = unsafe { - string_to_bytes( - input.as_ptr(), - encoding.as_ptr(), - &mut out_length as *mut usize, - ) - }; - - assert!( - result.is_null(), - "ASCII encoding should reject non-ASCII string" - ); - assert_eq!( - out_length, 0, - "Output length should be 0 for rejected input" - ); - } - - #[test] - fn test_string_to_bytes_various_lengths() { - // Test: various string lengths encode correctly - let test_cases = vec![ - ("", 0), // Empty string - ("A", 1), // Single character - ("AB", 2), // Two characters - ("ABC", 3), // Three characters - ("Test String", 11), // Multi-word string - ]; - - let encoding = CString::new("UTF8").unwrap(); - - for (test_str, expected_length) in test_cases { - let input = CString::new(test_str).unwrap(); - let mut out_length: usize = 0; - - let result = unsafe { - string_to_bytes( - input.as_ptr(), - encoding.as_ptr(), - &mut out_length as *mut usize, - ) - }; - - assert!( - !result.is_null(), - "Result should not be null for input '{}'", - test_str - ); - assert_eq!( - out_length, expected_length, - "Output length should be {} for input '{}'", - expected_length, test_str - ); - - unsafe { crate::memory::free_bytes(result) }; - } - } - - #[test] - fn test_string_to_bytes_concurrent_operations() { - use std::thread; - - // Test: multiple threads using string_to_bytes concurrently - let handles: Vec<_> = (0..10) - .map(|i| { - thread::spawn(move || { - let input = CString::new(format!("test{}", i)).unwrap(); - let encoding = CString::new("UTF8").unwrap(); - let mut out_length: usize = 0; - - let result = unsafe { - string_to_bytes( - input.as_ptr(), - encoding.as_ptr(), - &mut out_length as *mut usize, - ) - }; - assert!(!result.is_null(), "Encoding should succeed in thread {}", i); - assert!( - out_length > 0, - "Output length should be > 0 in thread {}", - i - ); - - unsafe { crate::memory::free_bytes(result) }; - }) - }) - .collect(); - - for handle in handles { - handle.join().unwrap(); - } - } - - // ========== Tests for bytes_to_string ========== - - #[test] - fn test_bytes_to_string_happy_path_utf8() { - // Test: convert [72, 101, 108, 108, 111] with UTF8 encoding to "Hello" - let bytes: [u8; 5] = [72, 101, 108, 108, 111]; - let encoding = CString::new("UTF8").unwrap(); - - let result = unsafe { bytes_to_string(bytes.as_ptr(), bytes.len(), encoding.as_ptr()) }; - - assert!(!result.is_null(), "Result should not be null"); - - let result_str = unsafe { CStr::from_ptr(result).to_str().unwrap() }; - assert_eq!(result_str, "Hello", "Should decode to 'Hello'"); - - unsafe { crate::memory::free_string(result) }; - } - - #[test] - fn test_bytes_to_string_empty_bytes() { - // Test: empty byte array should return empty string - let bytes: [u8; 0] = []; - let encoding = CString::new("UTF8").unwrap(); - - let result = unsafe { bytes_to_string(bytes.as_ptr(), 0, encoding.as_ptr()) }; - - assert!( - !result.is_null(), - "Result should not be null for empty bytes" - ); - - let result_str = unsafe { CStr::from_ptr(result).to_str().unwrap() }; - assert_eq!(result_str, "", "Should return empty string"); - - unsafe { crate::memory::free_string(result) }; - } - - #[test] - fn test_bytes_to_string_null_bytes_with_length() { - // Test: null bytes pointer with non-zero length should return null - let encoding = CString::new("UTF8").unwrap(); - - let result = unsafe { bytes_to_string(std::ptr::null(), 5, encoding.as_ptr()) }; - - assert!( - result.is_null(), - "Null bytes with length > 0 should return null" - ); - } - - #[test] - fn test_bytes_to_string_null_bytes_with_zero_length() { - // Test: null bytes pointer with zero length should succeed (edge case) - let encoding = CString::new("UTF8").unwrap(); - - let result = unsafe { bytes_to_string(std::ptr::null(), 0, encoding.as_ptr()) }; - - assert!(!result.is_null(), "Null bytes with length 0 should succeed"); - - let result_str = unsafe { CStr::from_ptr(result).to_str().unwrap() }; - assert_eq!(result_str, "", "Should return empty string"); - - unsafe { crate::memory::free_string(result) }; - } - - #[test] - fn test_bytes_to_string_null_encoding() { - // Test: null encoding pointer should return null - let bytes: [u8; 5] = [72, 101, 108, 108, 111]; - - let result = unsafe { bytes_to_string(bytes.as_ptr(), bytes.len(), std::ptr::null()) }; - - assert!(result.is_null(), "Null encoding should return null"); - } - - #[test] - fn test_bytes_to_string_all_encodings() { - // Test: all supported encodings should work - let encodings_and_bytes: Vec<(&str, Vec)> = vec![ - ("UTF8", vec![72, 101, 108, 108, 111]), // "Hello" in UTF-8 - ("ASCII", vec![72, 101, 108, 108, 111]), // "Hello" in ASCII - ("Unicode", vec![72, 0, 101, 0, 108, 0, 108, 0, 111, 0]), // "Hello" in UTF-16LE - ( - "BigEndianUnicode", - vec![0, 72, 0, 101, 0, 108, 0, 108, 0, 111], - ), // "Hello" in UTF-16BE - ( - "UTF32", - vec![ - 72, 0, 0, 0, 101, 0, 0, 0, 108, 0, 0, 0, 108, 0, 0, 0, 111, 0, 0, 0, - ], - ), // "Hello" in UTF-32LE - ("Default", vec![72, 101, 108, 108, 111]), // "Hello" in Default (UTF-8) - ]; - - for (enc, bytes) in encodings_and_bytes { - let encoding = CString::new(enc).unwrap(); - - let result = unsafe { bytes_to_string(bytes.as_ptr(), bytes.len(), encoding.as_ptr()) }; - - assert!( - !result.is_null(), - "Result should not be null for encoding: {}", - enc - ); - - let result_str = unsafe { CStr::from_ptr(result).to_str().unwrap() }; - assert_eq!( - result_str, "Hello", - "Should decode to 'Hello' for encoding: {}", - enc - ); - - unsafe { crate::memory::free_string(result) }; - } - } - - #[test] - fn test_bytes_to_string_utf7_deprecated() { - // Test: UTF7 encoding should return null (deprecated) - let bytes: [u8; 5] = [72, 101, 108, 108, 111]; - let encoding = CString::new("UTF7").unwrap(); - - let result = unsafe { bytes_to_string(bytes.as_ptr(), bytes.len(), encoding.as_ptr()) }; - - assert!( - result.is_null(), - "UTF7 encoding should return null (deprecated)" - ); - } - - #[test] - fn test_bytes_to_string_invalid_encoding() { - // Test: invalid encoding name should return null - let bytes: [u8; 5] = [72, 101, 108, 108, 111]; - let encoding = CString::new("INVALID_ENCODING").unwrap(); - - let result = unsafe { bytes_to_string(bytes.as_ptr(), bytes.len(), encoding.as_ptr()) }; - - assert!(result.is_null(), "Invalid encoding should return null"); - } - - #[test] - fn test_bytes_to_string_invalid_utf8_bytes() { - // Test: invalid UTF-8 byte sequence should return null - let bytes: [u8; 2] = [0xFF, 0xFE]; // Invalid UTF-8 sequence - let encoding = CString::new("UTF8").unwrap(); - - let result = unsafe { bytes_to_string(bytes.as_ptr(), bytes.len(), encoding.as_ptr()) }; - - assert!(result.is_null(), "Invalid UTF-8 bytes should return null"); - } - - #[test] - fn test_bytes_to_string_result_contains_null_byte() { - // Test: UTF-32 bytes that decode to a string containing a null character - // U+0000 (null) in UTF-32LE = [0x00, 0x00, 0x00, 0x00] - let bytes: [u8; 8] = [0x41, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]; // "A" + null - let encoding = CString::new("UTF32").unwrap(); - - let result = unsafe { bytes_to_string(bytes.as_ptr(), bytes.len(), encoding.as_ptr()) }; - - assert!( - result.is_null(), - "Result containing null byte should return null" - ); - } - - #[test] - fn test_bytes_to_string_invalid_utf16_length() { - // Test: odd-length byte array for UTF-16 should return null - let bytes: [u8; 3] = [72, 0, 101]; // Odd length, invalid for UTF-16 - let encoding = CString::new("Unicode").unwrap(); - - let result = unsafe { bytes_to_string(bytes.as_ptr(), bytes.len(), encoding.as_ptr()) }; - - assert!( - result.is_null(), - "Odd-length UTF-16 bytes should return null" - ); - } - - #[test] - fn test_bytes_to_string_invalid_utf32_length() { - // Test: non-multiple-of-4 byte array for UTF-32 should return null - let bytes: [u8; 5] = [72, 0, 0, 0, 101]; // Not multiple of 4 - let encoding = CString::new("UTF32").unwrap(); - - let result = unsafe { bytes_to_string(bytes.as_ptr(), bytes.len(), encoding.as_ptr()) }; - - assert!( - result.is_null(), - "Non-multiple-of-4 UTF-32 bytes should return null" - ); - } - - #[test] - fn test_bytes_to_string_ascii_rejects_non_ascii() { - // Test: ASCII encoding should reject bytes > 127 - let bytes: [u8; 3] = [72, 200, 111]; // 200 is not valid ASCII - let encoding = CString::new("ASCII").unwrap(); - - let result = unsafe { bytes_to_string(bytes.as_ptr(), bytes.len(), encoding.as_ptr()) }; - - assert!(result.is_null(), "ASCII should reject non-ASCII bytes"); - } - - #[test] - fn test_bytes_to_string_unicode_emoji() { - // Test: UTF-8 bytes for emoji should decode correctly - // 🌍 = U+1F30D = F0 9F 8C 8D in UTF-8 - let bytes: [u8; 4] = [0xF0, 0x9F, 0x8C, 0x8D]; - let encoding = CString::new("UTF8").unwrap(); - - let result = unsafe { bytes_to_string(bytes.as_ptr(), bytes.len(), encoding.as_ptr()) }; - - assert!(!result.is_null(), "Result should not be null for emoji"); - - let result_str = unsafe { CStr::from_ptr(result).to_str().unwrap() }; - assert_eq!(result_str, "🌍", "Should decode to earth emoji"); - - unsafe { crate::memory::free_string(result) }; - } - - #[test] - fn test_bytes_to_string_large_input() { - // Test: 1MB of bytes should decode successfully - let large_bytes: Vec = vec![65u8; 1024 * 1024]; // 1MB of 'A' - let encoding = CString::new("UTF8").unwrap(); - - let result = - unsafe { bytes_to_string(large_bytes.as_ptr(), large_bytes.len(), encoding.as_ptr()) }; - - assert!( - !result.is_null(), - "Result should not be null for large input" - ); - - let result_str = unsafe { CStr::from_ptr(result).to_str().unwrap() }; - assert_eq!( - result_str.len(), - 1024 * 1024, - "Should have 1MB of characters" - ); - - unsafe { crate::memory::free_string(result) }; - } - - #[test] - fn test_bytes_to_string_round_trip_utf8() { - // Test: string -> bytes -> string round-trip - let original = CString::new("Hello, World! 🌍").unwrap(); - let encoding = CString::new("UTF8").unwrap(); - let mut out_length: usize = 0; - - // String to bytes - let bytes_ptr = unsafe { - string_to_bytes( - original.as_ptr(), - encoding.as_ptr(), - &mut out_length as *mut usize, - ) - }; - assert!(!bytes_ptr.is_null(), "string_to_bytes should succeed"); - - // Bytes to string - let result = unsafe { bytes_to_string(bytes_ptr, out_length, encoding.as_ptr()) }; - assert!(!result.is_null(), "bytes_to_string should succeed"); - - let result_str = unsafe { CStr::from_ptr(result).to_str().unwrap() }; - assert_eq!( - result_str, "Hello, World! 🌍", - "Round-trip should preserve string" - ); - - unsafe { - crate::memory::free_bytes(bytes_ptr); - crate::memory::free_string(result); - }; - } - - #[test] - fn test_bytes_to_string_round_trip_all_encodings() { - // Test: round-trip for all encodings - let encodings = vec![ - "UTF8", - "ASCII", - "Unicode", - "BigEndianUnicode", - "UTF32", - "Default", - ]; - - for enc in encodings { - let original = CString::new("Test").unwrap(); // ASCII-safe for all encodings - let encoding = CString::new(enc).unwrap(); - let mut out_length: usize = 0; - - // String to bytes - let bytes_ptr = unsafe { - string_to_bytes( - original.as_ptr(), - encoding.as_ptr(), - &mut out_length as *mut usize, - ) - }; - assert!( - !bytes_ptr.is_null(), - "string_to_bytes should succeed for {}", - enc - ); - - // Bytes to string - let result = unsafe { bytes_to_string(bytes_ptr, out_length, encoding.as_ptr()) }; - assert!( - !result.is_null(), - "bytes_to_string should succeed for {}", - enc - ); - - let result_str = unsafe { CStr::from_ptr(result).to_str().unwrap() }; - assert_eq!( - result_str, "Test", - "Round-trip should preserve string for {}", - enc - ); - - unsafe { - crate::memory::free_bytes(bytes_ptr); - crate::memory::free_string(result); - }; - } - } - - #[test] - fn test_bytes_to_string_case_insensitive_encoding() { - // Test: encoding names should be case-insensitive - let bytes: [u8; 4] = [84, 101, 115, 116]; // "Test" in UTF-8 - let encoding_variants = vec!["utf8", "UTF8", "Utf8", "ascii", "ASCII"]; - - for enc in encoding_variants { - let encoding = CString::new(enc).unwrap(); - - let result = unsafe { bytes_to_string(bytes.as_ptr(), bytes.len(), encoding.as_ptr()) }; - - assert!( - !result.is_null(), - "Encoding '{}' should be recognized (case-insensitive)", - enc - ); - - unsafe { crate::memory::free_string(result) }; - } - } - - #[test] - fn test_bytes_to_string_concurrent_operations() { - use std::thread; - - // Test: multiple threads using bytes_to_string concurrently - let handles: Vec<_> = (0..10) - .map(|i| { - thread::spawn(move || { - let bytes: [u8; 5] = [72, 101, 108, 108, 111]; // "Hello" - let encoding = CString::new("UTF8").unwrap(); - - let result = - unsafe { bytes_to_string(bytes.as_ptr(), bytes.len(), encoding.as_ptr()) }; - assert!(!result.is_null(), "Decoding should succeed in thread {}", i); - - let result_str = unsafe { CStr::from_ptr(result).to_str().unwrap() }; - assert_eq!( - result_str, "Hello", - "Should decode to 'Hello' in thread {}", - i - ); - - unsafe { crate::memory::free_string(result) }; - }) - }) - .collect(); - - for handle in handles { - handle.join().unwrap(); - } - } -} diff --git a/lib/src/encoding/bytes_to_string.rs b/lib/src/encoding/bytes_to_string.rs new file mode 100644 index 0000000..6431c17 --- /dev/null +++ b/lib/src/encoding/bytes_to_string.rs @@ -0,0 +1,509 @@ +//! Byte array to string conversion + +use std::ffi::CStr; +use std::os::raw::c_char; + +/// Convert a byte array to a string using the specified encoding +/// +/// Supports UTF-8, ASCII, Unicode (UTF-16LE), UTF-32, BigEndianUnicode (UTF-16BE), +/// and Default (UTF-8) encodings. The encoding name is case-insensitive and supports +/// both hyphenated (UTF-8) and non-hyphenated (UTF8) variants. +/// +/// # Safety +/// This function is unsafe because it dereferences raw pointers. +/// The caller must ensure that: +/// - `bytes` is a valid pointer to a byte array of at least `length` bytes, or null if length is 0 +/// - `encoding` is a valid null-terminated C string or null +/// - The returned pointer must be freed using `free_string` +#[unsafe(no_mangle)] +pub unsafe extern "C" fn bytes_to_string( + bytes: *const u8, + length: usize, + encoding: *const c_char, +) -> *mut c_char { + if encoding.is_null() { + crate::error::set_error("Encoding pointer is null".to_string()); + return std::ptr::null_mut(); + } + + let encoding_str = match unsafe { CStr::from_ptr(encoding).to_str() } { + Ok(s) => s, + Err(_) => { + crate::error::set_error("Invalid UTF-8 in encoding string".to_string()); + return std::ptr::null_mut(); + } + }; + + if length == 0 { + crate::error::clear_error(); + let empty = std::ffi::CString::new("").unwrap(); + return empty.into_raw(); + } + + if bytes.is_null() { + crate::error::set_error("Bytes pointer is null".to_string()); + return std::ptr::null_mut(); + } + + let byte_slice = unsafe { std::slice::from_raw_parts(bytes, length) }; + + if encoding_str.eq_ignore_ascii_case("UTF7") || encoding_str.eq_ignore_ascii_case("UTF-7") { + crate::error::set_error("UTF7 encoding is deprecated and not supported".to_string()); + return std::ptr::null_mut(); + } + + let result_string = match crate::base64::convert_bytes_to_string(byte_slice, encoding_str) { + Ok(s) => s, + Err(e) => { + crate::error::set_error(e); + return std::ptr::null_mut(); + } + }; + + match std::ffi::CString::new(result_string) { + Ok(c_string) => { + crate::error::clear_error(); + c_string.into_raw() + } + Err(_) => { + crate::error::set_error("Result string contains null byte".to_string()); + std::ptr::null_mut() + } + } +} + +/// Convert a byte array to a string using the specified encoding with Latin-1 fallback +/// +/// This is a lenient version of `bytes_to_string` that automatically falls back to +/// Latin-1 (ISO-8859-1) encoding when the byte sequence is invalid for the specified +/// encoding. This is useful for handling binary data (like certificates) that may not +/// be valid text in any standard encoding. +/// +/// Use this function when you want best-effort conversion without errors. +/// Use `bytes_to_string` when you want strict validation of the encoding. +/// +/// # Safety +/// Same safety requirements as `bytes_to_string`. +#[unsafe(no_mangle)] +pub unsafe extern "C" fn bytes_to_string_lenient( + bytes: *const u8, + length: usize, + encoding: *const c_char, +) -> *mut c_char { + if encoding.is_null() { + crate::error::set_error("Encoding pointer is null".to_string()); + return std::ptr::null_mut(); + } + + let encoding_str = match unsafe { CStr::from_ptr(encoding).to_str() } { + Ok(s) => s, + Err(_) => { + crate::error::set_error("Invalid UTF-8 in encoding string".to_string()); + return std::ptr::null_mut(); + } + }; + + if length == 0 { + crate::error::clear_error(); + let empty = std::ffi::CString::new("").unwrap(); + return empty.into_raw(); + } + + if bytes.is_null() { + crate::error::set_error("Bytes pointer is null".to_string()); + return std::ptr::null_mut(); + } + + let byte_slice = unsafe { std::slice::from_raw_parts(bytes, length) }; + + if encoding_str.eq_ignore_ascii_case("UTF7") || encoding_str.eq_ignore_ascii_case("UTF-7") { + crate::error::set_error("UTF7 encoding is deprecated and not supported".to_string()); + return std::ptr::null_mut(); + } + + let result_string = + match crate::base64::convert_bytes_to_string_with_fallback(byte_slice, encoding_str) { + Ok(s) => s, + Err(e) => { + crate::error::set_error(e); + return std::ptr::null_mut(); + } + }; + + match std::ffi::CString::new(result_string) { + Ok(c_string) => { + crate::error::clear_error(); + c_string.into_raw() + } + Err(_) => { + crate::error::set_error("Result string contains null byte".to_string()); + std::ptr::null_mut() + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::ffi::CString; + + #[test] + fn test_bytes_to_string_happy_path_utf8() { + let bytes: [u8; 5] = [72, 101, 108, 108, 111]; + let encoding = CString::new("UTF8").unwrap(); + + let result = unsafe { bytes_to_string(bytes.as_ptr(), bytes.len(), encoding.as_ptr()) }; + + assert!(!result.is_null(), "Result should not be null"); + + let result_str = unsafe { CStr::from_ptr(result).to_str().unwrap() }; + assert_eq!(result_str, "Hello", "Should decode to 'Hello'"); + + unsafe { crate::memory::free_string(result) }; + } + + #[test] + fn test_bytes_to_string_empty_bytes() { + let bytes: [u8; 0] = []; + let encoding = CString::new("UTF8").unwrap(); + + let result = unsafe { bytes_to_string(bytes.as_ptr(), 0, encoding.as_ptr()) }; + + assert!( + !result.is_null(), + "Result should not be null for empty bytes" + ); + + let result_str = unsafe { CStr::from_ptr(result).to_str().unwrap() }; + assert_eq!(result_str, "", "Should return empty string"); + + unsafe { crate::memory::free_string(result) }; + } + + #[test] + fn test_bytes_to_string_null_bytes_with_length() { + let encoding = CString::new("UTF8").unwrap(); + + let result = unsafe { bytes_to_string(std::ptr::null(), 5, encoding.as_ptr()) }; + + assert!( + result.is_null(), + "Null bytes with length > 0 should return null" + ); + } + + #[test] + fn test_bytes_to_string_null_bytes_with_zero_length() { + let encoding = CString::new("UTF8").unwrap(); + + let result = unsafe { bytes_to_string(std::ptr::null(), 0, encoding.as_ptr()) }; + + assert!(!result.is_null(), "Null bytes with length 0 should succeed"); + + let result_str = unsafe { CStr::from_ptr(result).to_str().unwrap() }; + assert_eq!(result_str, "", "Should return empty string"); + + unsafe { crate::memory::free_string(result) }; + } + + #[test] + fn test_bytes_to_string_null_encoding() { + let bytes: [u8; 5] = [72, 101, 108, 108, 111]; + + let result = unsafe { bytes_to_string(bytes.as_ptr(), bytes.len(), std::ptr::null()) }; + + assert!(result.is_null(), "Null encoding should return null"); + } + + #[test] + fn test_bytes_to_string_all_encodings() { + let encodings_and_bytes: Vec<(&str, Vec)> = vec![ + ("UTF8", vec![72, 101, 108, 108, 111]), + ("ASCII", vec![72, 101, 108, 108, 111]), + ("Unicode", vec![72, 0, 101, 0, 108, 0, 108, 0, 111, 0]), + ( + "BigEndianUnicode", + vec![0, 72, 0, 101, 0, 108, 0, 108, 0, 111], + ), + ( + "UTF32", + vec![ + 72, 0, 0, 0, 101, 0, 0, 0, 108, 0, 0, 0, 108, 0, 0, 0, 111, 0, 0, 0, + ], + ), + ("Default", vec![72, 101, 108, 108, 111]), + ]; + + for (enc, bytes) in encodings_and_bytes { + let encoding = CString::new(enc).unwrap(); + + let result = unsafe { bytes_to_string(bytes.as_ptr(), bytes.len(), encoding.as_ptr()) }; + + assert!( + !result.is_null(), + "Result should not be null for encoding: {}", + enc + ); + + let result_str = unsafe { CStr::from_ptr(result).to_str().unwrap() }; + assert_eq!( + result_str, "Hello", + "Should decode to 'Hello' for encoding: {}", + enc + ); + + unsafe { crate::memory::free_string(result) }; + } + } + + #[test] + fn test_bytes_to_string_utf7_deprecated() { + let bytes: [u8; 5] = [72, 101, 108, 108, 111]; + let encoding = CString::new("UTF7").unwrap(); + + let result = unsafe { bytes_to_string(bytes.as_ptr(), bytes.len(), encoding.as_ptr()) }; + + assert!( + result.is_null(), + "UTF7 encoding should return null (deprecated)" + ); + } + + #[test] + fn test_bytes_to_string_invalid_encoding() { + let bytes: [u8; 5] = [72, 101, 108, 108, 111]; + let encoding = CString::new("INVALID_ENCODING").unwrap(); + + let result = unsafe { bytes_to_string(bytes.as_ptr(), bytes.len(), encoding.as_ptr()) }; + + assert!(result.is_null(), "Invalid encoding should return null"); + } + + #[test] + fn test_bytes_to_string_invalid_utf8_bytes() { + let bytes: [u8; 2] = [0xFF, 0xFE]; + let encoding = CString::new("UTF8").unwrap(); + + let result = unsafe { bytes_to_string(bytes.as_ptr(), bytes.len(), encoding.as_ptr()) }; + + assert!(result.is_null(), "Invalid UTF-8 bytes should return null"); + } + + #[test] + fn test_bytes_to_string_result_contains_null_byte() { + let bytes: [u8; 8] = [0x41, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]; + let encoding = CString::new("UTF32").unwrap(); + + let result = unsafe { bytes_to_string(bytes.as_ptr(), bytes.len(), encoding.as_ptr()) }; + + assert!( + result.is_null(), + "Result containing null byte should return null" + ); + } + + #[test] + fn test_bytes_to_string_invalid_utf16_length() { + let bytes: [u8; 3] = [72, 0, 101]; + let encoding = CString::new("Unicode").unwrap(); + + let result = unsafe { bytes_to_string(bytes.as_ptr(), bytes.len(), encoding.as_ptr()) }; + + assert!( + result.is_null(), + "Odd-length UTF-16 bytes should return null" + ); + } + + #[test] + fn test_bytes_to_string_invalid_utf32_length() { + let bytes: [u8; 5] = [72, 0, 0, 0, 101]; + let encoding = CString::new("UTF32").unwrap(); + + let result = unsafe { bytes_to_string(bytes.as_ptr(), bytes.len(), encoding.as_ptr()) }; + + assert!( + result.is_null(), + "Non-multiple-of-4 UTF-32 bytes should return null" + ); + } + + #[test] + fn test_bytes_to_string_ascii_rejects_non_ascii() { + let bytes: [u8; 3] = [72, 200, 111]; + let encoding = CString::new("ASCII").unwrap(); + + let result = unsafe { bytes_to_string(bytes.as_ptr(), bytes.len(), encoding.as_ptr()) }; + + assert!(result.is_null(), "ASCII should reject non-ASCII bytes"); + } + + #[test] + fn test_bytes_to_string_unicode_emoji() { + let bytes: [u8; 4] = [0xF0, 0x9F, 0x8C, 0x8D]; + let encoding = CString::new("UTF8").unwrap(); + + let result = unsafe { bytes_to_string(bytes.as_ptr(), bytes.len(), encoding.as_ptr()) }; + + assert!(!result.is_null(), "Result should not be null for emoji"); + + let result_str = unsafe { CStr::from_ptr(result).to_str().unwrap() }; + assert_eq!(result_str, "🌍", "Should decode to earth emoji"); + + unsafe { crate::memory::free_string(result) }; + } + + #[test] + fn test_bytes_to_string_large_input() { + let large_bytes: Vec = vec![65u8; 1024 * 1024]; + let encoding = CString::new("UTF8").unwrap(); + + let result = + unsafe { bytes_to_string(large_bytes.as_ptr(), large_bytes.len(), encoding.as_ptr()) }; + + assert!( + !result.is_null(), + "Result should not be null for large input" + ); + + let result_str = unsafe { CStr::from_ptr(result).to_str().unwrap() }; + assert_eq!( + result_str.len(), + 1024 * 1024, + "Should have 1MB of characters" + ); + + unsafe { crate::memory::free_string(result) }; + } + + #[test] + fn test_bytes_to_string_round_trip_utf8() { + let original = CString::new("Hello, World! 🌍").unwrap(); + let encoding = CString::new("UTF8").unwrap(); + let mut out_length: usize = 0; + + let bytes_ptr = unsafe { + crate::encoding::string_to_bytes( + original.as_ptr(), + encoding.as_ptr(), + &mut out_length as *mut usize, + ) + }; + assert!(!bytes_ptr.is_null(), "string_to_bytes should succeed"); + + let result = unsafe { bytes_to_string(bytes_ptr, out_length, encoding.as_ptr()) }; + assert!(!result.is_null(), "bytes_to_string should succeed"); + + let result_str = unsafe { CStr::from_ptr(result).to_str().unwrap() }; + assert_eq!( + result_str, "Hello, World! 🌍", + "Round-trip should preserve string" + ); + + unsafe { + crate::memory::free_bytes(bytes_ptr); + crate::memory::free_string(result); + }; + } + + #[test] + fn test_bytes_to_string_round_trip_all_encodings() { + let encodings = vec![ + "UTF8", + "ASCII", + "Unicode", + "BigEndianUnicode", + "UTF32", + "Default", + ]; + + for enc in encodings { + let original = CString::new("Test").unwrap(); + let encoding = CString::new(enc).unwrap(); + let mut out_length: usize = 0; + + let bytes_ptr = unsafe { + crate::encoding::string_to_bytes( + original.as_ptr(), + encoding.as_ptr(), + &mut out_length as *mut usize, + ) + }; + assert!( + !bytes_ptr.is_null(), + "string_to_bytes should succeed for {}", + enc + ); + + let result = unsafe { bytes_to_string(bytes_ptr, out_length, encoding.as_ptr()) }; + assert!( + !result.is_null(), + "bytes_to_string should succeed for {}", + enc + ); + + let result_str = unsafe { CStr::from_ptr(result).to_str().unwrap() }; + assert_eq!( + result_str, "Test", + "Round-trip should preserve string for {}", + enc + ); + + unsafe { + crate::memory::free_bytes(bytes_ptr); + crate::memory::free_string(result); + }; + } + } + + #[test] + fn test_bytes_to_string_case_insensitive_encoding() { + let bytes: [u8; 4] = [84, 101, 115, 116]; + let encoding_variants = vec!["utf8", "UTF8", "Utf8", "ascii", "ASCII"]; + + for enc in encoding_variants { + let encoding = CString::new(enc).unwrap(); + + let result = unsafe { bytes_to_string(bytes.as_ptr(), bytes.len(), encoding.as_ptr()) }; + + assert!( + !result.is_null(), + "Encoding '{}' should be recognized (case-insensitive)", + enc + ); + + unsafe { crate::memory::free_string(result) }; + } + } + + #[test] + fn test_bytes_to_string_concurrent_operations() { + use std::thread; + + let handles: Vec<_> = (0..10) + .map(|i| { + thread::spawn(move || { + let bytes: [u8; 5] = [72, 101, 108, 108, 111]; + let encoding = CString::new("UTF8").unwrap(); + + let result = + unsafe { bytes_to_string(bytes.as_ptr(), bytes.len(), encoding.as_ptr()) }; + assert!(!result.is_null(), "Decoding should succeed in thread {}", i); + + let result_str = unsafe { CStr::from_ptr(result).to_str().unwrap() }; + assert_eq!( + result_str, "Hello", + "Should decode to 'Hello' in thread {}", + i + ); + + unsafe { crate::memory::free_string(result) }; + }) + }) + .collect(); + + for handle in handles { + handle.join().unwrap(); + } + } +} diff --git a/lib/src/encoding/helpers.rs b/lib/src/encoding/helpers.rs new file mode 100644 index 0000000..20efeea --- /dev/null +++ b/lib/src/encoding/helpers.rs @@ -0,0 +1,13 @@ +//! Helper functions for encoding operations + +/// Sets output length to zero if pointer is non-null. +/// +/// Used in error paths to ensure consistent behavior. +#[inline] +pub(crate) fn set_output_length_zero(out_length: *mut usize) { + if !out_length.is_null() { + unsafe { + *out_length = 0; + } + } +} diff --git a/lib/src/encoding/mod.rs b/lib/src/encoding/mod.rs new file mode 100644 index 0000000..34a2e50 --- /dev/null +++ b/lib/src/encoding/mod.rs @@ -0,0 +1,9 @@ +//! String to byte array encoding functions + +mod bytes_to_string; +mod helpers; +mod string_to_bytes; + +// Re-export public FFI functions +pub use bytes_to_string::{bytes_to_string, bytes_to_string_lenient}; +pub use string_to_bytes::string_to_bytes; diff --git a/lib/src/encoding/string_to_bytes.rs b/lib/src/encoding/string_to_bytes.rs new file mode 100644 index 0000000..f015871 --- /dev/null +++ b/lib/src/encoding/string_to_bytes.rs @@ -0,0 +1,624 @@ +//! String to byte array conversion + +use std::ffi::CStr; +use std::os::raw::c_char; + +use super::helpers::set_output_length_zero; + +/// Convert a string to a byte array using the specified encoding +/// +/// Supports UTF-8, ASCII, Unicode (UTF-16LE), UTF-32, BigEndianUnicode (UTF-16BE), +/// and Default (UTF-8) encodings. The encoding name is case-insensitive and supports +/// both hyphenated (UTF-8) and non-hyphenated (UTF8) variants. +/// +/// # Safety +/// This function is unsafe because it dereferences raw pointers. +/// The caller must ensure that: +/// - `input` is a valid null-terminated C string or null +/// - `encoding` is a valid null-terminated C string or null +/// - `out_length` is a valid pointer to a usize or null (optional) +/// - The returned pointer must be freed using `free_bytes` +#[unsafe(no_mangle)] +pub unsafe extern "C" fn string_to_bytes( + input: *const c_char, + encoding: *const c_char, + out_length: *mut usize, +) -> *mut u8 { + if input.is_null() { + crate::error::set_error("Input pointer is null".to_string()); + set_output_length_zero(out_length); + return std::ptr::null_mut(); + } + + if encoding.is_null() { + crate::error::set_error("Encoding pointer is null".to_string()); + set_output_length_zero(out_length); + return std::ptr::null_mut(); + } + + let input_str = match unsafe { CStr::from_ptr(input).to_str() } { + Ok(s) => s, + Err(_) => { + crate::error::set_error("Invalid UTF-8 in input string".to_string()); + set_output_length_zero(out_length); + return std::ptr::null_mut(); + } + }; + + let encoding_str = match unsafe { CStr::from_ptr(encoding).to_str() } { + Ok(s) => s, + Err(_) => { + crate::error::set_error("Invalid UTF-8 in encoding string".to_string()); + set_output_length_zero(out_length); + return std::ptr::null_mut(); + } + }; + + if encoding_str.eq_ignore_ascii_case("UTF7") || encoding_str.eq_ignore_ascii_case("UTF-7") { + crate::error::set_error("UTF7 encoding is deprecated and not supported".to_string()); + set_output_length_zero(out_length); + return std::ptr::null_mut(); + } + + let bytes = match crate::base64::convert_string_to_bytes(input_str, encoding_str) { + Ok(b) => b, + Err(e) => { + crate::error::set_error(e); + set_output_length_zero(out_length); + return std::ptr::null_mut(); + } + }; + + let length = bytes.len(); + if !out_length.is_null() { + unsafe { + *out_length = length; + } + } + + crate::error::clear_error(); + crate::memory::allocate_byte_array(bytes) +} + +#[cfg(test)] +mod tests { + use super::*; + use std::ffi::CString; + + #[test] + fn test_string_to_bytes_happy_path_utf8() { + let input = CString::new("Hello").unwrap(); + let encoding = CString::new("UTF8").unwrap(); + let mut out_length: usize = 0; + + let result = unsafe { + string_to_bytes( + input.as_ptr(), + encoding.as_ptr(), + &mut out_length as *mut usize, + ) + }; + + assert!(!result.is_null(), "Result should not be null"); + assert_eq!(out_length, 5, "Output length should be 5 bytes"); + + let byte_slice = unsafe { std::slice::from_raw_parts(result, out_length) }; + assert_eq!( + byte_slice, + &[72, 101, 108, 108, 111], + "Bytes should be [72, 101, 108, 108, 111] (Hello)" + ); + + unsafe { crate::memory::free_bytes(result) }; + } + + #[test] + fn test_string_to_bytes_null_input_pointer() { + let encoding = CString::new("UTF8").unwrap(); + let mut out_length: usize = 0; + + let result = unsafe { + string_to_bytes( + std::ptr::null(), + encoding.as_ptr(), + &mut out_length as *mut usize, + ) + }; + + assert!(result.is_null(), "Null input pointer should return null"); + assert_eq!(out_length, 0, "Output length should be 0 for null input"); + } + + #[test] + fn test_string_to_bytes_null_encoding_pointer() { + let input = CString::new("Hello").unwrap(); + let mut out_length: usize = 0; + + let result = unsafe { + string_to_bytes( + input.as_ptr(), + std::ptr::null(), + &mut out_length as *mut usize, + ) + }; + + assert!(result.is_null(), "Null encoding pointer should return null"); + assert_eq!(out_length, 0, "Output length should be 0 for null encoding"); + } + + #[test] + fn test_string_to_bytes_all_supported_encodings() { + let input = CString::new("Test").unwrap(); + let encodings = vec![ + "UTF8", + "ASCII", + "Unicode", + "UTF32", + "BigEndianUnicode", + "Default", + ]; + + for enc in encodings { + let encoding = CString::new(enc).unwrap(); + let mut out_length: usize = 0; + + let result = unsafe { + string_to_bytes( + input.as_ptr(), + encoding.as_ptr(), + &mut out_length as *mut usize, + ) + }; + + assert!( + !result.is_null(), + "Result should not be null for encoding: {}", + enc + ); + assert!( + out_length > 0, + "Output length should be > 0 for encoding: {}", + enc + ); + + unsafe { crate::memory::free_bytes(result) }; + } + } + + #[test] + fn test_string_to_bytes_utf7_deprecated() { + let input = CString::new("Hello").unwrap(); + let encoding = CString::new("UTF7").unwrap(); + let mut out_length: usize = 0; + + let result = unsafe { + string_to_bytes( + input.as_ptr(), + encoding.as_ptr(), + &mut out_length as *mut usize, + ) + }; + + assert!( + result.is_null(), + "UTF7 encoding should return null (deprecated)" + ); + assert_eq!(out_length, 0, "Output length should be 0 for UTF7"); + } + + #[test] + fn test_string_to_bytes_invalid_encoding() { + let input = CString::new("Hello").unwrap(); + let encoding = CString::new("INVALID_ENCODING").unwrap(); + let mut out_length: usize = 0; + + let result = unsafe { + string_to_bytes( + input.as_ptr(), + encoding.as_ptr(), + &mut out_length as *mut usize, + ) + }; + + assert!(result.is_null(), "Invalid encoding should return null"); + assert_eq!( + out_length, 0, + "Output length should be 0 for invalid encoding" + ); + } + + #[test] + fn test_string_to_bytes_empty_string() { + let input = CString::new("").unwrap(); + let encoding = CString::new("UTF8").unwrap(); + let mut out_length: usize = 0; + + let result = unsafe { + string_to_bytes( + input.as_ptr(), + encoding.as_ptr(), + &mut out_length as *mut usize, + ) + }; + + assert!( + !result.is_null(), + "Result should not be null for empty string" + ); + assert_eq!(out_length, 0, "Output length should be 0 for empty string"); + + unsafe { crate::memory::free_bytes(result) }; + } + + #[test] + fn test_string_to_bytes_large_string() { + let large_string = "A".repeat(1024 * 1024); + let input = CString::new(large_string).unwrap(); + let encoding = CString::new("UTF8").unwrap(); + let mut out_length: usize = 0; + + let result = unsafe { + string_to_bytes( + input.as_ptr(), + encoding.as_ptr(), + &mut out_length as *mut usize, + ) + }; + + assert!( + !result.is_null(), + "Result should not be null for large string" + ); + assert_eq!(out_length, 1024 * 1024, "Output length should be 1MB"); + + unsafe { crate::memory::free_bytes(result) }; + } + + #[test] + fn test_string_to_bytes_utf8_encoding() { + let input = CString::new("Hello").unwrap(); + let encoding = CString::new("UTF8").unwrap(); + let mut out_length: usize = 0; + + let result = unsafe { + string_to_bytes( + input.as_ptr(), + encoding.as_ptr(), + &mut out_length as *mut usize, + ) + }; + + assert!(!result.is_null(), "Result should not be null"); + assert_eq!(out_length, 5, "UTF8 'Hello' should be 5 bytes"); + + let bytes = unsafe { std::slice::from_raw_parts(result, out_length) }; + assert_eq!(bytes, &[72, 101, 108, 108, 111]); + + unsafe { crate::memory::free_bytes(result) }; + } + + #[test] + fn test_string_to_bytes_ascii_encoding() { + let input = CString::new("ABC").unwrap(); + let encoding = CString::new("ASCII").unwrap(); + let mut out_length: usize = 0; + + let result = unsafe { + string_to_bytes( + input.as_ptr(), + encoding.as_ptr(), + &mut out_length as *mut usize, + ) + }; + + assert!(!result.is_null(), "Result should not be null"); + assert_eq!(out_length, 3, "ASCII 'ABC' should be 3 bytes"); + + let bytes = unsafe { std::slice::from_raw_parts(result, out_length) }; + assert_eq!(bytes, &[65, 66, 67]); + + unsafe { crate::memory::free_bytes(result) }; + } + + #[test] + fn test_string_to_bytes_unicode_encoding() { + let input = CString::new("A").unwrap(); + let encoding = CString::new("Unicode").unwrap(); + let mut out_length: usize = 0; + + let result = unsafe { + string_to_bytes( + input.as_ptr(), + encoding.as_ptr(), + &mut out_length as *mut usize, + ) + }; + + assert!(!result.is_null(), "Result should not be null"); + assert_eq!(out_length, 2, "Unicode 'A' should be 2 bytes"); + + let bytes = unsafe { std::slice::from_raw_parts(result, out_length) }; + assert_eq!(bytes, &[0x41, 0x00]); + + unsafe { crate::memory::free_bytes(result) }; + } + + #[test] + fn test_string_to_bytes_utf32_encoding() { + let input = CString::new("A").unwrap(); + let encoding = CString::new("UTF32").unwrap(); + let mut out_length: usize = 0; + + let result = unsafe { + string_to_bytes( + input.as_ptr(), + encoding.as_ptr(), + &mut out_length as *mut usize, + ) + }; + + assert!(!result.is_null(), "Result should not be null"); + assert_eq!(out_length, 4, "UTF32 'A' should be 4 bytes"); + + let bytes = unsafe { std::slice::from_raw_parts(result, out_length) }; + assert_eq!(bytes, &[0x41, 0x00, 0x00, 0x00]); + + unsafe { crate::memory::free_bytes(result) }; + } + + #[test] + fn test_string_to_bytes_bigendian_unicode_encoding() { + let input = CString::new("A").unwrap(); + let encoding = CString::new("BigEndianUnicode").unwrap(); + let mut out_length: usize = 0; + + let result = unsafe { + string_to_bytes( + input.as_ptr(), + encoding.as_ptr(), + &mut out_length as *mut usize, + ) + }; + + assert!(!result.is_null(), "Result should not be null"); + assert_eq!(out_length, 2, "BigEndianUnicode 'A' should be 2 bytes"); + + let bytes = unsafe { std::slice::from_raw_parts(result, out_length) }; + assert_eq!(bytes, &[0x00, 0x41]); + + unsafe { crate::memory::free_bytes(result) }; + } + + #[test] + fn test_string_to_bytes_default_encoding() { + let input = CString::new("Test").unwrap(); + let encoding = CString::new("Default").unwrap(); + let mut out_length: usize = 0; + + let result = unsafe { + string_to_bytes( + input.as_ptr(), + encoding.as_ptr(), + &mut out_length as *mut usize, + ) + }; + + assert!(!result.is_null(), "Result should not be null"); + assert_eq!(out_length, 4, "Default 'Test' should be 4 bytes (UTF8)"); + + unsafe { crate::memory::free_bytes(result) }; + } + + #[test] + fn test_string_to_bytes_special_characters() { + let input = CString::new("Hello, World!").unwrap(); + let encoding = CString::new("UTF8").unwrap(); + let mut out_length: usize = 0; + + let result = unsafe { + string_to_bytes( + input.as_ptr(), + encoding.as_ptr(), + &mut out_length as *mut usize, + ) + }; + + assert!( + !result.is_null(), + "Result should not be null for special characters" + ); + assert_eq!(out_length, 13, "UTF8 'Hello, World!' should be 13 bytes"); + + unsafe { crate::memory::free_bytes(result) }; + } + + #[test] + fn test_string_to_bytes_unicode_characters() { + let input = CString::new("Hello 🌍").unwrap(); + let encoding = CString::new("UTF8").unwrap(); + let mut out_length: usize = 0; + + let result = unsafe { + string_to_bytes( + input.as_ptr(), + encoding.as_ptr(), + &mut out_length as *mut usize, + ) + }; + + assert!( + !result.is_null(), + "Result should not be null for Unicode characters" + ); + assert_eq!(out_length, 10, "UTF8 'Hello 🌍' should be 10 bytes"); + + unsafe { crate::memory::free_bytes(result) }; + } + + #[test] + fn test_string_to_bytes_null_output_length_pointer() { + let input = CString::new("Hello").unwrap(); + let encoding = CString::new("UTF8").unwrap(); + + let result = + unsafe { string_to_bytes(input.as_ptr(), encoding.as_ptr(), std::ptr::null_mut()) }; + + assert!( + !result.is_null(), + "Should succeed with null out_length pointer" + ); + + let data = unsafe { std::slice::from_raw_parts(result, 5) }; + assert_eq!(data, &[72, 101, 108, 108, 111]); + + unsafe { crate::memory::free_bytes(result) }; + } + + #[test] + fn test_string_to_bytes_case_insensitive_encoding() { + let input = CString::new("Test").unwrap(); + let encoding_variants = vec!["utf8", "UTF8", "Utf8", "ascii", "ASCII"]; + + for enc in encoding_variants { + let encoding = CString::new(enc).unwrap(); + let mut out_length: usize = 0; + + let result = unsafe { + string_to_bytes( + input.as_ptr(), + encoding.as_ptr(), + &mut out_length as *mut usize, + ) + }; + + assert!( + !result.is_null(), + "Encoding '{}' should be recognized (case-insensitive)", + enc + ); + + unsafe { crate::memory::free_bytes(result) }; + } + } + + #[test] + fn test_string_to_bytes_encoding_with_hyphens() { + let input = CString::new("Test").unwrap(); + let encoding_variants = vec!["UTF-8", "UTF-16", "UTF-32"]; + + for enc in encoding_variants { + let encoding = CString::new(enc).unwrap(); + let mut out_length: usize = 0; + + let result = unsafe { + string_to_bytes( + input.as_ptr(), + encoding.as_ptr(), + &mut out_length as *mut usize, + ) + }; + + assert!(!result.is_null(), "Encoding '{}' should work", enc); + + unsafe { crate::memory::free_bytes(result) }; + } + } + + #[test] + fn test_string_to_bytes_ascii_rejects_non_ascii() { + let input = CString::new("Café").unwrap(); + let encoding = CString::new("ASCII").unwrap(); + let mut out_length: usize = 0; + + let result = unsafe { + string_to_bytes( + input.as_ptr(), + encoding.as_ptr(), + &mut out_length as *mut usize, + ) + }; + + assert!( + result.is_null(), + "ASCII encoding should reject non-ASCII string" + ); + assert_eq!( + out_length, 0, + "Output length should be 0 for rejected input" + ); + } + + #[test] + fn test_string_to_bytes_various_lengths() { + let test_cases = vec![ + ("", 0), + ("A", 1), + ("AB", 2), + ("ABC", 3), + ("Test String", 11), + ]; + + let encoding = CString::new("UTF8").unwrap(); + + for (test_str, expected_length) in test_cases { + let input = CString::new(test_str).unwrap(); + let mut out_length: usize = 0; + + let result = unsafe { + string_to_bytes( + input.as_ptr(), + encoding.as_ptr(), + &mut out_length as *mut usize, + ) + }; + + assert!( + !result.is_null(), + "Result should not be null for input '{}'", + test_str + ); + assert_eq!( + out_length, expected_length, + "Output length should be {} for input '{}'", + expected_length, test_str + ); + + unsafe { crate::memory::free_bytes(result) }; + } + } + + #[test] + fn test_string_to_bytes_concurrent_operations() { + use std::thread; + + let handles: Vec<_> = (0..10) + .map(|i| { + thread::spawn(move || { + let input = CString::new(format!("test{}", i)).unwrap(); + let encoding = CString::new("UTF8").unwrap(); + let mut out_length: usize = 0; + + let result = unsafe { + string_to_bytes( + input.as_ptr(), + encoding.as_ptr(), + &mut out_length as *mut usize, + ) + }; + assert!(!result.is_null(), "Encoding should succeed in thread {}", i); + assert!( + out_length > 0, + "Output length should be > 0 in thread {}", + i + ); + + unsafe { crate::memory::free_bytes(result) }; + }) + }) + .collect(); + + for handle in handles { + handle.join().unwrap(); + } + } +} diff --git a/lib/src/hash/algorithms.rs b/lib/src/hash/algorithms.rs new file mode 100644 index 0000000..10ed845 --- /dev/null +++ b/lib/src/hash/algorithms.rs @@ -0,0 +1,114 @@ +//! Core hash and HMAC algorithm implementations + +use hmac::{Hmac, Mac}; +use md5::Md5; +use sha1::Sha1; +use sha2::{Digest, Sha256, Sha384, Sha512}; + +/// Computes hash for the given bytes using the specified algorithm. +/// +/// Returns uppercase hexadecimal string for .NET compatibility. +pub(crate) fn compute_hash_bytes(bytes: &[u8], algorithm: &str) -> Result { + match algorithm.to_uppercase().as_str() { + "MD5" => { + let mut hasher = Md5::new(); + hasher.update(bytes); + Ok(format!("{:X}", hasher.finalize())) + } + "SHA1" => { + let mut hasher = Sha1::new(); + hasher.update(bytes); + Ok(format!("{:X}", hasher.finalize())) + } + "SHA256" => { + let mut hasher = Sha256::new(); + hasher.update(bytes); + Ok(format!("{:X}", hasher.finalize())) + } + "SHA384" => { + let mut hasher = Sha384::new(); + hasher.update(bytes); + Ok(format!("{:X}", hasher.finalize())) + } + "SHA512" => { + let mut hasher = Sha512::new(); + hasher.update(bytes); + Ok(format!("{:X}", hasher.finalize())) + } + _ => Err(format!( + "Unsupported algorithm: {}. Supported: MD5, SHA1, SHA256, SHA384, SHA512", + algorithm + )), + } +} + +/// Computes HMAC using the specified algorithm. +/// +/// Returns uppercase hexadecimal string for .NET compatibility. +pub(crate) fn compute_hmac_internal( + algorithm: &str, + key: &[u8], + input: &[u8], +) -> Result { + match algorithm.to_uppercase().as_str() { + "MD5" => compute_hmac_md5(key, input), + "SHA1" => compute_hmac_sha1(key, input), + "SHA256" => compute_hmac_sha256(key, input), + "SHA384" => compute_hmac_sha384(key, input), + "SHA512" => compute_hmac_sha512(key, input), + _ => Err(format!( + "Unsupported algorithm: {}. Supported: MD5, SHA1, SHA256, SHA384, SHA512", + algorithm + )), + } +} + +/// Compute HMAC-MD5 +#[inline] +fn compute_hmac_md5(key: &[u8], input: &[u8]) -> Result { + type HmacMd5 = Hmac; + let mut mac = HmacMd5::new_from_slice(key) + .map_err(|_| "Failed to create HMAC-MD5 instance".to_string())?; + mac.update(input); + Ok(format!("{:X}", mac.finalize().into_bytes())) +} + +/// Compute HMAC-SHA1 +#[inline] +fn compute_hmac_sha1(key: &[u8], input: &[u8]) -> Result { + type HmacSha1 = Hmac; + let mut mac = HmacSha1::new_from_slice(key) + .map_err(|_| "Failed to create HMAC-SHA1 instance".to_string())?; + mac.update(input); + Ok(format!("{:X}", mac.finalize().into_bytes())) +} + +/// Compute HMAC-SHA256 +#[inline] +fn compute_hmac_sha256(key: &[u8], input: &[u8]) -> Result { + type HmacSha256 = Hmac; + let mut mac = HmacSha256::new_from_slice(key) + .map_err(|_| "Failed to create HMAC-SHA256 instance".to_string())?; + mac.update(input); + Ok(format!("{:X}", mac.finalize().into_bytes())) +} + +/// Compute HMAC-SHA384 +#[inline] +fn compute_hmac_sha384(key: &[u8], input: &[u8]) -> Result { + type HmacSha384 = Hmac; + let mut mac = HmacSha384::new_from_slice(key) + .map_err(|_| "Failed to create HMAC-SHA384 instance".to_string())?; + mac.update(input); + Ok(format!("{:X}", mac.finalize().into_bytes())) +} + +/// Compute HMAC-SHA512 +#[inline] +fn compute_hmac_sha512(key: &[u8], input: &[u8]) -> Result { + type HmacSha512 = Hmac; + let mut mac = HmacSha512::new_from_slice(key) + .map_err(|_| "Failed to create HMAC-SHA512 instance".to_string())?; + mac.update(input); + Ok(format!("{:X}", mac.finalize().into_bytes())) +} diff --git a/lib/src/hash/hash_ops.rs b/lib/src/hash/hash_ops.rs new file mode 100644 index 0000000..43cafe0 --- /dev/null +++ b/lib/src/hash/hash_ops.rs @@ -0,0 +1,218 @@ +//! Hash computation operations + +use std::ffi::{CStr, CString}; +use std::os::raw::c_char; + +use super::algorithms::compute_hash_bytes; + +/// Compute a cryptographic hash of a string +/// +/// # Safety +/// This function is unsafe because it dereferences raw pointers. +/// The caller must ensure that: +/// - `input` is a valid null-terminated C string or null +/// - `algorithm` is a valid null-terminated C string or null +/// - `encoding` is a valid null-terminated C string or null +/// - The returned pointer must be freed using `free_string` +/// +/// # Supported Algorithms +/// - MD5 +/// - SHA1 +/// - SHA256 +/// - SHA384 +/// - SHA512 +#[unsafe(no_mangle)] +pub unsafe extern "C" fn compute_hash( + input: *const c_char, + algorithm: *const c_char, + encoding: *const c_char, +) -> *mut c_char { + crate::error::clear_error(); + + if input.is_null() { + crate::error::set_error("Input pointer is null".to_string()); + return std::ptr::null_mut(); + } + + if algorithm.is_null() { + crate::error::set_error("Algorithm pointer is null".to_string()); + return std::ptr::null_mut(); + } + + if encoding.is_null() { + crate::error::set_error("Encoding pointer is null".to_string()); + return std::ptr::null_mut(); + } + + let input_str = match unsafe { CStr::from_ptr(input).to_str() } { + Ok(s) => s, + Err(_) => { + crate::error::set_error("Invalid UTF-8 in input string".to_string()); + return std::ptr::null_mut(); + } + }; + + let algorithm_str = match unsafe { CStr::from_ptr(algorithm).to_str() } { + Ok(s) => s, + Err(_) => { + crate::error::set_error("Invalid UTF-8 in algorithm string".to_string()); + return std::ptr::null_mut(); + } + }; + + let encoding_str = match unsafe { CStr::from_ptr(encoding).to_str() } { + Ok(s) => s, + Err(_) => { + crate::error::set_error("Invalid UTF-8 in encoding string".to_string()); + return std::ptr::null_mut(); + } + }; + + let bytes = match crate::base64::convert_string_to_bytes(input_str, encoding_str) { + Ok(b) => b, + Err(e) => { + crate::error::set_error(e); + return std::ptr::null_mut(); + } + }; + + let hash_hex = match compute_hash_bytes(&bytes, algorithm_str) { + Ok(hex) => hex, + Err(e) => { + crate::error::set_error(e); + return std::ptr::null_mut(); + } + }; + + match CString::new(hash_hex) { + Ok(c_str) => { + crate::error::clear_error(); + c_str.into_raw() + } + Err(_) => { + crate::error::set_error("Failed to create C string from hash result".to_string()); + std::ptr::null_mut() + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::ffi::CString; + + #[test] + fn test_compute_hash_known_vectors() { + let test_cases = vec![ + ("MD5", "098F6BCD4621D373CADE4E832627B4F6"), + ("SHA1", "A94A8FE5CCB19BA61C4C0873D391E987982FBBD3"), + ( + "SHA256", + "9F86D081884C7D659A2FEAA0C55AD015A3BF4F1B2B0B822CD15D6C15B0F00A08", + ), + ( + "SHA384", + "768412320F7B0AA5812FCE428DC4706B3CAE50E02A64CAA16A782249BFE8EFC4B7EF1CCB126255D196047DFEDF17A0A9", + ), + ( + "SHA512", + "EE26B0DD4AF7E749AA1A8EE3C10AE9923F618980772E473F8819A5D4940E0DB27AC185F8A0E1D5F84F88BC887FD67B143732C304CC5FA9AD8E6F57F50028A8FF", + ), + ]; + + for (algorithm, expected_hash) in test_cases { + let input = CString::new("test").unwrap(); + let algo = CString::new(algorithm).unwrap(); + let encoding = CString::new("UTF8").unwrap(); + + let result = unsafe { compute_hash(input.as_ptr(), algo.as_ptr(), encoding.as_ptr()) }; + + assert!(!result.is_null(), "{} result should not be null", algorithm); + let result_str = unsafe { CStr::from_ptr(result).to_str().unwrap() }; + assert_eq!( + result_str, expected_hash, + "{} hash of 'test' should match known vector", + algorithm + ); + unsafe { crate::memory::free_string(result) }; + } + } + + #[test] + fn test_compute_hash_null_input_returns_null() { + let algorithm = CString::new("SHA256").unwrap(); + let encoding = CString::new("UTF8").unwrap(); + + let result = + unsafe { compute_hash(std::ptr::null(), algorithm.as_ptr(), encoding.as_ptr()) }; + + assert!(result.is_null(), "Null input should return null"); + } + + #[test] + fn test_compute_hash_null_algorithm_returns_null() { + let input = CString::new("test").unwrap(); + let encoding = CString::new("UTF8").unwrap(); + + let result = unsafe { compute_hash(input.as_ptr(), std::ptr::null(), encoding.as_ptr()) }; + + assert!(result.is_null(), "Null algorithm should return null"); + } + + #[test] + fn test_compute_hash_null_encoding_returns_null() { + let input = CString::new("test").unwrap(); + let algorithm = CString::new("SHA256").unwrap(); + + let result = unsafe { compute_hash(input.as_ptr(), algorithm.as_ptr(), std::ptr::null()) }; + + assert!(result.is_null(), "Null encoding should return null"); + } + + #[test] + fn test_compute_hash_unsupported_algorithm_returns_null() { + let input = CString::new("test").unwrap(); + let algorithm = CString::new("UNSUPPORTED").unwrap(); + let encoding = CString::new("UTF8").unwrap(); + + let result = unsafe { compute_hash(input.as_ptr(), algorithm.as_ptr(), encoding.as_ptr()) }; + + assert!(result.is_null(), "Unsupported algorithm should return null"); + } + + #[test] + fn test_compute_hash_empty_string() { + let input = CString::new("").unwrap(); + let algorithm = CString::new("SHA256").unwrap(); + let encoding = CString::new("UTF8").unwrap(); + + let result = unsafe { compute_hash(input.as_ptr(), algorithm.as_ptr(), encoding.as_ptr()) }; + + assert!(!result.is_null(), "Empty string should produce a hash"); + let result_str = unsafe { CStr::from_ptr(result).to_str().unwrap() }; + assert_eq!( + result_str, "E3B0C44298FC1C149AFBF4C8996FB92427AE41E4649B934CA495991B7852B855", + "SHA256 hash of empty string should match known vector" + ); + unsafe { crate::memory::free_string(result) }; + } + + #[test] + fn test_compute_hash_large_string() { + let large_input = "A".repeat(1_000_000); + let input = CString::new(large_input).unwrap(); + let algorithm = CString::new("SHA256").unwrap(); + let encoding = CString::new("UTF8").unwrap(); + + let result = unsafe { compute_hash(input.as_ptr(), algorithm.as_ptr(), encoding.as_ptr()) }; + + assert!(!result.is_null(), "Large string should produce a hash"); + let result_str = unsafe { CStr::from_ptr(result).to_str().unwrap() }; + assert_eq!( + result_str.len(), + 64, + "SHA256 hash should be 64 hex characters" + ); + unsafe { crate::memory::free_string(result) }; + } +} diff --git a/lib/src/hash.rs b/lib/src/hash/hmac_ops.rs similarity index 59% rename from lib/src/hash.rs rename to lib/src/hash/hmac_ops.rs index e963ca7..6bd6982 100644 --- a/lib/src/hash.rs +++ b/lib/src/hash/hmac_ops.rs @@ -1,173 +1,15 @@ -//! Cryptographic hash functions (MD5, SHA1, SHA256, SHA384, SHA512, HMAC) +//! HMAC computation operations -use hmac::{Hmac, Mac}; -use md5::Md5; -use sha1::Sha1; -use sha2::{Digest, Sha256, Sha384, Sha512}; use std::ffi::{CStr, CString}; use std::os::raw::c_char; -/// Compute a cryptographic hash of a string -/// -/// # Safety -/// This function is unsafe because it dereferences raw pointers. -/// The caller must ensure that: -/// - `input` is a valid null-terminated C string or null -/// - `algorithm` is a valid null-terminated C string or null -/// - `encoding` is a valid null-terminated C string or null -/// - The returned pointer must be freed using `free_string` -/// -/// # Supported Algorithms -/// - MD5 -/// - SHA1 -/// - SHA256 -/// - SHA384 -/// - SHA512 -/// -/// # Returns -/// Hex-encoded hash string, or null on error -#[unsafe(no_mangle)] -pub unsafe extern "C" fn compute_hash( - input: *const c_char, - algorithm: *const c_char, - encoding: *const c_char, -) -> *mut c_char { - crate::error::clear_error(); - - // Validate null pointers - if input.is_null() { - crate::error::set_error("Input pointer is null".to_string()); - return std::ptr::null_mut(); - } - - if algorithm.is_null() { - crate::error::set_error("Algorithm pointer is null".to_string()); - return std::ptr::null_mut(); - } - - if encoding.is_null() { - crate::error::set_error("Encoding pointer is null".to_string()); - return std::ptr::null_mut(); - } - - // Convert C strings to Rust strings - let input_str = match unsafe { CStr::from_ptr(input).to_str() } { - Ok(s) => s, - Err(_) => { - crate::error::set_error("Invalid UTF-8 in input string".to_string()); - return std::ptr::null_mut(); - } - }; - - let algorithm_str = match unsafe { CStr::from_ptr(algorithm).to_str() } { - Ok(s) => s, - Err(_) => { - crate::error::set_error("Invalid UTF-8 in algorithm string".to_string()); - return std::ptr::null_mut(); - } - }; - - let encoding_str = match unsafe { CStr::from_ptr(encoding).to_str() } { - Ok(s) => s, - Err(_) => { - crate::error::set_error("Invalid UTF-8 in encoding string".to_string()); - return std::ptr::null_mut(); - } - }; - - // Convert string to bytes based on encoding - let bytes = match crate::base64::convert_string_to_bytes(input_str, encoding_str) { - Ok(b) => b, - Err(e) => { - crate::error::set_error(e); - return std::ptr::null_mut(); - } - }; - - // Compute hash based on algorithm (uppercase hex for .NET compatibility) - let hash_hex = match algorithm_str.to_uppercase().as_str() { - "MD5" => { - let mut hasher = Md5::new(); - hasher.update(&bytes); - format!("{:X}", hasher.finalize()) - } - "SHA1" => { - let mut hasher = Sha1::new(); - hasher.update(&bytes); - format!("{:X}", hasher.finalize()) - } - "SHA256" => { - let mut hasher = Sha256::new(); - hasher.update(&bytes); - format!("{:X}", hasher.finalize()) - } - "SHA384" => { - let mut hasher = Sha384::new(); - hasher.update(&bytes); - format!("{:X}", hasher.finalize()) - } - "SHA512" => { - let mut hasher = Sha512::new(); - hasher.update(&bytes); - format!("{:X}", hasher.finalize()) - } - _ => { - crate::error::set_error(format!( - "Unsupported algorithm: {}. Supported: MD5, SHA1, SHA256, SHA384, SHA512", - algorithm_str - )); - return std::ptr::null_mut(); - } - }; - - // Convert to C string - match CString::new(hash_hex) { - Ok(c_str) => { - crate::error::clear_error(); - c_str.into_raw() - } - Err(_) => { - crate::error::set_error("Failed to create C string from hash result".to_string()); - std::ptr::null_mut() - } - } -} - -/// Compute HMAC using the specified algorithm -/// -/// Helper function that encapsulates the algorithm-specific HMAC computation. -/// Returns uppercase hexadecimal string for .NET compatibility. -/// -/// # Arguments -/// * `algorithm` - Algorithm name (case-insensitive) -/// * `key` - Secret key bytes -/// * `input` - Input data bytes -/// -/// # Returns -/// Uppercase hex-encoded HMAC string, or error message -fn compute_hmac_with_algorithm( - algorithm: &str, - key: &[u8], - input: &[u8], -) -> Result { - match algorithm.to_uppercase().as_str() { - "MD5" => compute_hmac_md5(key, input), - "SHA1" => compute_hmac_sha1(key, input), - "SHA256" => compute_hmac_sha256(key, input), - "SHA384" => compute_hmac_sha384(key, input), - "SHA512" => compute_hmac_sha512(key, input), - _ => Err(format!( - "Unsupported algorithm: {}. Supported: MD5, SHA1, SHA256, SHA384, SHA512", - algorithm - )), - } -} +use super::algorithms::compute_hmac_internal; /// Compute an HMAC from a string with specified encoding /// /// This function accepts a string input and encoding parameter, handling the /// string-to-bytes conversion internally using the same encoding logic as -/// other functions in this library. This ensures consistent encoding behavior. +/// other functions in this library. /// /// # Safety /// This function is unsafe because it dereferences raw pointers. @@ -184,9 +26,6 @@ fn compute_hmac_with_algorithm( /// - SHA256 (recommended) /// - SHA384 /// - SHA512 -/// -/// # Returns -/// Pointer to null-terminated hex-encoded HMAC string, or null on error. #[unsafe(no_mangle)] pub unsafe extern "C" fn compute_hmac_with_encoding( input: *const c_char, @@ -197,7 +36,6 @@ pub unsafe extern "C" fn compute_hmac_with_encoding( ) -> *mut c_char { crate::error::clear_error(); - // Validate null pointers if input.is_null() { crate::error::set_error("Input pointer is null".to_string()); return std::ptr::null_mut(); @@ -218,7 +56,6 @@ pub unsafe extern "C" fn compute_hmac_with_encoding( return std::ptr::null_mut(); } - // SAFETY: input is guaranteed non-null by check above let input_str = match unsafe { CStr::from_ptr(input).to_str() } { Ok(s) => s, Err(_) => { @@ -227,7 +64,6 @@ pub unsafe extern "C" fn compute_hmac_with_encoding( } }; - // SAFETY: algorithm is guaranteed non-null by check above let algorithm_str = match unsafe { CStr::from_ptr(algorithm).to_str() } { Ok(s) => s, Err(_) => { @@ -236,7 +72,6 @@ pub unsafe extern "C" fn compute_hmac_with_encoding( } }; - // SAFETY: encoding is guaranteed non-null by check above let encoding_str = match unsafe { CStr::from_ptr(encoding).to_str() } { Ok(s) => s, Err(_) => { @@ -245,7 +80,6 @@ pub unsafe extern "C" fn compute_hmac_with_encoding( } }; - // Convert string to bytes using the specified encoding let input_bytes = match crate::base64::convert_string_to_bytes(input_str, encoding_str) { Ok(b) => b, Err(e) => { @@ -254,11 +88,9 @@ pub unsafe extern "C" fn compute_hmac_with_encoding( } }; - // SAFETY: key is guaranteed non-null and key_length is provided by caller let key_slice = unsafe { std::slice::from_raw_parts(key, key_length) }; - // Compute HMAC using the specified algorithm - let hmac_hex = match compute_hmac_with_algorithm(algorithm_str, key_slice, &input_bytes) { + let hmac_hex = match compute_hmac_internal(algorithm_str, key_slice, &input_bytes) { Ok(hex) => hex, Err(e) => { crate::error::set_error(e); @@ -266,7 +98,6 @@ pub unsafe extern "C" fn compute_hmac_with_encoding( } }; - // Convert to C string match CString::new(hmac_hex) { Ok(c_str) => { crate::error::clear_error(); @@ -282,9 +113,7 @@ pub unsafe extern "C" fn compute_hmac_with_encoding( /// Compute an HMAC from raw bytes /// /// This function accepts raw byte input directly, avoiding the need for -/// encoding conversions when working with binary data. This is the preferred -/// method when the input is already in byte form (e.g., from a MemoryStream -/// or byte array). +/// encoding conversions when working with binary data. /// /// # Safety /// This function is unsafe because it dereferences raw pointers. @@ -300,9 +129,6 @@ pub unsafe extern "C" fn compute_hmac_with_encoding( /// - SHA256 (recommended) /// - SHA384 /// - SHA512 -/// -/// # Returns -/// Pointer to null-terminated hex-encoded HMAC string, or null on error. #[unsafe(no_mangle)] pub unsafe extern "C" fn compute_hmac_bytes( input_bytes: *const u8, @@ -313,19 +139,16 @@ pub unsafe extern "C" fn compute_hmac_bytes( ) -> *mut c_char { crate::error::clear_error(); - // Validate key pointer if key.is_null() { crate::error::set_error("Key pointer is null".to_string()); return std::ptr::null_mut(); } - // Validate algorithm pointer if algorithm.is_null() { crate::error::set_error("Algorithm pointer is null".to_string()); return std::ptr::null_mut(); } - // SAFETY: algorithm is guaranteed non-null by check above let algorithm_str = match unsafe { CStr::from_ptr(algorithm).to_str() } { Ok(s) => s, Err(_) => { @@ -334,24 +157,19 @@ pub unsafe extern "C" fn compute_hmac_bytes( } }; - // Handle empty input case let input_slice = if input_length == 0 { &[] } else { - // Validate input pointer only when length > 0 if input_bytes.is_null() { crate::error::set_error("Input bytes pointer is null".to_string()); return std::ptr::null_mut(); } - // SAFETY: input_bytes is non-null and input_length is provided by caller unsafe { std::slice::from_raw_parts(input_bytes, input_length) } }; - // SAFETY: key is guaranteed non-null and key_length is provided by caller let key_slice = unsafe { std::slice::from_raw_parts(key, key_length) }; - // Compute HMAC using the specified algorithm - let hmac_hex = match compute_hmac_with_algorithm(algorithm_str, key_slice, input_slice) { + let hmac_hex = match compute_hmac_internal(algorithm_str, key_slice, input_slice) { Ok(hex) => hex, Err(e) => { crate::error::set_error(e); @@ -359,7 +177,6 @@ pub unsafe extern "C" fn compute_hmac_bytes( } }; - // Convert to C string match CString::new(hmac_hex) { Ok(c_str) => { crate::error::clear_error(); @@ -372,181 +189,13 @@ pub unsafe extern "C" fn compute_hmac_bytes( } } -/// Compute HMAC-MD5 -#[inline] -fn compute_hmac_md5(key: &[u8], input: &[u8]) -> Result { - type HmacMd5 = Hmac; - let mut mac = HmacMd5::new_from_slice(key) - .map_err(|_| "Failed to create HMAC-MD5 instance".to_string())?; - mac.update(input); - Ok(format!("{:X}", mac.finalize().into_bytes())) -} - -/// Compute HMAC-SHA1 -#[inline] -fn compute_hmac_sha1(key: &[u8], input: &[u8]) -> Result { - type HmacSha1 = Hmac; - let mut mac = HmacSha1::new_from_slice(key) - .map_err(|_| "Failed to create HMAC-SHA1 instance".to_string())?; - mac.update(input); - Ok(format!("{:X}", mac.finalize().into_bytes())) -} - -/// Compute HMAC-SHA256 -#[inline] -fn compute_hmac_sha256(key: &[u8], input: &[u8]) -> Result { - type HmacSha256 = Hmac; - let mut mac = HmacSha256::new_from_slice(key) - .map_err(|_| "Failed to create HMAC-SHA256 instance".to_string())?; - mac.update(input); - Ok(format!("{:X}", mac.finalize().into_bytes())) -} - -/// Compute HMAC-SHA384 -#[inline] -fn compute_hmac_sha384(key: &[u8], input: &[u8]) -> Result { - type HmacSha384 = Hmac; - let mut mac = HmacSha384::new_from_slice(key) - .map_err(|_| "Failed to create HMAC-SHA384 instance".to_string())?; - mac.update(input); - Ok(format!("{:X}", mac.finalize().into_bytes())) -} - -/// Compute HMAC-SHA512 -#[inline] -fn compute_hmac_sha512(key: &[u8], input: &[u8]) -> Result { - type HmacSha512 = Hmac; - let mut mac = HmacSha512::new_from_slice(key) - .map_err(|_| "Failed to create HMAC-SHA512 instance".to_string())?; - mac.update(input); - Ok(format!("{:X}", mac.finalize().into_bytes())) -} - #[cfg(test)] mod tests { use super::*; use std::ffi::CString; - #[test] - fn test_compute_hash_known_vectors() { - let test_cases = vec![ - ("MD5", "098F6BCD4621D373CADE4E832627B4F6"), - ("SHA1", "A94A8FE5CCB19BA61C4C0873D391E987982FBBD3"), - ( - "SHA256", - "9F86D081884C7D659A2FEAA0C55AD015A3BF4F1B2B0B822CD15D6C15B0F00A08", - ), - ( - "SHA384", - "768412320F7B0AA5812FCE428DC4706B3CAE50E02A64CAA16A782249BFE8EFC4B7EF1CCB126255D196047DFEDF17A0A9", - ), - ( - "SHA512", - "EE26B0DD4AF7E749AA1A8EE3C10AE9923F618980772E473F8819A5D4940E0DB27AC185F8A0E1D5F84F88BC887FD67B143732C304CC5FA9AD8E6F57F50028A8FF", - ), - ]; - - for (algorithm, expected_hash) in test_cases { - let input = CString::new("test").unwrap(); - let algo = CString::new(algorithm).unwrap(); - let encoding = CString::new("UTF8").unwrap(); - - let result = unsafe { compute_hash(input.as_ptr(), algo.as_ptr(), encoding.as_ptr()) }; - - assert!(!result.is_null(), "{} result should not be null", algorithm); - let result_str = unsafe { CStr::from_ptr(result).to_str().unwrap() }; - assert_eq!( - result_str, expected_hash, - "{} hash of 'test' should match known vector", - algorithm - ); - unsafe { crate::memory::free_string(result) }; - } - } - - #[test] - fn test_compute_hash_null_input_returns_null() { - let algorithm = CString::new("SHA256").unwrap(); - let encoding = CString::new("UTF8").unwrap(); - - let result = - unsafe { compute_hash(std::ptr::null(), algorithm.as_ptr(), encoding.as_ptr()) }; - - assert!(result.is_null(), "Null input should return null"); - } - - #[test] - fn test_compute_hash_null_algorithm_returns_null() { - let input = CString::new("test").unwrap(); - let encoding = CString::new("UTF8").unwrap(); - - let result = unsafe { compute_hash(input.as_ptr(), std::ptr::null(), encoding.as_ptr()) }; - - assert!(result.is_null(), "Null algorithm should return null"); - } - - #[test] - fn test_compute_hash_null_encoding_returns_null() { - let input = CString::new("test").unwrap(); - let algorithm = CString::new("SHA256").unwrap(); - - let result = unsafe { compute_hash(input.as_ptr(), algorithm.as_ptr(), std::ptr::null()) }; - - assert!(result.is_null(), "Null encoding should return null"); - } - - #[test] - fn test_compute_hash_unsupported_algorithm_returns_null() { - let input = CString::new("test").unwrap(); - let algorithm = CString::new("UNSUPPORTED").unwrap(); - let encoding = CString::new("UTF8").unwrap(); - - let result = unsafe { compute_hash(input.as_ptr(), algorithm.as_ptr(), encoding.as_ptr()) }; - - assert!(result.is_null(), "Unsupported algorithm should return null"); - } - - #[test] - fn test_compute_hash_empty_string() { - let input = CString::new("").unwrap(); - let algorithm = CString::new("SHA256").unwrap(); - let encoding = CString::new("UTF8").unwrap(); - - let result = unsafe { compute_hash(input.as_ptr(), algorithm.as_ptr(), encoding.as_ptr()) }; - - assert!(!result.is_null(), "Empty string should produce a hash"); - let result_str = unsafe { CStr::from_ptr(result).to_str().unwrap() }; - assert_eq!( - result_str, "E3B0C44298FC1C149AFBF4C8996FB92427AE41E4649B934CA495991B7852B855", - "SHA256 hash of empty string should match known vector" - ); - unsafe { crate::memory::free_string(result) }; - } - - #[test] - fn test_compute_hash_large_string() { - let large_input = "A".repeat(1_000_000); - let input = CString::new(large_input).unwrap(); - let algorithm = CString::new("SHA256").unwrap(); - let encoding = CString::new("UTF8").unwrap(); - - let result = unsafe { compute_hash(input.as_ptr(), algorithm.as_ptr(), encoding.as_ptr()) }; - - assert!(!result.is_null(), "Large string should produce a hash"); - let result_str = unsafe { CStr::from_ptr(result).to_str().unwrap() }; - assert_eq!( - result_str.len(), - 64, - "SHA256 hash should be 64 hex characters" - ); - unsafe { crate::memory::free_string(result) }; - } - - // ========== Tests for compute_hmac_bytes ========== - #[test] fn test_compute_hmac_bytes_known_vectors() { - // Same test vectors as compute_hmac - "test" as bytes with key "secret" let test_cases = vec![ ("MD5", "63D6BAF65DF6BDEE8F32B332E0930669"), ("SHA1", "1AA349585ED7ECBD3B9C486A30067E395CA4B356"), @@ -597,15 +246,12 @@ mod tests { #[test] fn test_compute_hmac_bytes_matches_encoding_version() { - // Verify that compute_hmac_bytes produces the same result as compute_hmac_with_encoding - // for the same input data (UTF-8 encoding) let input_str = CString::new("Hello, World!").unwrap(); let input_bytes = b"Hello, World!"; let key = b"my_secret_key"; let algorithm = CString::new("SHA256").unwrap(); let encoding = CString::new("UTF8").unwrap(); - // Get result from string version with encoding let string_result = unsafe { compute_hmac_with_encoding( input_str.as_ptr(), @@ -618,7 +264,6 @@ mod tests { assert!(!string_result.is_null()); let string_hmac = unsafe { CStr::from_ptr(string_result).to_str().unwrap().to_string() }; - // Get result from bytes version let bytes_result = unsafe { compute_hmac_bytes( input_bytes.as_ptr(), @@ -683,7 +328,6 @@ mod tests { let key = b"secret"; let algorithm = CString::new("SHA256").unwrap(); - // Empty input with null pointer and zero length let result = unsafe { compute_hmac_bytes( std::ptr::null(), @@ -705,7 +349,6 @@ mod tests { #[test] fn test_compute_hmac_bytes_binary_data() { - // Test with binary data that isn't valid UTF-8 let binary_input: &[u8] = &[0x00, 0x01, 0xFF, 0xFE, 0x80, 0x81]; let key = b"secret"; let algorithm = CString::new("SHA256").unwrap(); @@ -732,7 +375,7 @@ mod tests { #[test] fn test_compute_hmac_bytes_large_input() { - let large_input: Vec = vec![0x41; 1_000_000]; // 1MB of 'A' bytes + let large_input: Vec = vec![0x41; 1_000_000]; let key = b"secret"; let algorithm = CString::new("SHA256").unwrap(); @@ -780,7 +423,6 @@ mod tests { let key = b"secret"; let algorithm = CString::new("SHA256").unwrap(); - // Null input pointer with non-zero length should fail let result = unsafe { compute_hmac_bytes( std::ptr::null(), @@ -797,11 +439,8 @@ mod tests { ); } - // ========== Tests for compute_hmac_with_encoding ========== - #[test] fn test_compute_hmac_with_encoding_utf8() { - // Test with UTF8 encoding - should match compute_hmac result let input = CString::new("test").unwrap(); let key = b"secret"; let algorithm = CString::new("SHA256").unwrap(); @@ -828,14 +467,12 @@ mod tests { #[test] fn test_compute_hmac_with_encoding_matches_bytes_version() { - // Verify compute_hmac_with_encoding produces same result as compute_hmac_bytes for UTF8 let input = CString::new("Hello, World!").unwrap(); let input_bytes = b"Hello, World!"; let key = b"my_secret_key"; let algorithm = CString::new("SHA256").unwrap(); let encoding = CString::new("UTF8").unwrap(); - // Get result from compute_hmac_bytes let bytes_result = unsafe { compute_hmac_bytes( input_bytes.as_ptr(), @@ -848,7 +485,6 @@ mod tests { assert!(!bytes_result.is_null()); let bytes_hmac = unsafe { CStr::from_ptr(bytes_result).to_str().unwrap().to_string() }; - // Get result from compute_hmac_with_encoding with UTF8 let encoding_result = unsafe { compute_hmac_with_encoding( input.as_ptr(), @@ -893,7 +529,6 @@ mod tests { !result.is_null(), "ASCII encoding should work for ASCII input" ); - // ASCII and UTF8 produce same bytes for ASCII characters let result_str = unsafe { CStr::from_ptr(result).to_str().unwrap() }; assert_eq!( result_str, "0329A06B62CD16B33EB6792BE8C60B158D89A2EE3A876FCE9A881EBB488C0914", @@ -904,7 +539,6 @@ mod tests { #[test] fn test_compute_hmac_with_encoding_unicode() { - // Unicode (UTF-16LE) produces different bytes than UTF8 let input = CString::new("test").unwrap(); let key = b"secret"; let algorithm = CString::new("SHA256").unwrap(); @@ -922,8 +556,6 @@ mod tests { assert!(!result.is_null(), "Unicode encoding should work"); let result_str = unsafe { CStr::from_ptr(result).to_str().unwrap() }; - // UTF-16LE "test" = [0x74, 0x00, 0x65, 0x00, 0x73, 0x00, 0x74, 0x00] - // This should produce a different HMAC than UTF8 assert_ne!( result_str, "0329A06B62CD16B33EB6792BE8C60B158D89A2EE3A876FCE9A881EBB488C0914", "Unicode encoding should produce different HMAC than UTF8" diff --git a/lib/src/hash/mod.rs b/lib/src/hash/mod.rs new file mode 100644 index 0000000..6f59aa1 --- /dev/null +++ b/lib/src/hash/mod.rs @@ -0,0 +1,9 @@ +//! Cryptographic hash functions (MD5, SHA1, SHA256, SHA384, SHA512, HMAC) + +mod algorithms; +mod hash_ops; +mod hmac_ops; + +// Re-export public FFI functions +pub use hash_ops::compute_hash; +pub use hmac_ops::{compute_hmac_bytes, compute_hmac_with_encoding}; diff --git a/src/Convert/Convert.psd1 b/src/Convert/Convert.psd1 index ca215d6..7fe2bac 100644 --- a/src/Convert/Convert.psd1 +++ b/src/Convert/Convert.psd1 @@ -12,7 +12,7 @@ RootModule = 'Convert.psm1' # Version number of this module. - ModuleVersion = '2.0.5' + ModuleVersion = '2.0.6' # Supported PSEditions CompatiblePSEditions = @( diff --git a/src/Convert/Public/ConvertFrom-ByteArrayToString.ps1 b/src/Convert/Public/ConvertFrom-ByteArrayToString.ps1 index 4634de5..afa0ee2 100644 --- a/src/Convert/Public/ConvertFrom-ByteArrayToString.ps1 +++ b/src/Convert/Public/ConvertFrom-ByteArrayToString.ps1 @@ -57,6 +57,7 @@ function ConvertFrom-ByteArrayToString { ValueFromPipeline = $true, ValueFromPipelineByPropertyName = $true)] [ValidateNotNullOrEmpty()] + [Alias('Bytes')] [Byte[]] $ByteArray, diff --git a/src/Convert/Public/ConvertFrom-CompressedByteArrayToString.ps1 b/src/Convert/Public/ConvertFrom-CompressedByteArrayToString.ps1 index 1d0fcdf..760996c 100644 --- a/src/Convert/Public/ConvertFrom-CompressedByteArrayToString.ps1 +++ b/src/Convert/Public/ConvertFrom-CompressedByteArrayToString.ps1 @@ -47,6 +47,7 @@ function ConvertFrom-CompressedByteArrayToString { ValueFromPipeline = $true, ValueFromPipelineByPropertyName = $true)] [ValidateNotNullOrEmpty()] + [Alias('Bytes')] [Byte[]] $ByteArray,