From 44b1f2eefae48b099df051d289d4c84cc06e7c8a Mon Sep 17 00:00:00 2001 From: Joseph Perez Date: Tue, 16 Jul 2024 15:24:51 +0200 Subject: [PATCH] fix: fix keyexpr canonization unsafe behavior (#1191) * fix: fix keyexpr canonization unsafe behavior * fix: fix keyexpr canonization * Retrigger CI * refactor: use safe version of unimportant unsafe code --- commons/zenoh-keyexpr/src/key_expr/canon.rs | 172 ++++++++++---------- commons/zenoh-keyexpr/src/key_expr/utils.rs | 19 --- 2 files changed, 84 insertions(+), 107 deletions(-) diff --git a/commons/zenoh-keyexpr/src/key_expr/canon.rs b/commons/zenoh-keyexpr/src/key_expr/canon.rs index 7080dbde1a..8187467004 100644 --- a/commons/zenoh-keyexpr/src/key_expr/canon.rs +++ b/commons/zenoh-keyexpr/src/key_expr/canon.rs @@ -12,114 +12,102 @@ // ZettaScale Zenoh Team, // use alloc::string::String; -use core::{slice, str}; - -use crate::key_expr::{ - utils::{Split, Writer}, - DELIMITER, DOUBLE_WILD, SINGLE_WILD, -}; pub trait Canonize { fn canonize(&mut self); } -const DOLLAR_STAR: &[u8; 2] = b"$*"; - -impl Canonize for &mut str { - fn canonize(&mut self) { - let mut writer = Writer { - ptr: self.as_mut_ptr(), - len: 0, - }; - if let Some(position) = self.find("$*$*") { - writer.len = position; - let mut need_final_write = true; - for between_dollarstar in self.as_bytes()[(position + 4)..].splitter(DOLLAR_STAR) { - need_final_write = between_dollarstar.is_empty(); - if !need_final_write { - writer.write(DOLLAR_STAR.as_ref()); - writer.write(between_dollarstar); - } - } - if need_final_write { - writer.write(DOLLAR_STAR.as_ref()) +// Return the length of the canonized string +fn canonize(bytes: &mut [u8]) -> usize { + let mut index = 0; + let mut written = 0; + let mut double_wild = false; + loop { + match &bytes[index..] { + [b'*', b'*'] => { + bytes[written..written + 2].copy_from_slice(b"**"); + written += 2; + return written; } - *self = unsafe { - str::from_utf8_unchecked_mut(slice::from_raw_parts_mut(writer.ptr, writer.len)) + [b'*', b'*', b'/', ..] => { + double_wild = true; + index += 3; } - } - writer.len = 0; - let mut ke = self.as_bytes().splitter(&b'/'); - let mut in_big_wild = false; - - for chunk in ke.by_ref() { - if chunk.is_empty() { - break; - } - if in_big_wild { - match chunk { - [SINGLE_WILD] | b"$*" => { - writer.write_byte(b'*'); - break; - } - DOUBLE_WILD => continue, - _ => { - writer.write(b"**/"); - writer.write(chunk); - in_big_wild = false; - break; + [b'*', r @ ..] | [b'$', b'*', r @ ..] if r.is_empty() || r.starts_with(b"/") => { + let (end, len) = (!r.starts_with(b"/"), r.len()); + bytes[written] = b'*'; + written += 1; + if end { + if double_wild { + bytes[written..written + 3].copy_from_slice(b"/**"); + written += 3; } + return written; } - } else if chunk == DOUBLE_WILD { - in_big_wild = true; - continue; - } else { - writer.write(if chunk == b"$*" { b"*" } else { chunk }); - break; + bytes[written] = b'/'; + written += 1; + index = bytes.len() - len + 1; } - } - for chunk in ke { - if chunk.is_empty() { - writer.write_byte(b'/'); - continue; + // Handle chunks with only repeated "$*" + [b'$', b'*', b'$', b'*', ..] => { + index += 2; } - if in_big_wild { - match chunk { - [SINGLE_WILD] | b"$*" => { - writer.write(b"/*"); - } - DOUBLE_WILD => {} - _ => { - writer.write(b"/**/"); - writer.write(chunk); - in_big_wild = false; + _ => { + if double_wild && &bytes[index..] != b"**" { + bytes[written..written + 3].copy_from_slice(b"**/"); + written += 3; + double_wild = false; + } + let mut write_start = index; + loop { + match bytes.get(index) { + Some(b'/') => { + index += 1; + bytes.copy_within(write_start..index, written); + written += index - write_start; + break; + } + Some(b'$') if matches!(bytes.get(index + 1..index + 4), Some(b"*$*")) => { + index += 2; + bytes.copy_within(write_start..index, written); + written += index - write_start; + let skip = bytes[index + 4..] + .windows(2) + .take_while(|s| s == b"$*") + .count(); + index += (1 + skip) * 2; + write_start = index; + } + Some(_) => index += 1, + None => { + bytes.copy_within(write_start..index, written); + written += index - write_start; + return written; + } } } - } else if chunk == DOUBLE_WILD { - in_big_wild = true; - } else { - writer.write_byte(DELIMITER); - writer.write(if chunk == b"$*" { b"*" } else { chunk }); - } - } - if in_big_wild { - if writer.len != 0 { - writer.write_byte(DELIMITER); } - writer.write(DOUBLE_WILD) - } - *self = unsafe { - str::from_utf8_unchecked_mut(slice::from_raw_parts_mut(writer.ptr, writer.len)) } } } +impl Canonize for &mut str { + fn canonize(&mut self) { + // SAFETY: canonize leave an UTF8 string within the returned length, + // and remaining garbage bytes are zeroed + let bytes = unsafe { self.as_bytes_mut() }; + let length = canonize(bytes); + bytes[length..].fill(b'\0'); + } +} + impl Canonize for String { fn canonize(&mut self) { - let mut s = self.as_mut(); - s.canonize(); - let len = s.len(); - self.truncate(len); + // SAFETY: canonize leave an UTF8 string within the returned length, + // and remaining garbage bytes are truncated + let bytes = unsafe { self.as_mut_vec() }; + let length = canonize(bytes); + bytes.truncate(length); } } @@ -150,6 +138,9 @@ fn canonizer() { let mut s = String::from("hello/**/**/bye"); s.canonize(); assert_eq!(s, "hello/**/bye"); + let mut s = String::from("hello/**/**"); + s.canonize(); + assert_eq!(s, "hello/**"); // Any $* chunk is replaced by a * chunk let mut s = String::from("hello/$*/bye"); @@ -172,4 +163,9 @@ fn canonizer() { let mut s = String::from("hello/**/*"); s.canonize(); assert_eq!(s, "hello/*/**"); + + // &mut str remaining part is zeroed + let mut s = String::from("$*$*$*/hello/$*$*/bye/$*$*"); + s.as_mut_str().canonize(); + assert_eq!(s, "*/hello/*/bye/*\0\0\0\0\0\0\0\0\0\0\0"); } diff --git a/commons/zenoh-keyexpr/src/key_expr/utils.rs b/commons/zenoh-keyexpr/src/key_expr/utils.rs index 628477174a..63f4b4c088 100644 --- a/commons/zenoh-keyexpr/src/key_expr/utils.rs +++ b/commons/zenoh-keyexpr/src/key_expr/utils.rs @@ -11,25 +11,6 @@ // Contributors: // ZettaScale Zenoh Team, // -use core::ptr; - -pub(crate) struct Writer { - pub ptr: *mut u8, - pub len: usize, -} - -impl Writer { - pub(crate) fn write(&mut self, slice: &[u8]) { - let len = slice.len(); - unsafe { ptr::copy(slice.as_ptr(), self.ptr.add(self.len), len) }; - self.len += len - } - pub(crate) fn write_byte(&mut self, byte: u8) { - unsafe { *self.ptr.add(self.len) = byte }; - self.len += 1 - } -} - #[derive(Debug)] pub struct Splitter<'a, S: ?Sized, D: ?Sized> { s: Option<&'a S>,