From b94822be069c24c6f43e32766ae12feb8babc625 Mon Sep 17 00:00:00 2001 From: Nicolas Polomack Date: Sat, 20 May 2023 19:55:25 +0200 Subject: [PATCH 1/2] feat: added `max_crate_size` configuration option --- alexandrie.toml | 1 + crates/alexandrie/src/api/crates/publish.rs | 9 +- crates/alexandrie/src/config/mod.rs | 23 ++ crates/alexandrie/src/config/serde_utils.rs | 250 ++++++++++++++++++++ docker/mysql/alexandrie.toml | 1 + docker/postgres/alexandrie.toml | 1 + docker/sqlite/alexandrie.toml | 1 + 7 files changed, 285 insertions(+), 1 deletion(-) create mode 100644 crates/alexandrie/src/config/serde_utils.rs diff --git a/alexandrie.toml b/alexandrie.toml index 29e01374..8477dabd 100644 --- a/alexandrie.toml +++ b/alexandrie.toml @@ -1,5 +1,6 @@ [general] bind_address = "127.0.0.1:3000" +max_crate_size = "50 MB" [frontend] enabled = true diff --git a/crates/alexandrie/src/api/crates/publish.rs b/crates/alexandrie/src/api/crates/publish.rs index 7a5fb4bb..d5e4df05 100644 --- a/crates/alexandrie/src/api/crates/publish.rs +++ b/crates/alexandrie/src/api/crates/publish.rs @@ -186,7 +186,14 @@ pub(crate) async fn put(mut req: Request) -> tide::Result { .ok_or(AlexError::InvalidToken)?; let mut bytes = Vec::new(); - (&mut req).take(10_000_000).read_to_end(&mut bytes).await?; + if let Some(max_crate_size) = req.state().general.max_crate_size { + (&mut req) + .take(max_crate_size) + .read_to_end(&mut bytes) + .await?; + } else { + (&mut req).read_to_end(&mut bytes).await?; + } let mut cursor = std::io::Cursor::new(bytes); let metadata_size = cursor.read_u32::()?; diff --git a/crates/alexandrie/src/config/mod.rs b/crates/alexandrie/src/config/mod.rs index b29ac8bb..ef1b58ec 100644 --- a/crates/alexandrie/src/config/mod.rs +++ b/crates/alexandrie/src/config/mod.rs @@ -6,6 +6,9 @@ pub mod database; #[cfg(feature = "frontend")] pub mod frontend; +/// Serde (de)serialization helper functions. +pub mod serde_utils; + use alexandrie_index::config::IndexConfig; use alexandrie_index::Index; use alexandrie_rendering::config::{SyntectConfig, SyntectState}; @@ -24,6 +27,9 @@ use self::database::DatabaseConfig; pub struct GeneralConfig { /// The address to bind the server on. pub bind_address: String, + /// The maximum allowed crate size. + #[serde(deserialize_with = "serde_utils::deserialize_file_size_opt")] + max_crate_size: Option, } /// The application configuration struct. @@ -44,8 +50,16 @@ pub struct Config { pub frontend: FrontendConfig, } +/// the general configuration state, created from [GeneralConfig]. +pub struct GeneralState { + /// The maximum crate size allowed for publication. + pub max_crate_size: Option, +} + /// The application state, created from [Config]. pub struct State { + /// General configuration state. + pub general: GeneralState, /// The current crate indexer used. pub index: Index, /// The current crate storage strategy used. @@ -59,9 +73,18 @@ pub struct State { pub frontend: FrontendState, } +impl From for GeneralState { + fn from(config: GeneralConfig) -> Self { + Self { + max_crate_size: config.max_crate_size, + } + } +} + impl From for State { fn from(config: Config) -> State { State { + general: config.general.into(), index: config.index.into(), storage: config.storage.into(), db: Database::new(&config.database), diff --git a/crates/alexandrie/src/config/serde_utils.rs b/crates/alexandrie/src/config/serde_utils.rs new file mode 100644 index 00000000..5c09b83b --- /dev/null +++ b/crates/alexandrie/src/config/serde_utils.rs @@ -0,0 +1,250 @@ +use std::fmt; + +use serde::de::{self, Deserializer, Visitor}; + +/// Deserializes either a number or a string representing a human-readable file size into a `u64`. +/// +/// The string format supported is roughly (expressed as a regular expression): +/// `^\s*(?P\d+)\s*(?PB|kB|MB|GB|TB|kiB|MiB|GiB|TiB)\s*$` +pub fn deserialize_file_size<'de, D>(deserializer: D) -> Result +where + D: Deserializer<'de>, +{ + struct FileSizeVisitor; + + impl<'de> Visitor<'de> for FileSizeVisitor { + type Value = u64; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("a positive integer number (as bytes), or a string containing a positive integer number followed by a unit") + } + + fn visit_u8(self, value: u8) -> Result + where + E: de::Error, + { + self.visit_u64(u64::from(value)) + } + + fn visit_u16(self, value: u16) -> Result + where + E: de::Error, + { + self.visit_u64(u64::from(value)) + } + + fn visit_u32(self, value: u32) -> Result + where + E: de::Error, + { + self.visit_u64(u64::from(value)) + } + + fn visit_u64(self, value: u64) -> Result + where + E: de::Error, + { + Ok(value) + } + + fn visit_string(self, value: String) -> Result + where + E: de::Error, + { + self.visit_str(value.as_str()) + } + + fn visit_str(self, value: &str) -> Result + where + E: de::Error, + { + parse_file_size(self, value) + } + + fn visit_i8(self, value: i8) -> Result + where + E: de::Error, + { + self.visit_i64(i64::from(value)) + } + + fn visit_i16(self, value: i16) -> Result + where + E: de::Error, + { + self.visit_i64(i64::from(value)) + } + + fn visit_i32(self, value: i32) -> Result + where + E: de::Error, + { + self.visit_i64(i64::from(value)) + } + + fn visit_i64(self, value: i64) -> Result + where + E: de::Error, + { + u64::try_from(value).map_err(|_| { + de::Error::invalid_value(de::Unexpected::Signed(i64::from(value)), &self) + }) + } + } + + deserializer.deserialize_any(FileSizeVisitor) +} + +/// Same as `deserialize_file_size`, but parses into an `Option` instead, allowing the field to be missing. +pub fn deserialize_file_size_opt<'de, D>(deserializer: D) -> Result, D::Error> +where + D: Deserializer<'de>, +{ + struct FileSizeOptVisitor; + + impl<'de> Visitor<'de> for FileSizeOptVisitor { + type Value = Option; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("a positive integer number (as bytes), or a string containing a positive integer number followed by a unit") + } + + fn visit_u8(self, value: u8) -> Result + where + E: de::Error, + { + self.visit_u64(u64::from(value)) + } + + fn visit_u16(self, value: u16) -> Result + where + E: de::Error, + { + self.visit_u64(u64::from(value)) + } + + fn visit_u32(self, value: u32) -> Result + where + E: de::Error, + { + self.visit_u64(u64::from(value)) + } + + fn visit_u64(self, value: u64) -> Result + where + E: de::Error, + { + Ok(Some(value)) + } + + fn visit_string(self, value: String) -> Result + where + E: de::Error, + { + self.visit_str(value.as_str()) + } + + fn visit_str(self, value: &str) -> Result + where + E: de::Error, + { + parse_file_size(self, value).map(Some) + } + + fn visit_i8(self, value: i8) -> Result + where + E: de::Error, + { + self.visit_i64(i64::from(value)) + } + + fn visit_i16(self, value: i16) -> Result + where + E: de::Error, + { + self.visit_i64(i64::from(value)) + } + + fn visit_i32(self, value: i32) -> Result + where + E: de::Error, + { + self.visit_i64(i64::from(value)) + } + + fn visit_i64(self, value: i64) -> Result + where + E: de::Error, + { + u64::try_from(value).map(Some).map_err(|_| { + de::Error::invalid_value(de::Unexpected::Signed(i64::from(value)), &self) + }) + } + + fn visit_none(self) -> Result + where + E: de::Error, + { + Ok(None) + } + + fn visit_some(self, deserializer: D) -> Result + where + D: Deserializer<'de>, + { + deserializer.deserialize_any(self) + } + } + + deserializer.deserialize_any(FileSizeOptVisitor) +} + +fn parse_file_size<'de, V, E>(visitor: V, value: &str) -> Result +where + V: Visitor<'de>, + E: de::Error, +{ + let value = value.trim(); + let position = value.chars().take_while(|it| it.is_ascii_digit()).count(); + if position == 0 { + return Err(de::Error::invalid_value( + de::Unexpected::Str(value), + &visitor, + )); + }; + + let (number, unit) = value.split_at(position); + let Ok(number) = number.trim().parse::() else { + return Err(de::Error::invalid_value( + de::Unexpected::Str(number.trim()), + &"a positive integer number parsable into a `u64`", + )); + }; + + let factor = match unit.trim() { + "B" => 1, + "kB" => 1_000, + "MB" => 1_000_000, + "GB" => 1_000_000_000, + "TB" => 1_000_000_000_000, + "kiB" => 1_024, + "MiB" => 1_048_576, // 1_024 * 1_024 + "GiB" => 1_073_741_824, // 1_024 * 1_024 * 1_024 + "TiB" => 1_099_511_627_776, // 1_024 * 1_024 * 1_024 * 1_024 + unit => { + return Err(de::Error::invalid_value( + de::Unexpected::Str(unit), + &"a valid file size unit (`B`, `kB`, `MB`, `GB`, `TB`, `kiB`, `MiB`, `GiB` or `TiB`)", + )); + } + }; + + let Some(file_size) = number.checked_mul(factor) else { + return Err(de::Error::invalid_value( + de::Unexpected::Str(value), + &"the computed file size is bigger than `u64::MAX`", + )); + }; + + Ok(file_size) +} diff --git a/docker/mysql/alexandrie.toml b/docker/mysql/alexandrie.toml index 442248f7..595408ad 100644 --- a/docker/mysql/alexandrie.toml +++ b/docker/mysql/alexandrie.toml @@ -7,6 +7,7 @@ [general] bind_address = "0.0.0.0:3000" +max_crate_size = "50 MB" [frontend] enabled = true diff --git a/docker/postgres/alexandrie.toml b/docker/postgres/alexandrie.toml index 7cb979bc..ae992796 100644 --- a/docker/postgres/alexandrie.toml +++ b/docker/postgres/alexandrie.toml @@ -7,6 +7,7 @@ [general] bind_address = "0.0.0.0:3000" +max_crate_size = "50 MB" [frontend] enabled = true diff --git a/docker/sqlite/alexandrie.toml b/docker/sqlite/alexandrie.toml index 364d358d..5d880a39 100644 --- a/docker/sqlite/alexandrie.toml +++ b/docker/sqlite/alexandrie.toml @@ -7,6 +7,7 @@ [general] bind_address = "0.0.0.0:3000" +max_crate_size = "50 MB" [frontend] enabled = true From f29620563cfd74323514dd949fb35bbf80c42117 Mon Sep 17 00:00:00 2001 From: Nicolas Polomack Date: Sat, 20 May 2023 19:56:23 +0200 Subject: [PATCH 2/2] feat: added crate truncation error check --- crates/alexandrie/src/api/crates/publish.rs | 20 ++++++++++++++++++++ crates/alexandrie/src/error.rs | 8 ++++++++ 2 files changed, 28 insertions(+) diff --git a/crates/alexandrie/src/api/crates/publish.rs b/crates/alexandrie/src/api/crates/publish.rs index d5e4df05..c4607fc1 100644 --- a/crates/alexandrie/src/api/crates/publish.rs +++ b/crates/alexandrie/src/api/crates/publish.rs @@ -1,6 +1,7 @@ use std::collections::HashMap; use std::io::Read; use std::path::PathBuf; +use std::pin::pin; use async_std::io::prelude::*; @@ -171,6 +172,21 @@ fn link_badges( Ok(()) } +/// Checks whether the passed-in reader has ended (meaning it has reached EOF). +/// +/// This function tests for this by attempting to read one more byte from the passed-in reader. +/// Therefore, the reader should not be used after having called this function, because that one byte +/// will be missing from the output. +async fn has_reader_ended(reader: R) -> std::io::Result +where + R: async_std::io::Read, +{ + pin!(reader) + .read(&mut [0]) + .await + .map(|bytes_read| bytes_read == 0) +} + /// Route to publish a new crate (used by `cargo publish`). pub(crate) async fn put(mut req: Request) -> tide::Result { let state = req.state().clone(); @@ -191,6 +207,10 @@ pub(crate) async fn put(mut req: Request) -> tide::Result { .take(max_crate_size) .read_to_end(&mut bytes) .await?; + + if !has_reader_ended(&mut req).await? { + return Err(Error::from(AlexError::CrateTooLarge { max_crate_size }).into()); + } } else { (&mut req).read_to_end(&mut bytes).await?; } diff --git a/crates/alexandrie/src/error.rs b/crates/alexandrie/src/error.rs index 02fc30ec..f4aa52f4 100644 --- a/crates/alexandrie/src/error.rs +++ b/crates/alexandrie/src/error.rs @@ -87,6 +87,14 @@ pub enum AlexError { /// The list of missing query parameters. missing_params: &'static [&'static str], }, + /// The uploaded crate is larger than the maximum allowed crate size. + #[error( + "uploaded crate is larger than the maximum allowed crate size of {max_crate_size} bytes" + )] + CrateTooLarge { + /// The maximum allowed crate size (in bytes). + max_crate_size: u64, + }, } // impl IntoResponse for Error {