Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Configurable maximum crate size #153

Merged
merged 2 commits into from
May 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions alexandrie.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
[general]
bind_address = "127.0.0.1:3000"
max_crate_size = "50 MB"

[frontend]
enabled = true
Expand Down
29 changes: 28 additions & 1 deletion crates/alexandrie/src/api/crates/publish.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use std::collections::HashMap;
use std::io::Read;
use std::path::PathBuf;
use std::pin::pin;

use async_std::io::prelude::*;

Expand Down Expand Up @@ -171,6 +172,21 @@ fn link_badges(
Ok(())
}

/// Reports whether `reader` is exhausted, i.e. whether it has reached EOF.
///
/// The check works by trying to pull one single byte out of `reader`: a read that yields
/// zero bytes means there is nothing left to consume.
/// Since that probe byte is thrown away, the reader should not be used after this call,
/// because whatever it still held would be missing that one byte.
///
/// # Errors
///
/// Returns any I/O error raised by the underlying read.
async fn has_reader_ended<R>(reader: R) -> std::io::Result<bool>
where
    R: async_std::io::Read,
{
    // One-byte scratch buffer; its contents are never inspected.
    let mut probe = [0_u8; 1];
    let mut reader = pin!(reader);
    let bytes_read = reader.read(&mut probe).await?;
    Ok(bytes_read == 0)
}

/// Route to publish a new crate (used by `cargo publish`).
pub(crate) async fn put(mut req: Request<State>) -> tide::Result {
let state = req.state().clone();
Expand All @@ -186,7 +202,18 @@ pub(crate) async fn put(mut req: Request<State>) -> tide::Result {
.ok_or(AlexError::InvalidToken)?;

let mut bytes = Vec::new();
(&mut req).take(10_000_000).read_to_end(&mut bytes).await?;
if let Some(max_crate_size) = req.state().general.max_crate_size {
(&mut req)
.take(max_crate_size)
.read_to_end(&mut bytes)
.await?;

if !has_reader_ended(&mut req).await? {
return Err(Error::from(AlexError::CrateTooLarge { max_crate_size }).into());
}
} else {
(&mut req).read_to_end(&mut bytes).await?;
}
let mut cursor = std::io::Cursor::new(bytes);

let metadata_size = cursor.read_u32::<LittleEndian>()?;
Expand Down
23 changes: 23 additions & 0 deletions crates/alexandrie/src/config/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ pub mod database;
#[cfg(feature = "frontend")]
pub mod frontend;

/// Serde (de)serialization helper functions.
pub mod serde_utils;

use alexandrie_index::config::IndexConfig;
use alexandrie_index::Index;
use alexandrie_rendering::config::{SyntectConfig, SyntectState};
Expand All @@ -24,6 +27,9 @@ use self::database::DatabaseConfig;
pub struct GeneralConfig {
    /// The address to bind the server on.
    pub bind_address: String,
    /// The maximum allowed crate size, in bytes.
    ///
    /// Accepts either a plain number of bytes or a human-readable string such as `"50 MB"`.
    /// The key is optional: when it is absent, the value is `None`.
    // `default` is required here: for a field using `deserialize_with`, serde reports a missing
    // key as an error instead of falling back to `None`, even when the field is an `Option`.
    #[serde(default, deserialize_with = "serde_utils::deserialize_file_size_opt")]
    max_crate_size: Option<u64>,
}

/// The application configuration struct.
Expand All @@ -44,8 +50,16 @@ pub struct Config {
pub frontend: FrontendConfig,
}

/// The general configuration state, created from [GeneralConfig].
pub struct GeneralState {
    /// The maximum crate size allowed for publication, in bytes.
    ///
    /// `None` means that no maximum has been configured.
    pub max_crate_size: Option<u64>,
}

/// The application state, created from [Config].
pub struct State {
/// General configuration state.
pub general: GeneralState,
/// The current crate indexer used.
pub index: Index,
/// The current crate storage strategy used.
Expand All @@ -59,9 +73,18 @@ pub struct State {
pub frontend: FrontendState,
}

impl From<GeneralConfig> for GeneralState {
fn from(config: GeneralConfig) -> Self {
Self {
max_crate_size: config.max_crate_size,
}
}
}

impl From<Config> for State {
fn from(config: Config) -> State {
State {
general: config.general.into(),
index: config.index.into(),
storage: config.storage.into(),
db: Database::new(&config.database),
Expand Down
250 changes: 250 additions & 0 deletions crates/alexandrie/src/config/serde_utils.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,250 @@
use std::fmt;

use serde::de::{self, Deserializer, Visitor};

/// Turns a file size found in the input into a number of bytes (`u64`).
///
/// Two input shapes are understood:
/// - an integer, taken as a number of bytes as-is (negative integers are rejected);
/// - a string holding a human-readable file size, i.e. a number followed by a unit.
///
/// The string format supported is roughly (expressed as a regular expression):
/// `^\s*(?P<number>\d+)\s*(?P<unit>B|kB|MB|GB|TB|kiB|MiB|GiB|TiB)\s*$`
pub fn deserialize_file_size<'de, D>(deserializer: D) -> Result<u64, D::Error>
where
    D: Deserializer<'de>,
{
    /// Visitor producing the file size, in bytes.
    struct FileSizeVisitor;

    impl<'de> Visitor<'de> for FileSizeVisitor {
        type Value = u64;

        fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
            formatter.write_str("a positive integer number (as bytes), or a string containing a positive integer number followed by a unit")
        }

        // Strings: parsed as `<number><unit>`.

        fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
        where
            E: de::Error,
        {
            parse_file_size(self, value)
        }

        fn visit_string<E>(self, value: String) -> Result<Self::Value, E>
        where
            E: de::Error,
        {
            self.visit_str(&value)
        }

        // Unsigned integers: always valid, simply widened to `u64`.

        fn visit_u64<E>(self, value: u64) -> Result<Self::Value, E>
        where
            E: de::Error,
        {
            Ok(value)
        }

        fn visit_u32<E>(self, value: u32) -> Result<Self::Value, E>
        where
            E: de::Error,
        {
            Ok(u64::from(value))
        }

        fn visit_u16<E>(self, value: u16) -> Result<Self::Value, E>
        where
            E: de::Error,
        {
            Ok(u64::from(value))
        }

        fn visit_u8<E>(self, value: u8) -> Result<Self::Value, E>
        where
            E: de::Error,
        {
            Ok(u64::from(value))
        }

        // Signed integers: widened to `i64`, then accepted only when non-negative.

        fn visit_i64<E>(self, value: i64) -> Result<Self::Value, E>
        where
            E: de::Error,
        {
            match u64::try_from(value) {
                Ok(size) => Ok(size),
                Err(_) => Err(E::invalid_value(de::Unexpected::Signed(value), &self)),
            }
        }

        fn visit_i32<E>(self, value: i32) -> Result<Self::Value, E>
        where
            E: de::Error,
        {
            self.visit_i64(i64::from(value))
        }

        fn visit_i16<E>(self, value: i16) -> Result<Self::Value, E>
        where
            E: de::Error,
        {
            self.visit_i64(i64::from(value))
        }

        fn visit_i8<E>(self, value: i8) -> Result<Self::Value, E>
        where
            E: de::Error,
        {
            self.visit_i64(i64::from(value))
        }
    }

    deserializer.deserialize_any(FileSizeVisitor)
}

/// Same as `deserialize_file_size`, but parses into an `Option` instead, allowing the field to be missing.
pub fn deserialize_file_size_opt<'de, D>(deserializer: D) -> Result<Option<u64>, D::Error>
where
D: Deserializer<'de>,
{
struct FileSizeOptVisitor;

impl<'de> Visitor<'de> for FileSizeOptVisitor {
type Value = Option<u64>;

fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
formatter.write_str("a positive integer number (as bytes), or a string containing a positive integer number followed by a unit")
}

fn visit_u8<E>(self, value: u8) -> Result<Self::Value, E>
where
E: de::Error,
{
self.visit_u64(u64::from(value))
}

fn visit_u16<E>(self, value: u16) -> Result<Self::Value, E>
where
E: de::Error,
{
self.visit_u64(u64::from(value))
}

fn visit_u32<E>(self, value: u32) -> Result<Self::Value, E>
where
E: de::Error,
{
self.visit_u64(u64::from(value))
}

fn visit_u64<E>(self, value: u64) -> Result<Self::Value, E>
where
E: de::Error,
{
Ok(Some(value))
}

fn visit_string<E>(self, value: String) -> Result<Self::Value, E>
where
E: de::Error,
{
self.visit_str(value.as_str())
}

fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
where
E: de::Error,
{
parse_file_size(self, value).map(Some)
}

fn visit_i8<E>(self, value: i8) -> Result<Self::Value, E>
where
E: de::Error,
{
self.visit_i64(i64::from(value))
}

fn visit_i16<E>(self, value: i16) -> Result<Self::Value, E>
where
E: de::Error,
{
self.visit_i64(i64::from(value))
}

fn visit_i32<E>(self, value: i32) -> Result<Self::Value, E>
where
E: de::Error,
{
self.visit_i64(i64::from(value))
}

fn visit_i64<E>(self, value: i64) -> Result<Self::Value, E>
where
E: de::Error,
{
u64::try_from(value).map(Some).map_err(|_| {
de::Error::invalid_value(de::Unexpected::Signed(i64::from(value)), &self)
})
}

fn visit_none<E>(self) -> Result<Self::Value, E>
where
E: de::Error,
{
Ok(None)
}

fn visit_some<D>(self, deserializer: D) -> Result<Self::Value, D::Error>
where
D: Deserializer<'de>,
{
deserializer.deserialize_any(self)
}
}

deserializer.deserialize_any(FileSizeOptVisitor)
}

/// Parses a human-readable file size (like `"50 MB"`) into a number of bytes.
///
/// The input is trimmed, then split into a leading run of ASCII digits (the number) and the
/// remainder (the unit, itself trimmed). Supported units are the decimal ones (`B`, `kB`, `MB`,
/// `GB`, `TB`) and the binary ones (`kiB`, `MiB`, `GiB`, `TiB`); units are case-sensitive.
///
/// `visitor` is only used to describe what was expected when the input does not start with
/// a digit.
///
/// # Errors
///
/// Returns an "invalid value" error when:
/// - the input does not start with an ASCII digit;
/// - the number does not fit into a `u64`;
/// - the unit is missing or not one of the supported units;
/// - the resulting number of bytes does not fit into a `u64`.
fn parse_file_size<'de, V, E>(visitor: V, value: &str) -> Result<u64, E>
where
    V: Visitor<'de>,
    E: de::Error,
{
    let value = value.trim();

    // ASCII digits are one byte each, so the number of leading digits is also the byte offset
    // at which the unit starts.
    let position = value.chars().take_while(|it| it.is_ascii_digit()).count();
    if position == 0 {
        return Err(de::Error::invalid_value(
            de::Unexpected::Str(value),
            &visitor,
        ));
    };

    // `number` consists solely of ASCII digits here, so parsing can only fail on overflow.
    let (number, unit) = value.split_at(position);
    let Ok(number) = number.parse::<u64>() else {
        return Err(de::Error::invalid_value(
            de::Unexpected::Str(number),
            &"a positive integer number parsable into a `u64`",
        ));
    };

    let factor: u64 = match unit.trim() {
        "B" => 1,
        "kB" => 1_000,
        "MB" => 1_000_000,
        "GB" => 1_000_000_000,
        "TB" => 1_000_000_000_000,
        "kiB" => 1_024,
        "MiB" => 1_048_576,           // 1_024 * 1_024
        "GiB" => 1_073_741_824,       // 1_024 * 1_024 * 1_024
        "TiB" => 1_099_511_627_776,   // 1_024 * 1_024 * 1_024 * 1_024
        unit => {
            return Err(de::Error::invalid_value(
                de::Unexpected::Str(unit),
                &"a valid file size unit (`B`, `kB`, `MB`, `GB`, `TB`, `kiB`, `MiB`, `GiB` or `TiB`)",
            ));
        }
    };

    // The second argument is rendered after "expected", so it has to describe an acceptable
    // value rather than the failure itself.
    let Some(file_size) = number.checked_mul(factor) else {
        return Err(de::Error::invalid_value(
            de::Unexpected::Str(value),
            &"a file size that fits into a `u64` (at most `u64::MAX` bytes)",
        ));
    };

    Ok(file_size)
}
8 changes: 8 additions & 0 deletions crates/alexandrie/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,14 @@ pub enum AlexError {
/// The list of missing query parameters.
missing_params: &'static [&'static str],
},
/// The uploaded crate is larger than the maximum allowed crate size.
#[error(
"uploaded crate is larger than the maximum allowed crate size of {max_crate_size} bytes"
)]
CrateTooLarge {
/// The maximum allowed crate size (in bytes).
max_crate_size: u64,
},
}

// impl IntoResponse for Error {
Expand Down
1 change: 1 addition & 0 deletions docker/mysql/alexandrie.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

[general]
bind_address = "0.0.0.0:3000"
max_crate_size = "50 MB"

[frontend]
enabled = true
Expand Down
1 change: 1 addition & 0 deletions docker/postgres/alexandrie.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

[general]
bind_address = "0.0.0.0:3000"
max_crate_size = "50 MB"

[frontend]
enabled = true
Expand Down
Loading