From 8982dd52b57c17962dae28ea78700e1f285ee868 Mon Sep 17 00:00:00 2001 From: Hocuri Date: Wed, 11 Dec 2024 16:02:50 +0100 Subject: [PATCH 01/23] File deduplication --- Cargo.lock | 9 +++--- Cargo.toml | 1 + src/blob.rs | 79 ++++++++++++++++++++++++++++++++++++++++++++++++-- src/message.rs | 20 +++++++++++++ 4 files changed, 102 insertions(+), 7 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index bf03494d29..8addf75467 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -571,9 +571,9 @@ dependencies = [ [[package]] name = "blake3" -version = "1.5.4" +version = "1.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d82033247fd8e890df8f740e407ad4d038debb9eb1f40533fffb32e7d17dc6f7" +checksum = "b8ee0c1824c4dea5b5f81736aff91bae041d2c07ee1192bec91054e10e3e601e" dependencies = [ "arrayref", "arrayvec", @@ -984,9 +984,9 @@ dependencies = [ [[package]] name = "constant_time_eq" -version = "0.3.0" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7144d30dcf0fafbce74250a3963025d8d52177934239851c917d29f1df280c2" +checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6" [[package]] name = "convert_case" @@ -1316,6 +1316,7 @@ dependencies = [ "async-smtp", "async_zip", "base64 0.22.1", + "blake3", "brotli", "bytes", "chrono", diff --git a/Cargo.toml b/Cargo.toml index 72b3f23e7b..469d5d4ac8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -110,6 +110,7 @@ toml = "0.8" url = "2" uuid = { version = "1", features = ["serde", "v4"] } webpki-roots = "0.26.7" +blake3 = "1.5.5" [dev-dependencies] anyhow = { workspace = true, features = ["backtrace"] } # Enable `backtrace` feature in tests. diff --git a/src/blob.rs b/src/blob.rs index 78d53641da..7b227261c5 100644 --- a/src/blob.rs +++ b/src/blob.rs @@ -8,7 +8,7 @@ use std::iter::FusedIterator; use std::mem; use std::path::{Path, PathBuf}; -use anyhow::{format_err, Context as _, Result}; +use anyhow::{bail, format_err, Context as _, Result}; use base64::Engine as _; use futures::StreamExt; use image::codecs::jpeg::JpegEncoder; @@ -16,7 +16,7 @@ use image::ImageReader; use image::{DynamicImage, GenericImage, GenericImageView, ImageFormat, Pixel, Rgba}; use num_traits::FromPrimitive; use tokio::io::AsyncWriteExt; -use tokio::{fs, io}; +use tokio::{fs, io, task}; use tokio_stream::wrappers::ReadDirStream; use crate::config::Config; @@ -139,6 +139,47 @@ impl<'a> BlobObject<'a> { Ok(blob) } + /// TODO document + /// TODO what about race conditions when the same file is created multiple times concurrently + pub async fn create_and_deduplicate( + context: &'a Context, + src: &Path, + ) -> Result> { + // `update_reader()` uses std::fs, so we need to call task::block_in_place(). + // Tokio io also just calls spawn_blocking() internally (e.g. https://docs.rs/tokio/1.42.0/src/tokio/fs/file.rs.html#606 and https://docs.rs/tokio/latest/src/tokio/fs/mod.rs.html#310) + // so we are doing essentially the same here. + + task::block_in_place(|| { + let blobdir = context.get_blobdir(); + if !(src.starts_with(blobdir) || src.starts_with("$BLOBDIR/")) { + bail!("The file needs to be in the blob dir already and will be renamed. To attach a different file, copy it to the blobdir first."); + } + + let mut src_file = std::fs::File::open(src) + .with_context(|| format!("failed to open file {}", src.display()))?; + let mut hasher = blake3::Hasher::new(); + hasher.update_reader(&mut src_file)?; + let hash = hasher.finalize().to_hex(); + let hash = hash.as_str(); + let new_path = blobdir.join(hash); + + // This will also replace an already-existing file: + if let Err(_) = std::fs::rename(src, &new_path) { + // Try a second time in case there was some temporary error. + // There is no need to try and create the blobdir since create_and_deduplicate() + // only works for files that already are in the blobdir, anyway. + std::fs::rename(src, &new_path)?; + }; + + let blob = BlobObject { + blobdir: blobdir, + name: format!("$BLOBDIR/{hash}"), + }; + context.emit_event(EventType::NewBlobFile(blob.as_name().to_string())); + Ok(blob) + }) + } + /// Creates a blob from a file, possibly copying it to the blobdir. /// /// If the source file is not a path to into the blob directory @@ -1444,7 +1485,7 @@ mod tests { .await .context("failed to write file")?; let mut msg = Message::new(Viewtype::Image); - msg.set_file(file.to_str().unwrap(), None); + msg.set_file(file.to_str().unwrap(), None); // TODO make sure that test also passes with set_file_and_deduplicate let chat = alice.create_chat(&bob).await; let sent = alice.send_msg(chat.id, &mut msg).await; let bob_msg = bob.recv_msg(&sent).await; @@ -1480,4 +1521,36 @@ mod tests { assert_eq!(msg.get_viewtype(), Viewtype::Sticker); Ok(()) } + + #[tokio::test(flavor = "multi_thread", worker_threads = 2)] + async fn test_deduplication() -> Result<()> { + let t = TestContext::new().await; + + let path = t.get_blobdir().join("anyfile.dat"); + fs::write(&path, b"bla").await?; + let blob = BlobObject::create_and_deduplicate(&t, &path).await?; + assert_eq!( + blob.name, + "$BLOBDIR/ce940175885d7b78f7b7e9f1396611ff3e6828ebba2ca0d8b6e0f860ef2baf66" + ); + assert_eq!(path.exists(), false); + + fs::write(&path, b"bla").await?; + let blob2 = BlobObject::create_and_deduplicate(&t, &path).await?; + assert_eq!(blob2.name, blob.name); + + let path_outside_blobdir = t.dir.path().join("anyfile.dat"); + fs::write(&path_outside_blobdir, b"bla").await?; + let blob_res = BlobObject::create_and_deduplicate(&t, &path_outside_blobdir).await; + assert!( + blob_res.is_err(), + "Files outside the blobdir should not be allowed in create_and_deduplicate()" + ); + + fs::write(&path, b"blabla").await?; + let blob3 = BlobObject::create_and_deduplicate(&t, &path).await?; + assert_ne!(blob3.name, blob.name); + + Ok(()) + } } diff --git a/src/message.rs b/src/message.rs index 07e597955b..51eccc4592 100644 --- a/src/message.rs +++ b/src/message.rs @@ -1082,6 +1082,26 @@ impl Message { self.param.set_optional(Param::MimeType, filemime); } + /// Sets the file associated with a message. + /// The actual current name of the file is ignored, instead `name` is used. + /// The file (may be moved immediately by core) (and must not be modified again after this method was called) + /// + /// TODO document, also in deltachat.h + pub async fn set_file_and_deduplicate( + &mut self, + context: &Context, + file: &Path, + filemime: Option<&str>, + name: &str, + ) -> Result<()> { + let blob = BlobObject::create_and_deduplicate(context, file).await?; + self.param.set(Param::Filename, name); + self.param.set(Param::File, blob.as_name()); + self.param.set_optional(Param::MimeType, filemime); + + Ok(()) + } + /// Creates a new blob and sets it as a file associated with a message. pub async fn set_file_from_bytes( &mut self, From 34f4a4584b2f036d0b91148c6c97e5e9276c4f61 Mon Sep 17 00:00:00 2001 From: Hocuri Date: Thu, 12 Dec 2024 22:56:30 +0100 Subject: [PATCH 02/23] --wip-- [skip ci] --- src/blob.rs | 25 +++++++++++++++++++++---- src/message.rs | 2 +- 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/src/blob.rs b/src/blob.rs index 7b227261c5..c46c997ba0 100644 --- a/src/blob.rs +++ b/src/blob.rs @@ -811,7 +811,12 @@ mod tests { fn check_image_size(path: impl AsRef, width: u32, height: u32) -> image::DynamicImage { tokio::task::block_in_place(move || { - let img = image::open(path).expect("failed to open image"); + let img = ImageReader::open(path) + .expect("failed to open image") + .with_guessed_format() + .expect("failed to guess format") + .decode() + .expect("failed to decode image"); assert_eq!(img.width(), width, "invalid width"); assert_eq!(img.height(), height, "invalid height"); img @@ -1057,7 +1062,12 @@ mod tests { ) .unwrap(); tokio::task::block_in_place(move || { - let img = image::open(blob.to_abs_path()).unwrap(); + let img = ImageReader::open(blob.to_abs_path()) + .unwrap() + .with_guessed_format() + .unwrap() + .decode() + .unwrap(); assert!(img.width() == img_wh); assert!(img.height() == img_wh); assert_eq!(img.get_pixel(0, 0), Rgba(color)); @@ -1108,7 +1118,12 @@ mod tests { assert!(file_size(&avatar_blob).await <= 3000); assert!(file_size(&avatar_blob).await > 2000); tokio::task::block_in_place(move || { - let img = image::open(avatar_blob).unwrap(); + let img = ImageReader::open(avatar_blob) + .unwrap() + .with_guessed_format() + .unwrap() + .decode() + .unwrap(); assert!(img.width() > 130); assert_eq!(img.width(), img.height()); }); @@ -1414,6 +1429,7 @@ mod tests { .set_config(Config::MediaQuality, Some(media_quality_config)) .await?; let file = alice.get_blobdir().join("file").with_extension(extension); + let file_name = format!("file{extension}"); fs::write(&file, &bytes) .await @@ -1429,7 +1445,8 @@ mod tests { } let mut msg = Message::new(viewtype); - msg.set_file(file.to_str().unwrap(), None); + msg.set_file_and_deduplicate(&alice, &file, &file_name, None) + .await?; let chat = alice.create_chat(&bob).await; if set_draft { chat.id.set_draft(&alice, Some(&mut msg)).await.unwrap(); diff --git a/src/message.rs b/src/message.rs index 51eccc4592..b84c200823 100644 --- a/src/message.rs +++ b/src/message.rs @@ -1091,8 +1091,8 @@ impl Message { &mut self, context: &Context, file: &Path, - filemime: Option<&str>, name: &str, + filemime: Option<&str>, ) -> Result<()> { let blob = BlobObject::create_and_deduplicate(context, file).await?; self.param.set(Param::Filename, name); From 4cf43570ae5fa49521c3f43b70a2eaf0db6a09e0 Mon Sep 17 00:00:00 2001 From: Hocuri Date: Fri, 13 Dec 2024 13:32:52 +0100 Subject: [PATCH 03/23] Fix the tests (some of the fixes may need a new test) --- src/blob.rs | 4 +++- src/chat.rs | 19 +++++++++---------- src/message.rs | 20 ++++++++++++++------ src/mimefactory.rs | 18 ++++++++++-------- src/mimeparser.rs | 10 ++++++---- 5 files changed, 42 insertions(+), 29 deletions(-) diff --git a/src/blob.rs b/src/blob.rs index c46c997ba0..79adca176e 100644 --- a/src/blob.rs +++ b/src/blob.rs @@ -1429,7 +1429,7 @@ mod tests { .set_config(Config::MediaQuality, Some(media_quality_config)) .await?; let file = alice.get_blobdir().join("file").with_extension(extension); - let file_name = format!("file{extension}"); + let file_name = format!("file.{extension}"); fs::write(&file, &bytes) .await @@ -1463,6 +1463,8 @@ mod tests { alice_msg.save_file(&alice, &file_saved).await?; check_image_size(file_saved, compressed_width, compressed_height); + println!("{}", sent.payload()); + let bob_msg = bob.recv_msg(&sent).await; assert_eq!(bob_msg.get_viewtype(), Viewtype::Image); assert_eq!(bob_msg.get_width() as u32, compressed_width); diff --git a/src/chat.rs b/src/chat.rs index 99769fbcf0..40dab35261 100644 --- a/src/chat.rs +++ b/src/chat.rs @@ -896,13 +896,12 @@ impl ChatId { .context("no file stored in params")?; msg.param.set(Param::File, blob.as_name()); if msg.viewtype == Viewtype::File { - if let Some((better_type, _)) = - message::guess_msgtype_from_suffix(&blob.to_abs_path()) - // We do not do an automatic conversion to other viewtypes here so that - // users can send images as "files" to preserve the original quality - // (usually we compress images). The remaining conversions are done by - // `prepare_msg_blob()` later. - .filter(|&(vt, _)| vt == Viewtype::Webxdc || vt == Viewtype::Vcard) + if let Some((better_type, _)) = message::guess_msgtype_from_suffix(msg) + // We do not do an automatic conversion to other viewtypes here so that + // users can send images as "files" to preserve the original quality + // (usually we compress images). The remaining conversions are done by + // `prepare_msg_blob()` later. + .filter(|&(vt, _)| vt == Viewtype::Webxdc || vt == Viewtype::Vcard) { msg.viewtype = better_type; } @@ -2698,8 +2697,7 @@ async fn prepare_msg_blob(context: &Context, msg: &mut Message) -> Result<()> { // Typical conversions: // - from FILE to AUDIO/VIDEO/IMAGE // - from FILE/IMAGE to GIF */ - if let Some((better_type, _)) = message::guess_msgtype_from_suffix(&blob.to_abs_path()) - { + if let Some((better_type, _)) = message::guess_msgtype_from_suffix(msg) { if msg.viewtype == Viewtype::Sticker { if better_type != Viewtype::Image { // UIs don't want conversions of `Sticker` to anything other than `Image`. @@ -2737,6 +2735,7 @@ async fn prepare_msg_blob(context: &Context, msg: &mut Message) -> Result<()> { } } msg.param.set(Param::File, blob.as_name()); + // TODO not sure if we still need the next part if let (Some(filename), Some(blob_ext)) = (msg.param.get(Param::Filename), blob.suffix()) { let stem = match filename.rsplit_once('.') { Some((stem, _)) => stem, @@ -2747,7 +2746,7 @@ async fn prepare_msg_blob(context: &Context, msg: &mut Message) -> Result<()> { } if !msg.param.exists(Param::MimeType) { - if let Some((_, mime)) = message::guess_msgtype_from_suffix(&blob.to_abs_path()) { + if let Some((_, mime)) = message::guess_msgtype_from_suffix(msg) { msg.param.set(Param::MimeType, mime); } } diff --git a/src/message.rs b/src/message.rs index b84c200823..4822c92c56 100644 --- a/src/message.rs +++ b/src/message.rs @@ -620,8 +620,8 @@ impl Message { pub fn get_filemime(&self) -> Option { if let Some(m) = self.param.get(Param::MimeType) { return Some(m.to_string()); - } else if let Some(file) = self.param.get(Param::File) { - if let Some((_, mime)) = guess_msgtype_from_suffix(Path::new(file)) { + } else if self.param.exists(Param::File) { + if let Some((_, mime)) = guess_msgtype_from_suffix(self) { return Some(mime.to_string()); } // we have a file but no mimetype, let's use a generic one @@ -1103,6 +1103,7 @@ impl Message { } /// Creates a new blob and sets it as a file associated with a message. + /// TODO this could also deduplicate pub async fn set_file_from_bytes( &mut self, context: &Context, @@ -1453,7 +1454,14 @@ pub async fn get_msg_read_receipts( .await } -pub(crate) fn guess_msgtype_from_suffix(path: &Path) -> Option<(Viewtype, &str)> { +pub(crate) fn guess_msgtype_from_suffix(msg: &Message) -> Option<(Viewtype, &'static str)> { + msg.param + .get(Param::Filename) + .or_else(|| msg.param.get(Param::File)) + .and_then(|file| guess_msgtype_from_path_suffix(Path::new(file))) +} + +pub(crate) fn guess_msgtype_from_path_suffix(path: &Path) -> Option<(Viewtype, &'static str)> { let extension: &str = &path.extension()?.to_str()?.to_lowercase(); let info = match extension { // before using viewtype other than Viewtype::File, @@ -2198,15 +2206,15 @@ mod tests { #[test] fn test_guess_msgtype_from_suffix() { assert_eq!( - guess_msgtype_from_suffix(Path::new("foo/bar-sth.mp3")), + guess_msgtype_from_path_suffix(Path::new("foo/bar-sth.mp3")), Some((Viewtype::Audio, "audio/mpeg")) ); assert_eq!( - guess_msgtype_from_suffix(Path::new("foo/file.html")), + guess_msgtype_from_path_suffix(Path::new("foo/file.html")), Some((Viewtype::File, "text/html")) ); assert_eq!( - guess_msgtype_from_suffix(Path::new("foo/file.xdc")), + guess_msgtype_from_path_suffix(Path::new("foo/file.xdc")), Some((Viewtype::Webxdc, "application/webxdc+zip")) ); } diff --git a/src/mimefactory.rs b/src/mimefactory.rs index a1e2876f95..4b1fabe225 100644 --- a/src/mimefactory.rs +++ b/src/mimefactory.rs @@ -1,6 +1,7 @@ //! # MIME message production. use std::collections::HashSet; +use std::path::Path; use anyhow::{bail, Context as _, Result}; use base64::Engine as _; @@ -1511,13 +1512,18 @@ pub(crate) fn wrapped_base64_encode(buf: &[u8]) -> String { .join("\r\n") } -async fn build_body_file(context: &Context, msg: &Message) -> Result { +async fn build_body_file(context: &Context, msg: &Message) -> Result<(PartBuilder, String)> { + let file_name = msg.get_filename().context("msg has no file")?; + let suffix = Path::new(&file_name) + .extension() + .and_then(|e| e.to_str()) + .unwrap_or("dat"); + let blob = msg .param .get_blob(Param::File, context) .await? .context("msg has no file")?; - let suffix = blob.suffix().unwrap_or("dat"); // Get file name to use for sending. For privacy purposes, we do // not transfer the original filenames eg. for images; these names @@ -1557,18 +1563,14 @@ async fn build_body_file(context: &Context, msg: &Message) -> Result msg - .param - .get(Param::Filename) - .unwrap_or_else(|| blob.as_file_name()) - .to_string(), + _ => file_name, }; /* check mimetype */ let mimetype: mime::Mime = match msg.param.get(Param::MimeType) { Some(mtype) => mtype.parse()?, None => { - if let Some(res) = message::guess_msgtype_from_suffix(blob.as_rel_path()) { + if let Some(res) = message::guess_msgtype_from_suffix(msg) { res.1.parse()? } else { mime::APPLICATION_OCTET_STREAM diff --git a/src/mimeparser.rs b/src/mimeparser.rs index c027b57664..b7e871fffb 100644 --- a/src/mimeparser.rs +++ b/src/mimeparser.rs @@ -2041,10 +2041,12 @@ fn get_mime_type( } mime::APPLICATION => match mimetype.subtype() { mime::OCTET_STREAM => match filename { - Some(filename) => match message::guess_msgtype_from_suffix(Path::new(&filename)) { - Some((viewtype, _)) => viewtype, - None => Viewtype::File, - }, + Some(filename) => { + match message::guess_msgtype_from_path_suffix(Path::new(&filename)) { + Some((viewtype, _)) => viewtype, + None => Viewtype::File, + } + } None => Viewtype::File, }, _ => Viewtype::File, From 6a02bced3cc1d2a40888023b76adc9edfba73d44 Mon Sep 17 00:00:00 2001 From: Hocuri Date: Fri, 13 Dec 2024 18:18:15 +0100 Subject: [PATCH 04/23] Fix some more tests, I'll need to remove some println statements --- src/blob.rs | 3 +-- src/debug_logging.rs | 8 +++++--- src/message.rs | 12 ++++++------ src/webxdc.rs | 6 +++--- 4 files changed, 15 insertions(+), 14 deletions(-) diff --git a/src/blob.rs b/src/blob.rs index 79adca176e..dae2dd0aa8 100644 --- a/src/blob.rs +++ b/src/blob.rs @@ -145,10 +145,9 @@ impl<'a> BlobObject<'a> { context: &'a Context, src: &Path, ) -> Result> { - // `update_reader()` uses std::fs, so we need to call task::block_in_place(). + // `update_reader()` uses std::fs, so we need to use task::block_in_place(). // Tokio io also just calls spawn_blocking() internally (e.g. https://docs.rs/tokio/1.42.0/src/tokio/fs/file.rs.html#606 and https://docs.rs/tokio/latest/src/tokio/fs/mod.rs.html#310) // so we are doing essentially the same here. - task::block_in_place(|| { let blobdir = context.get_blobdir(); if !(src.starts_with(blobdir) || src.starts_with("$BLOBDIR/")) { diff --git a/src/debug_logging.rs b/src/debug_logging.rs index eae36fe3d0..4b0ce441f4 100644 --- a/src/debug_logging.rs +++ b/src/debug_logging.rs @@ -100,7 +100,9 @@ pub async fn maybe_set_logging_xdc( context, msg.get_viewtype(), chat_id, - msg.param.get_path(Param::File, context).unwrap_or_default(), + msg.param + .get_path(Param::Filename, context) + .unwrap_or_default(), msg.get_id(), ) .await?; @@ -113,11 +115,11 @@ pub async fn maybe_set_logging_xdc_inner( context: &Context, viewtype: Viewtype, chat_id: ChatId, - file: Option, + filename: Option, msg_id: MsgId, ) -> anyhow::Result<()> { if viewtype == Viewtype::Webxdc { - if let Some(file) = file { + if let Some(file) = filename { if let Some(file_name) = file.file_name().and_then(|name| name.to_str()) { if file_name.starts_with("debug_logging") && file_name.ends_with(".xdc") diff --git a/src/message.rs b/src/message.rs index 4822c92c56..9eb75f6c56 100644 --- a/src/message.rs +++ b/src/message.rs @@ -1104,17 +1104,17 @@ impl Message { /// Creates a new blob and sets it as a file associated with a message. /// TODO this could also deduplicate - pub async fn set_file_from_bytes( + /// TODO this is set as pub(crate) now since it's not used in any API + pub(crate) async fn set_file_from_bytes( &mut self, context: &Context, - suggested_name: &str, + name: &str, data: &[u8], filemime: Option<&str>, ) -> Result<()> { - let blob = BlobObject::create(context, suggested_name, data).await?; - self.param.set(Param::Filename, suggested_name); - self.param.set(Param::File, blob.as_name()); - self.param.set_optional(Param::MimeType, filemime); + let blob = BlobObject::create(context, name, data).await?; + self.set_file_and_deduplicate(context, &blob.to_abs_path(), name, filemime) + .await?; Ok(()) } diff --git a/src/webxdc.rs b/src/webxdc.rs index 0829933008..49e75e9b72 100644 --- a/src/webxdc.rs +++ b/src/webxdc.rs @@ -238,7 +238,9 @@ const STATUS_UPDATE_SIZE_MAX: usize = 100 << 10; impl Context { /// check if a file is an acceptable webxdc for sending or receiving. pub(crate) async fn is_webxdc_file(&self, filename: &str, file: &[u8]) -> Result { + println!("is_webxdc_file"); if !filename.ends_with(WEBXDC_SUFFIX) { + println!("Ends not with suffix"); return Ok(false); } @@ -255,15 +257,13 @@ impl Context { return Ok(false); } + println!("Is webxdc"); Ok(true) } /// Ensure that a file is an acceptable webxdc for sending. pub(crate) async fn ensure_sendable_webxdc_file(&self, path: &Path) -> Result<()> { let filename = path.to_str().unwrap_or_default(); - if !filename.ends_with(WEBXDC_SUFFIX) { - bail!("{} is not a valid webxdc file", filename); - } let valid = match FsZipFileReader::new(path).await { Ok(archive) => { From 6b00993a5384cb99ec578da5df1930a5602a9e02 Mon Sep 17 00:00:00 2001 From: Hocuri Date: Wed, 18 Dec 2024 15:52:06 +0100 Subject: [PATCH 05/23] Adapt more tests and fix most of them --- src/blob.rs | 105 +++++++++++++++++++++++++++++----------------------- src/chat.rs | 42 +++++++++++++++------ src/sql.rs | 10 +++-- 3 files changed, 95 insertions(+), 62 deletions(-) diff --git a/src/blob.rs b/src/blob.rs index dae2dd0aa8..64d76c6fd0 100644 --- a/src/blob.rs +++ b/src/blob.rs @@ -154,10 +154,11 @@ impl<'a> BlobObject<'a> { bail!("The file needs to be in the blob dir already and will be renamed. To attach a different file, copy it to the blobdir first."); } + let mut hasher = blake3::Hasher::new(); let mut src_file = std::fs::File::open(src) .with_context(|| format!("failed to open file {}", src.display()))?; - let mut hasher = blake3::Hasher::new(); hasher.update_reader(&mut src_file)?; + drop(src_file); let hash = hasher.finalize().to_hex(); let hash = hash.as_str(); let new_path = blobdir.join(hash); @@ -396,8 +397,6 @@ impl<'a> BlobObject<'a> { } pub async fn recode_to_avatar_size(&mut self, context: &Context) -> Result<()> { - let blob_abs = self.to_abs_path(); - let img_wh = match MediaQuality::from_i32(context.get_config_int(Config::MediaQuality).await?) .unwrap_or_default() @@ -410,16 +409,15 @@ impl<'a> BlobObject<'a> { let strict_limits = true; // max_bytes is 20_000 bytes: Outlook servers don't allow headers larger than 32k. // 32 / 4 * 3 = 24k if you account for base64 encoding. To be safe, we reduced this to 20k. - if let Some(new_name) = self.recode_to_size( + self.recode_to_size( context, - blob_abs, + "".to_string(), // The name of an avatar doesn't matter maybe_sticker, img_wh, 20_000, strict_limits, - )? { - self.name = new_name; - } + )?; + Ok(()) } @@ -433,9 +431,9 @@ impl<'a> BlobObject<'a> { pub async fn recode_to_image_size( &mut self, context: &Context, + name: String, maybe_sticker: &mut bool, - ) -> Result<()> { - let blob_abs = self.to_abs_path(); + ) -> Result { let (img_wh, max_bytes) = match MediaQuality::from_i32(context.get_config_int(Config::MediaQuality).await?) .unwrap_or_default() @@ -447,35 +445,36 @@ impl<'a> BlobObject<'a> { MediaQuality::Worse => (constants::WORSE_IMAGE_SIZE, constants::WORSE_IMAGE_BYTES), }; let strict_limits = false; - if let Some(new_name) = self.recode_to_size( + let new_name = self.recode_to_size( context, - blob_abs, + name, maybe_sticker, img_wh, max_bytes, strict_limits, - )? { - self.name = new_name; - } - Ok(()) + )?; + + Ok(new_name) } /// If `!strict_limits`, then if `max_bytes` is exceeded, reduce the image to `img_wh` and just /// proceed with the result. + /// TODO documentation fn recode_to_size( &mut self, context: &Context, - mut blob_abs: PathBuf, + mut name: String, maybe_sticker: &mut bool, mut img_wh: u32, max_bytes: usize, strict_limits: bool, - ) -> Result> { + ) -> Result { // Add white background only to avatars to spare the CPU. let mut add_white_bg = img_wh <= constants::BALANCED_AVATAR_SIZE; let mut no_exif = false; let no_exif_ref = &mut no_exif; - let res = tokio::task::block_in_place(move || { + let original_name = name.clone(); + let res: Result = tokio::task::block_in_place(move || { let mut file = std::fs::File::open(self.to_abs_path())?; let (nr_bytes, exif) = image_metadata(&file)?; *no_exif_ref = exif.is_none(); @@ -489,7 +488,7 @@ impl<'a> BlobObject<'a> { file.rewind()?; ImageReader::with_format( std::io::BufReader::new(&file), - ImageFormat::from_path(&blob_abs)?, + ImageFormat::from_path(&self.to_abs_path())?, ) } }; @@ -497,7 +496,6 @@ impl<'a> BlobObject<'a> { let mut img = imgreader.decode().context("image decode failure")?; let orientation = exif.as_ref().map(|exif| exif_orientation(exif, context)); let mut encoded = Vec::new(); - let mut changed_name = None; if *maybe_sticker { let x_max = img.width().saturating_sub(1); @@ -509,7 +507,7 @@ impl<'a> BlobObject<'a> { || img.get_pixel(x_max, y_max).0[3] == 0); } if *maybe_sticker && exif.is_none() { - return Ok(None); + return Ok(name); } img = match orientation { @@ -606,10 +604,10 @@ impl<'a> BlobObject<'a> { if !matches!(fmt, ImageFormat::Jpeg) && matches!(ofmt, ImageOutputFormat::Jpeg { .. }) { - blob_abs = blob_abs.with_extension("jpg"); - let file_name = blob_abs.file_name().context("No image file name (???)")?; - let file_name = file_name.to_str().context("Filename is no UTF-8 (???)")?; - changed_name = Some(format!("$BLOBDIR/{file_name}")); + name = Path::new(&name) + .with_extension("jpg") + .to_string_lossy() + .into_owned(); } if encoded.is_empty() { @@ -619,11 +617,16 @@ impl<'a> BlobObject<'a> { encode_img(&img, ofmt, &mut encoded)?; } - std::fs::write(&blob_abs, &encoded) + let hash = blake3::hash(&encoded).to_hex(); + let hash = hash.as_str(); + let new_path = context.get_blobdir().join(hash); + + std::fs::write(&new_path, &encoded) .context("failed to write recoded blob to file")?; + self.name = format!("$BLOBDIR/{hash}"); } - Ok(changed_name) + Ok(name) }); match res { Ok(_) => res, @@ -633,7 +636,7 @@ impl<'a> BlobObject<'a> { context, "Cannot recode image, using original data: {err:#}.", ); - Ok(None) + Ok(original_name) } else { Err(err) } @@ -1053,7 +1056,7 @@ mod tests { let strict_limits = true; blob.recode_to_size( &t, - blob.to_abs_path(), + "avatar.png".to_string(), maybe_sticker, img_wh, 20_000, @@ -1076,19 +1079,26 @@ mod tests { #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn test_selfavatar_outside_blobdir() { + async fn file_size(path_buf: &Path) -> u64 { + fs::metadata(path_buf).await.unwrap().len() + } + let t = TestContext::new().await; let avatar_src = t.dir.path().join("avatar.jpg"); let avatar_bytes = include_bytes!("../test-data/image/avatar1000x1000.jpg"); fs::write(&avatar_src, avatar_bytes).await.unwrap(); - let avatar_blob = t.get_blobdir().join("avatar.jpg"); - assert!(!avatar_blob.exists()); t.set_config(Config::Selfavatar, Some(avatar_src.to_str().unwrap())) .await .unwrap(); - assert!(avatar_blob.exists()); - assert!(fs::metadata(&avatar_blob).await.unwrap().len() < avatar_bytes.len() as u64); - let avatar_cfg = t.get_config(Config::Selfavatar).await.unwrap(); - assert_eq!(avatar_cfg, avatar_blob.to_str().map(|s| s.to_string())); + let avatar_blob = t.get_config(Config::Selfavatar).await.unwrap().unwrap(); + let avatar_path = Path::new(&avatar_blob); + assert!( + avatar_blob + .ends_with("d98cd30ed8f2129bf3968420208849d4663bb4e9e124e208b45f43d68acda18e"), + "The avatar filename should be its hash, put instead it's {avatar_blob}" + ); + let scaled_avatar_size = file_size(&avatar_path).await; + assert!(scaled_avatar_size < avatar_bytes.len() as u64); check_image_size(avatar_src, 1000, 1000); check_image_size( @@ -1097,27 +1107,27 @@ mod tests { constants::BALANCED_AVATAR_SIZE, ); - async fn file_size(path_buf: &Path) -> u64 { - let file = File::open(path_buf).await.unwrap(); - file.metadata().await.unwrap().len() - } - - let mut blob = BlobObject::new_from_path(&t, &avatar_blob).await.unwrap(); + let mut blob = BlobObject::new_from_path(&t, avatar_path).await.unwrap(); let maybe_sticker = &mut false; let strict_limits = true; blob.recode_to_size( &t, - blob.to_abs_path(), + "avatar.jpg".to_string(), maybe_sticker, 1000, 3000, strict_limits, ) .unwrap(); - assert!(file_size(&avatar_blob).await <= 3000); - assert!(file_size(&avatar_blob).await > 2000); + let new_file_size = file_size(&blob.to_abs_path()).await; + assert!(new_file_size <= 3000); + assert!(new_file_size > 2000); + // The new file should be smaller: + assert!(new_file_size < scaled_avatar_size); + // And the original file should not be touched: + assert_eq!(file_size(&avatar_path).await, scaled_avatar_size); tokio::task::block_in_place(move || { - let img = ImageReader::open(avatar_blob) + let img = ImageReader::open(&blob.to_abs_path()) .unwrap() .with_guessed_format() .unwrap() @@ -1503,7 +1513,8 @@ mod tests { .await .context("failed to write file")?; let mut msg = Message::new(Viewtype::Image); - msg.set_file(file.to_str().unwrap(), None); // TODO make sure that test also passes with set_file_and_deduplicate + msg.set_file_and_deduplicate(&alice, &file, "file.gif", None) + .await?; let chat = alice.create_chat(&bob).await; let sent = alice.send_msg(chat.id, &mut msg).await; let bob_msg = bob.recv_msg(&sent).await; diff --git a/src/chat.rs b/src/chat.rs index 40dab35261..89390209ed 100644 --- a/src/chat.rs +++ b/src/chat.rs @@ -2727,8 +2727,14 @@ async fn prepare_msg_blob(context: &Context, msg: &mut Message) -> Result<()> { && (msg.viewtype == Viewtype::Image || maybe_sticker && !msg.param.exists(Param::ForceSticker)) { - blob.recode_to_image_size(context, &mut maybe_sticker) + let new_name = blob + .recode_to_image_size( + context, + msg.get_filename().unwrap_or_else(|| "file".to_string()), + &mut maybe_sticker, + ) .await?; + msg.param.set(Param::Filename, new_name); if !maybe_sticker { msg.viewtype = Viewtype::Image; @@ -6465,7 +6471,8 @@ mod tests { tokio::fs::write(&file, bytes).await?; let mut msg = Message::new(Viewtype::Sticker); - msg.set_file(file.to_str().unwrap(), None); + msg.set_file_and_deduplicate(&alice, &file, filename, None) + .await?; let sent_msg = alice.send_msg(alice_chat.id, &mut msg).await; let mime = sent_msg.payload(); @@ -6514,7 +6521,7 @@ mod tests { } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] - async fn test_sticker_jpeg_force() { + async fn test_sticker_jpeg_force() -> Result<()> { let alice = TestContext::new_alice().await; let bob = TestContext::new_bob().await; let alice_chat = alice.create_chat(&bob).await; @@ -6529,14 +6536,19 @@ mod tests { // Images without force_sticker should be turned into [Viewtype::Image] let mut msg = Message::new(Viewtype::Sticker); - msg.set_file(file.to_str().unwrap(), None); + msg.set_file_and_deduplicate(&alice, &file, "sticker.jpg", None) + .await + .unwrap(); + let file = msg.get_file(&alice).unwrap(); let sent_msg = alice.send_msg(alice_chat.id, &mut msg).await; let msg = bob.recv_msg(&sent_msg).await; assert_eq!(msg.get_viewtype(), Viewtype::Image); // Images with `force_sticker = true` should keep [Viewtype::Sticker] let mut msg = Message::new(Viewtype::Sticker); - msg.set_file(file.to_str().unwrap(), None); + msg.set_file_and_deduplicate(&alice, &file, "sticker.jpg", None) + .await + .unwrap(); msg.force_sticker(); let sent_msg = alice.send_msg(alice_chat.id, &mut msg).await; let msg = bob.recv_msg(&sent_msg).await; @@ -6545,7 +6557,9 @@ mod tests { // Images with `force_sticker = true` should keep [Viewtype::Sticker] // even on drafted messages let mut msg = Message::new(Viewtype::Sticker); - msg.set_file(file.to_str().unwrap(), None); + msg.set_file_and_deduplicate(&alice, &file, "sticker.jpg", None) + .await + .unwrap(); msg.force_sticker(); alice_chat .id @@ -6556,6 +6570,8 @@ mod tests { let sent_msg = alice.send_msg(alice_chat.id, &mut msg).await; let msg = bob.recv_msg(&sent_msg).await; assert_eq!(msg.get_viewtype(), Viewtype::Sticker); + + Ok(()) } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] @@ -6584,7 +6600,8 @@ mod tests { let file = alice.get_blobdir().join(file_name); tokio::fs::write(&file, bytes).await?; let mut msg = Message::new(Viewtype::Sticker); - msg.set_file(file.to_str().unwrap(), None); + msg.set_file_and_deduplicate(&alice, &file, "sticker.jpg", None) + .await?; // send sticker to bob let sent_msg = alice.send_msg(alice_chat.get_id(), &mut msg).await; @@ -7179,7 +7196,7 @@ mod tests { let file = t.get_blobdir().join(name); tokio::fs::write(&file, bytes).await?; let mut msg = Message::new(msg_type); - msg.set_file(file.to_str().unwrap(), None); + msg.set_file_and_deduplicate(t, &file, name, None).await?; send_msg(t, chat_id, &mut msg).await } @@ -7330,11 +7347,11 @@ mod tests { Contact::create(&alice, "bob", "bob@example.net").await?, ) .await?; - let dir = tempfile::tempdir()?; - let file = dir.path().join("harmless_file.\u{202e}txt.exe"); + let file = alice.get_blobdir().join("harmless_file.\u{202e}txt.exe"); fs::write(&file, "aaa").await?; let mut msg = Message::new(Viewtype::File); - msg.set_file(file.to_str().unwrap(), None); + msg.set_file_and_deduplicate(&alice, &file, "harmless_file.\u{202e}txt.exe", None) + .await?; let msg = bob.recv_msg(&alice.send_msg(chat_id, &mut msg).await).await; // the file bob receives should not contain BIDI-control characters @@ -7669,7 +7686,8 @@ mod tests { let file = alice.get_blobdir().join("screenshot.png"); tokio::fs::write(&file, bytes).await?; let mut msg = Message::new(Viewtype::Image); - msg.set_file(file.to_str().unwrap(), None); + msg.set_file_and_deduplicate(&alice, &file, "screenshot.png", None) + .await?; let alice_chat = alice.create_chat(&bob).await; let sent_msg = alice.send_msg(alice_chat.get_id(), &mut msg).await; diff --git a/src/sql.rs b/src/sql.rs index 62a0b44b93..3e359d4ce1 100644 --- a/src/sql.rs +++ b/src/sql.rs @@ -260,9 +260,13 @@ impl Sql { let mut blob = BlobObject::new_from_path(context, avatar.as_ref()).await?; match blob.recode_to_avatar_size(context).await { Ok(()) => { - context - .set_config_internal(Config::Selfavatar, Some(&avatar)) - .await? + if let Some(path) = blob.to_abs_path().to_str() { + context + .set_config_internal(Config::Selfavatar, Some(path)) + .await?; + } else { + warn!(context, "Setting selfavatar failed: invalid filename (???)"); + } } Err(e) => { warn!(context, "Migrations can't recode avatar, removing. {:#}", e); From 9f1c1860fffaf92c5d8c4d7d2f1921b6eb4ba79c Mon Sep 17 00:00:00 2001 From: Hocuri Date: Wed, 18 Dec 2024 15:54:22 +0100 Subject: [PATCH 06/23] test: Assume that the avatar name also changes --- src/blob.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/blob.rs b/src/blob.rs index 64d76c6fd0..43c954a4df 100644 --- a/src/blob.rs +++ b/src/blob.rs @@ -1152,10 +1152,8 @@ mod tests { .await .unwrap(); let avatar_cfg = t.get_config(Config::Selfavatar).await.unwrap().unwrap(); - assert_eq!( - avatar_cfg, - avatar_src.with_extension("png").to_str().unwrap() - ); + assert!(avatar_cfg + .ends_with("9e7f409ac5c92b942cc4f31cee2770ab3f69cf621fb5eb4e430ff6d21b3f37fd")); check_image_size( avatar_cfg, From d60a30963f8d889d54c031a23cb81bbcda2e4f7b Mon Sep 17 00:00:00 2001 From: Hocuri Date: Wed, 18 Dec 2024 16:26:55 +0100 Subject: [PATCH 07/23] Adapt summary.rs tests --- src/summary.rs | 84 +++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 69 insertions(+), 15 deletions(-) diff --git a/src/summary.rs b/src/summary.rs index 72bf3395d0..177c6a0fb9 100644 --- a/src/summary.rs +++ b/src/summary.rs @@ -286,6 +286,8 @@ impl Message { #[cfg(test)] mod tests { + use std::path::PathBuf; + use super::*; use crate::chat::ChatId; use crate::param::Param; @@ -305,56 +307,96 @@ mod tests { .unwrap(); let some_text = " bla \t\n\tbla\n\t".to_string(); + async fn write_file_to_blobdir(d: &TestContext) -> PathBuf { + let bytes = &[38, 209, 39, 29]; // Just some random bytes + let file = d.get_blobdir().join("random_filename_392438"); + tokio::fs::write(&file, bytes).await.unwrap(); + file + } + let msg = Message::new_text(some_text.to_string()); assert_summary_texts(&msg, ctx, "bla bla").await; // for simple text, the type is not added to the summary + let file = write_file_to_blobdir(&d).await; let mut msg = Message::new(Viewtype::Image); - msg.set_file("foo.jpg", None); + msg.set_file_and_deduplicate(&d, &file, "foo.jpg", None) + .await + .unwrap(); assert_summary_texts(&msg, ctx, "šŸ“· Image").await; // file names are not added for images + let file = write_file_to_blobdir(&d).await; let mut msg = Message::new(Viewtype::Image); msg.set_text(some_text.to_string()); - msg.set_file("foo.jpg", None); + msg.set_file_and_deduplicate(&d, &file, "foo.jpg", None) + .await + .unwrap(); assert_summary_texts(&msg, ctx, "šŸ“· bla bla").await; // type is visible by emoji if text is set + let file = write_file_to_blobdir(&d).await; let mut msg = Message::new(Viewtype::Video); - msg.set_file("foo.mp4", None); + msg.set_file_and_deduplicate(&d, &file, "foo.mp4", None) + .await + .unwrap(); assert_summary_texts(&msg, ctx, "šŸŽ„ Video").await; // file names are not added for videos + let file = write_file_to_blobdir(&d).await; let mut msg = Message::new(Viewtype::Video); msg.set_text(some_text.to_string()); - msg.set_file("foo.mp4", None); + msg.set_file_and_deduplicate(&d, &file, "foo.mp4", None) + .await + .unwrap(); assert_summary_texts(&msg, ctx, "šŸŽ„ bla bla").await; // type is visible by emoji if text is set + let file = write_file_to_blobdir(&d).await; let mut msg = Message::new(Viewtype::Gif); - msg.set_file("foo.gif", None); + msg.set_file_and_deduplicate(&d, &file, "foo.gif", None) + .await + .unwrap(); assert_summary_texts(&msg, ctx, "GIF").await; // file names are not added for GIFs + let file = write_file_to_blobdir(&d).await; let mut msg = Message::new(Viewtype::Gif); msg.set_text(some_text.to_string()); - msg.set_file("foo.gif", None); + msg.set_file_and_deduplicate(&d, &file, "foo.gif", None) + .await + .unwrap(); assert_summary_texts(&msg, ctx, "GIF \u{2013} bla bla").await; // file names are not added for GIFs + let file = write_file_to_blobdir(&d).await; let mut msg = Message::new(Viewtype::Sticker); - msg.set_file("foo.png", None); + msg.set_file_and_deduplicate(&d, &file, "foo.png", None) + .await + .unwrap(); assert_summary_texts(&msg, ctx, "Sticker").await; // file names are not added for stickers + let file = write_file_to_blobdir(&d).await; let mut msg = Message::new(Viewtype::Voice); - msg.set_file("foo.mp3", None); + msg.set_file_and_deduplicate(&d, &file, "foo.mp3", None) + .await + .unwrap(); assert_summary_texts(&msg, ctx, "šŸŽ¤ Voice message").await; // file names are not added for voice messages + let file = write_file_to_blobdir(&d).await; let mut msg = Message::new(Viewtype::Voice); msg.set_text(some_text.clone()); - msg.set_file("foo.mp3", None); + msg.set_file_and_deduplicate(&d, &file, "foo.mp3", None) + .await + .unwrap(); assert_summary_texts(&msg, ctx, "šŸŽ¤ bla bla").await; + let file = write_file_to_blobdir(&d).await; let mut msg = Message::new(Viewtype::Audio); - msg.set_file("foo.mp3", None); + msg.set_file_and_deduplicate(&d, &file, "foo.mp3", None) + .await + .unwrap(); assert_summary_texts(&msg, ctx, "šŸŽµ foo.mp3").await; // file name is added for audio + let file = write_file_to_blobdir(&d).await; let mut msg = Message::new(Viewtype::Audio); msg.set_text(some_text.clone()); - msg.set_file("foo.mp3", None); + msg.set_file_and_deduplicate(&d, &file, "foo.mp3", None) + .await + .unwrap(); assert_summary_texts(&msg, ctx, "šŸŽµ foo.mp3 \u{2013} bla bla").await; // file name and text added for audio let mut msg = Message::new(Viewtype::File); @@ -369,18 +411,27 @@ mod tests { chat_id.set_draft(ctx, Some(&mut msg)).await.unwrap(); assert_summary_texts(&msg, ctx, "nice app! \u{2013} bla bla").await; + let file = write_file_to_blobdir(&d).await; let mut msg = Message::new(Viewtype::File); - msg.set_file("foo.bar", None); + msg.set_file_and_deduplicate(&d, &file, "foo.bar", None) + .await + .unwrap(); assert_summary_texts(&msg, ctx, "šŸ“Ž foo.bar").await; // file name is added for files + let file = write_file_to_blobdir(&d).await; let mut msg = Message::new(Viewtype::File); msg.set_text(some_text.clone()); - msg.set_file("foo.bar", None); + msg.set_file_and_deduplicate(&d, &file, "foo.bar", None) + .await + .unwrap(); assert_summary_texts(&msg, ctx, "šŸ“Ž foo.bar \u{2013} bla bla").await; // file name is added for files + let file = write_file_to_blobdir(&d).await; let mut msg = Message::new(Viewtype::VideochatInvitation); msg.set_text(some_text.clone()); - msg.set_file("foo.bar", None); + msg.set_file_and_deduplicate(&d, &file, "foo.bar", None) + .await + .unwrap(); assert_summary_texts(&msg, ctx, "Video chat invitation").await; // text is not added for videochat invitations let mut msg = Message::new(Viewtype::Vcard); @@ -419,9 +470,12 @@ mod tests { assert_eq!(msg.get_summary_text(ctx).await, "Forwarded: bla bla"); // for simple text, the type is not added to the summary assert_eq!(msg.get_summary_text_without_prefix(ctx).await, "bla bla"); // skipping prefix used for reactions summaries + let file = write_file_to_blobdir(&d).await; let mut msg = Message::new(Viewtype::File); msg.set_text(some_text.clone()); - msg.set_file("foo.bar", None); + msg.set_file_and_deduplicate(&d, &file, "foo.bar", None) + .await + .unwrap(); msg.param.set_int(Param::Forwarded, 1); assert_eq!( msg.get_summary_text(ctx).await, From dba030c87f20af83b96dc188ef4db6b7842f02de Mon Sep 17 00:00:00 2001 From: Hocuri Date: Wed, 18 Dec 2024 16:28:07 +0100 Subject: [PATCH 08/23] Adapt some more tests, they all pass --- src/download.rs | 4 +++- src/imex/transfer.rs | 11 +++++++++-- src/location.rs | 3 ++- 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/src/download.rs b/src/download.rs index 0dd6d81f16..fa7aa8ca70 100644 --- a/src/download.rs +++ b/src/download.rs @@ -440,7 +440,9 @@ mod tests { let file = alice.get_blobdir().join("minimal.xdc"); tokio::fs::write(&file, include_bytes!("../test-data/webxdc/minimal.xdc")).await?; let mut instance = Message::new(Viewtype::File); - instance.set_file(file.to_str().unwrap(), None); + instance + .set_file_and_deduplicate(&alice, &file, "minimal.xdc", None) + .await?; let _sent1 = alice.send_msg(chat_id, &mut instance).await; alice diff --git a/src/imex/transfer.rs b/src/imex/transfer.rs index 297bed6af3..793c1f89bc 100644 --- a/src/imex/transfer.rs +++ b/src/imex/transfer.rs @@ -394,7 +394,9 @@ mod tests { let file = ctx0.get_blobdir().join("hello.txt"); fs::write(&file, "i am attachment").await.unwrap(); let mut msg = Message::new(Viewtype::File); - msg.set_file(file.to_str().unwrap(), Some("text/plain")); + msg.set_file_and_deduplicate(&ctx0, &file, "hello.txt", Some("text/plain")) + .await + .unwrap(); send_msg(&ctx0, self_chat.id, &mut msg).await.unwrap(); // Prepare to transfer backup. @@ -428,7 +430,12 @@ mod tests { let msg = Message::load_from_db(&ctx1, *msgid).await.unwrap(); let path = msg.get_file(&ctx1).unwrap(); - assert_eq!(path.with_file_name("hello.txt"), path); + assert_eq!( + // That's the hash of the file: + path.with_file_name("ac1d2d284757656a8d41dc40aae4136ff2bbb95c582d8adb8fdde38d224ec3c1"), + path + ); + assert_eq!("hello.txt", msg.get_filename().unwrap()); let text = fs::read_to_string(&path).await.unwrap(); assert_eq!(text, "i am attachment"); diff --git a/src/location.rs b/src/location.rs index babfdc28ff..792ecb9916 100644 --- a/src/location.rs +++ b/src/location.rs @@ -1074,7 +1074,8 @@ Content-Disposition: attachment; filename="location.kml" let file = alice.get_blobdir().join(file_name); tokio::fs::write(&file, bytes).await?; let mut msg = Message::new(Viewtype::Image); - msg.set_file(file.to_str().unwrap(), None); + msg.set_file_and_deduplicate(&alice, &file, "logo.png", None) + .await?; let sent = alice.send_msg(alice_chat.id, &mut msg).await; let alice_msg = Message::load_from_db(&alice, sent.sender_msg_id).await?; assert_eq!(alice_msg.has_location(), false); From 4c7b0c22f5b68ad64578921377903e54994ec1ae Mon Sep 17 00:00:00 2001 From: Hocuri Date: Wed, 18 Dec 2024 16:28:39 +0100 Subject: [PATCH 09/23] Adapt src/receive_imf/tests.rs, fails because there is no deduplication on reception --- src/receive_imf/tests.rs | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/receive_imf/tests.rs b/src/receive_imf/tests.rs index 398471719e..d11303e4ce 100644 --- a/src/receive_imf/tests.rs +++ b/src/receive_imf/tests.rs @@ -3247,7 +3247,9 @@ async fn test_weird_and_duplicated_filenames() -> Result<()> { tokio::fs::write(&attachment, content.as_bytes()).await?; let mut msg_alice = Message::new(Viewtype::File); - msg_alice.set_file(attachment.to_str().unwrap(), None); + msg_alice + .set_file_and_deduplicate(&alice, &attachment, filename_sent, None) + .await?; let alice_chat = alice.create_chat(&bob).await; let sent = alice.send_msg(alice_chat.id, &mut msg_alice).await; println!("{}", sent.payload()); @@ -3261,9 +3263,10 @@ async fn test_weird_and_duplicated_filenames() -> Result<()> { let path = msg.get_file(t).unwrap(); let path2 = path.with_file_name("saved.txt"); msg.save_file(t, &path2).await.unwrap(); - assert!( - path.to_str().unwrap().ends_with(".tar.gz"), - "path {path:?} doesn't end with .tar.gz" + assert_eq!( + path.file_name().unwrap().to_str().unwrap(), + "a9ee651cbe76ce2c5ff4bf50e41a66d431ab11f3f7d4727a3e765f6101482bdd", + "The hash of the content should always be the same" ); assert_eq!(fs::read_to_string(&path).await.unwrap(), content); assert_eq!(fs::read_to_string(&path2).await.unwrap(), content); From f83d5a20ccce1eda3bad4be05f228c17569caf04 Mon Sep 17 00:00:00 2001 From: Hocuri Date: Thu, 19 Dec 2024 15:32:49 +0100 Subject: [PATCH 10/23] Deduplicate on message reception, fix all tests --- src/blob.rs | 22 ++++++++++++++++++++++ src/chat.rs | 6 +++++- src/mimeparser.rs | 6 +++--- src/receive_imf/tests.rs | 16 ++++++++++------ 4 files changed, 40 insertions(+), 10 deletions(-) diff --git a/src/blob.rs b/src/blob.rs index 43c954a4df..6f327e11d0 100644 --- a/src/blob.rs +++ b/src/blob.rs @@ -180,6 +180,28 @@ impl<'a> BlobObject<'a> { }) } + pub async fn create_and_deduplicate_blob( + context: &'a Context, + data: &[u8], + ) -> Result> { + task::block_in_place(|| { + let blobdir = context.get_blobdir(); + + let hash = blake3::hash(&data).to_hex(); + let hash = hash.as_str(); + let new_path = context.get_blobdir().join(hash); + + std::fs::write(&new_path, &data).context("failed to write blob to file")?; + + let blob = BlobObject { + blobdir: blobdir, + name: format!("$BLOBDIR/{hash}"), + }; + context.emit_event(EventType::NewBlobFile(blob.as_name().to_string())); + Ok(blob) + }) + } + /// Creates a blob from a file, possibly copying it to the blobdir. /// /// If the source file is not a path to into the blob directory diff --git a/src/chat.rs b/src/chat.rs index 89390209ed..c386ef30f5 100644 --- a/src/chat.rs +++ b/src/chat.rs @@ -7356,9 +7356,13 @@ mod tests { // the file bob receives should not contain BIDI-control characters assert_eq!( - Some("$BLOBDIR/harmless_file.txt.exe"), + Some("$BLOBDIR/30c0f9c6a167fc2a91285c85be7ea341569b3b39fcc5f77fd34534cade971d20"), msg.param.get(Param::File), ); + assert_eq!( + Some("harmless_file.txt.exe"), + msg.param.get(Param::Filename), + ); Ok(()) } diff --git a/src/mimeparser.rs b/src/mimeparser.rs index b7e871fffb..7bfbf4f491 100644 --- a/src/mimeparser.rs +++ b/src/mimeparser.rs @@ -1371,7 +1371,7 @@ impl MimeMessage { /* we have a regular file attachment, write decoded data to new blob object */ - let blob = match BlobObject::create(context, filename, decoded_data).await { + let blob = match BlobObject::create_and_deduplicate_blob(context, decoded_data).await { Ok(blob) => blob, Err(err) => { error!( @@ -3919,8 +3919,8 @@ Message. "this is a classic email ā€“ I attached the .EML file".to_string() ); assert_eq!( - mime_message.parts[0].param.get(Param::File), - Some("$BLOBDIR/.eml") + mime_message.parts[0].param.get(Param::Filename), + Some(".eml") ); assert_eq!(mime_message.parts[0].org_filename, Some(".eml".to_string())); diff --git a/src/receive_imf/tests.rs b/src/receive_imf/tests.rs index d11303e4ce..f21470e340 100644 --- a/src/receive_imf/tests.rs +++ b/src/receive_imf/tests.rs @@ -1663,8 +1663,12 @@ async fn test_pdf_filename_simple() { assert_eq!(msg.viewtype, Viewtype::File); assert_eq!(msg.text, "mail body"); let file_path = msg.param.get(Param::File).unwrap(); - assert!(file_path.starts_with("$BLOBDIR/simple")); - assert!(file_path.ends_with(".pdf")); + assert_eq!( + file_path, + // That's the blake3 hash of the file content: + "$BLOBDIR/24a6af459cec5d733374aeaa19a6133bd4830864e407a7c0395678f97a7ec8a1" + ); + assert_eq!(msg.param.get(Param::Filename).unwrap(), "simple.pdf"); } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] @@ -1679,8 +1683,8 @@ async fn test_pdf_filename_continuation() { assert_eq!(msg.viewtype, Viewtype::File); assert_eq!(msg.text, "mail body"); let file_path = msg.param.get(Param::File).unwrap(); - assert!(file_path.starts_with("$BLOBDIR/test pdf aĢˆoĢˆuĢˆĆŸ")); - assert!(file_path.ends_with(".pdf")); + assert!(file_path.starts_with("$BLOBDIR/")); + assert_eq!(msg.get_filename().unwrap(), "test pdf aĢˆoĢˆuĢˆĆŸ.pdf"); } /// HTML-images may come with many embedded images, eg. tiny icons, corners for formatting, @@ -3243,7 +3247,7 @@ async fn test_weird_and_duplicated_filenames() -> Result<()> { "a. tar.tar.gz", ] { let attachment = alice.blobdir.join(filename_sent); - let content = format!("File content of {filename_sent}"); + let content = format!("File content of tar.gz archive"); tokio::fs::write(&attachment, content.as_bytes()).await?; let mut msg_alice = Message::new(Viewtype::File); @@ -3265,7 +3269,7 @@ async fn test_weird_and_duplicated_filenames() -> Result<()> { msg.save_file(t, &path2).await.unwrap(); assert_eq!( path.file_name().unwrap().to_str().unwrap(), - "a9ee651cbe76ce2c5ff4bf50e41a66d431ab11f3f7d4727a3e765f6101482bdd", + "79402cb76f44c5761888f9036992a768fe8f6cdb67846c5adcf0140cbb478180", "The hash of the content should always be the same" ); assert_eq!(fs::read_to_string(&path).await.unwrap(), content); From 27af74bde5dab76af89193ab9147f12effeb782b Mon Sep 17 00:00:00 2001 From: Hocuri Date: Thu, 19 Dec 2024 16:14:54 +0100 Subject: [PATCH 11/23] Small tweaks, clippy --- src/blob.rs | 2 -- src/message.rs | 9 +++++---- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/src/blob.rs b/src/blob.rs index 6f327e11d0..74cde0d33f 100644 --- a/src/blob.rs +++ b/src/blob.rs @@ -827,8 +827,6 @@ fn add_white_bg(img: &mut DynamicImage) { #[cfg(test)] mod tests { - use fs::File; - use super::*; use crate::message::{Message, Viewtype}; use crate::test_utils::{self, TestContext}; diff --git a/src/message.rs b/src/message.rs index 9eb75f6c56..2a7c72a27c 100644 --- a/src/message.rs +++ b/src/message.rs @@ -1103,7 +1103,6 @@ impl Message { } /// Creates a new blob and sets it as a file associated with a message. - /// TODO this could also deduplicate /// TODO this is set as pub(crate) now since it's not used in any API pub(crate) async fn set_file_from_bytes( &mut self, @@ -1112,9 +1111,11 @@ impl Message { data: &[u8], filemime: Option<&str>, ) -> Result<()> { - let blob = BlobObject::create(context, name, data).await?; - self.set_file_and_deduplicate(context, &blob.to_abs_path(), name, filemime) - .await?; + let blob = BlobObject::create_and_deduplicate_blob(context, data).await?; + self.param.set(Param::Filename, name); + self.param.set(Param::File, blob.as_name()); + self.param.set_optional(Param::MimeType, filemime); + Ok(()) } From f16ec310f0fe31f17207ebcdfb108e3287e52156 Mon Sep 17 00:00:00 2001 From: Hocuri Date: Mon, 23 Dec 2024 21:22:30 +0100 Subject: [PATCH 12/23] Set deduplicated files as read-only on the file system --- src/blob.rs | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 50 insertions(+), 2 deletions(-) diff --git a/src/blob.rs b/src/blob.rs index 74cde0d33f..e93765ca53 100644 --- a/src/blob.rs +++ b/src/blob.rs @@ -170,6 +170,7 @@ impl<'a> BlobObject<'a> { // only works for files that already are in the blobdir, anyway. std::fs::rename(src, &new_path)?; }; + set_readonly(&new_path).log_err(context).ok(); let blob = BlobObject { blobdir: blobdir, @@ -191,7 +192,16 @@ impl<'a> BlobObject<'a> { let hash = hash.as_str(); let new_path = context.get_blobdir().join(hash); - std::fs::write(&new_path, &data).context("failed to write blob to file")?; + if let Err(e) = std::fs::write(&new_path, &data) { + if new_path.exists() { + // Looks like the file is read-only and exists already + // TODO: Maybe we should check if the file contents are the same, + // or at least if the length is the same, and overwrite if not. + } else { + bail!("Failed to write file: {e:#}"); + } + } + set_readonly(&new_path).log_err(context).ok(); let blob = BlobObject { blobdir: blobdir, @@ -667,6 +677,13 @@ impl<'a> BlobObject<'a> { } } +fn set_readonly(new_path: &Path) -> Result<()> { + let mut perms = std::fs::metadata(&new_path)?.permissions(); + perms.set_readonly(true); + std::fs::set_permissions(&new_path, perms)?; + Ok(()) +} + /// Returns image file size and Exif. pub fn image_metadata(file: &std::fs::File) -> Result<(u64, Option)> { let len = file.metadata()?.len(); @@ -1570,7 +1587,7 @@ mod tests { } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] - async fn test_deduplication() -> Result<()> { + async fn test_create_and_deduplicate() -> Result<()> { let t = TestContext::new().await; let path = t.get_blobdir().join("anyfile.dat"); @@ -1582,6 +1599,12 @@ mod tests { ); assert_eq!(path.exists(), false); + // The file should be read-only: + fs::write(&blob.to_abs_path(), b"bla blub") + .await + .unwrap_err(); + assert_eq!(fs::read(&blob.to_abs_path()).await?, b"bla"); + fs::write(&path, b"bla").await?; let blob2 = BlobObject::create_and_deduplicate(&t, &path).await?; assert_eq!(blob2.name, blob.name); @@ -1600,4 +1623,29 @@ mod tests { Ok(()) } + + #[tokio::test(flavor = "multi_thread", worker_threads = 2)] + async fn test_create_and_deduplicate_blob() -> Result<()> { + let t = TestContext::new().await; + + let blob = BlobObject::create_and_deduplicate_blob(&t, b"bla").await?; + assert_eq!( + blob.name, + "$BLOBDIR/ce940175885d7b78f7b7e9f1396611ff3e6828ebba2ca0d8b6e0f860ef2baf66" + ); + + // The file should be read-only: + fs::write(&blob.to_abs_path(), b"bla blub") + .await + .unwrap_err(); + assert_eq!(fs::read(&blob.to_abs_path()).await?, b"bla"); + + let blob2 = BlobObject::create_and_deduplicate_blob(&t, b"bla").await?; + assert_eq!(blob2.name, blob.name); + + let blob3 = BlobObject::create_and_deduplicate_blob(&t, b"blabla").await?; + assert_ne!(blob3.name, blob.name); + + Ok(()) + } } From 47d140adf0a7c67eeb19bb694313c53ee1cf5911 Mon Sep 17 00:00:00 2001 From: Hocuri Date: Mon, 23 Dec 2024 21:55:02 +0100 Subject: [PATCH 13/23] Set the file modification time so that it's not deleted during housekeeping --- src/blob.rs | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/src/blob.rs b/src/blob.rs index e93765ca53..2fd5c8fecd 100644 --- a/src/blob.rs +++ b/src/blob.rs @@ -24,6 +24,7 @@ use crate::constants::{self, MediaQuality}; use crate::context::Context; use crate::events::EventType; use crate::log::LogExt; +use crate::tools::SystemTime; /// Represents a file in the blob directory. /// @@ -197,6 +198,10 @@ impl<'a> BlobObject<'a> { // Looks like the file is read-only and exists already // TODO: Maybe we should check if the file contents are the same, // or at least if the length is the same, and overwrite if not. + + // Set the file to be modified "now", so that it's not deleted during housekeeping + let f = std::fs::File::open(&new_path).context("File::open")?; + f.set_modified(SystemTime::now()).context("set_modified")?; } else { bail!("Failed to write file: {e:#}"); } @@ -844,8 +849,11 @@ fn add_white_bg(img: &mut DynamicImage) { #[cfg(test)] mod tests { + use std::time::Duration; + use super::*; use crate::message::{Message, Viewtype}; + use crate::sql; use crate::test_utils::{self, TestContext}; fn check_image_size(path: impl AsRef, width: u32, height: u32) -> image::DynamicImage { @@ -1639,10 +1647,25 @@ mod tests { .await .unwrap_err(); assert_eq!(fs::read(&blob.to_abs_path()).await?, b"bla"); + let modified1 = blob.to_abs_path().metadata()?.modified()?; + + // Create a temporary file & shift the time for 1 hour + // so that we can later test whether everything works fine with housekeeping: + let temp_file = t.get_blobdir().join("temp.txt"); + fs::write(&temp_file, b"temporary data").await?; + SystemTime::shift(Duration::from_secs(65 * 60)); let blob2 = BlobObject::create_and_deduplicate_blob(&t, b"bla").await?; assert_eq!(blob2.name, blob.name); + // The modification time of the file should be updated + // so that it's not deleted during housekeeping: + let modified2 = blob.to_abs_path().metadata()?.modified()?; + assert_ne!(modified1, modified2); + sql::housekeeping(&t).await?; + assert!(blob2.to_abs_path().exists()); + assert_eq!(temp_file.exists(), false); + let blob3 = BlobObject::create_and_deduplicate_blob(&t, b"blabla").await?; assert_ne!(blob3.name, blob.name); From 2cbdc0125b057d1bea84cb5961142216fa6e6c40 Mon Sep 17 00:00:00 2001 From: Hocuri Date: Thu, 26 Dec 2024 23:56:45 +0100 Subject: [PATCH 14/23] Deduplicate the code writing a file --- src/blob.rs | 63 +++++++++++++++++++++++++++-------------------------- 1 file changed, 32 insertions(+), 31 deletions(-) diff --git a/src/blob.rs b/src/blob.rs index 2fd5c8fecd..c5c948cbde 100644 --- a/src/blob.rs +++ b/src/blob.rs @@ -186,35 +186,40 @@ impl<'a> BlobObject<'a> { context: &'a Context, data: &[u8], ) -> Result> { - task::block_in_place(|| { - let blobdir = context.get_blobdir(); + task::block_in_place(|| BlobObject::create_and_deduplicate_blob_inner(context, data)) + } - let hash = blake3::hash(&data).to_hex(); - let hash = hash.as_str(); - let new_path = context.get_blobdir().join(hash); + fn create_and_deduplicate_blob_inner( + context: &'a Context, + data: &[u8], + ) -> Result> { + let blobdir = context.get_blobdir(); - if let Err(e) = std::fs::write(&new_path, &data) { - if new_path.exists() { - // Looks like the file is read-only and exists already - // TODO: Maybe we should check if the file contents are the same, - // or at least if the length is the same, and overwrite if not. + let hash = blake3::hash(&data).to_hex(); + let hash = hash.as_str(); + let new_path = context.get_blobdir().join(hash); - // Set the file to be modified "now", so that it's not deleted during housekeeping - let f = std::fs::File::open(&new_path).context("File::open")?; - f.set_modified(SystemTime::now()).context("set_modified")?; - } else { - bail!("Failed to write file: {e:#}"); - } + if let Err(e) = std::fs::write(&new_path, &data) { + if new_path.exists() { + // Looks like the file is read-only and exists already + // TODO: Maybe we should check if the file contents are the same, + // or at least if the length is the same, and overwrite if not. + + // Set the file to be modified "now", so that it's not deleted during housekeeping + let f = std::fs::File::open(&new_path).context("File::open")?; + f.set_modified(SystemTime::now()).context("set_modified")?; + } else { + bail!("Failed to write file: {e:#}"); } - set_readonly(&new_path).log_err(context).ok(); + } + set_readonly(&new_path).log_err(context).ok(); - let blob = BlobObject { - blobdir: blobdir, - name: format!("$BLOBDIR/{hash}"), - }; - context.emit_event(EventType::NewBlobFile(blob.as_name().to_string())); - Ok(blob) - }) + let blob = BlobObject { + blobdir: blobdir, + name: format!("$BLOBDIR/{hash}"), + }; + context.emit_event(EventType::NewBlobFile(blob.as_name().to_string())); + Ok(blob) } /// Creates a blob from a file, possibly copying it to the blobdir. @@ -654,13 +659,9 @@ impl<'a> BlobObject<'a> { encode_img(&img, ofmt, &mut encoded)?; } - let hash = blake3::hash(&encoded).to_hex(); - let hash = hash.as_str(); - let new_path = context.get_blobdir().join(hash); - - std::fs::write(&new_path, &encoded) - .context("failed to write recoded blob to file")?; - self.name = format!("$BLOBDIR/{hash}"); + self.name = BlobObject::create_and_deduplicate_blob_inner(context, &encoded) + .context("failed to write recoded blob to file")? + .name; } Ok(name) From ca901334ab66ec0fdc3217b007407827fc8fce79 Mon Sep 17 00:00:00 2001 From: Hocuri Date: Fri, 27 Dec 2024 00:29:18 +0100 Subject: [PATCH 15/23] Use only the first 32 characters of the hash --- src/blob.rs | 48 ++++++++++++++++------------------------ src/chat.rs | 2 +- src/imex/transfer.rs | 2 +- src/receive_imf/tests.rs | 4 ++-- 4 files changed, 23 insertions(+), 33 deletions(-) diff --git a/src/blob.rs b/src/blob.rs index c5c948cbde..ca447c838a 100644 --- a/src/blob.rs +++ b/src/blob.rs @@ -146,9 +146,6 @@ impl<'a> BlobObject<'a> { context: &'a Context, src: &Path, ) -> Result> { - // `update_reader()` uses std::fs, so we need to use task::block_in_place(). - // Tokio io also just calls spawn_blocking() internally (e.g. https://docs.rs/tokio/1.42.0/src/tokio/fs/file.rs.html#606 and https://docs.rs/tokio/latest/src/tokio/fs/mod.rs.html#310) - // so we are doing essentially the same here. task::block_in_place(|| { let blobdir = context.get_blobdir(); if !(src.starts_with(blobdir) || src.starts_with("$BLOBDIR/")) { @@ -160,9 +157,8 @@ impl<'a> BlobObject<'a> { .with_context(|| format!("failed to open file {}", src.display()))?; hasher.update_reader(&mut src_file)?; drop(src_file); - let hash = hasher.finalize().to_hex(); - let hash = hash.as_str(); - let new_path = blobdir.join(hash); + let blob = BlobObject::new_from_hash(blobdir, hasher.finalize())?; + let new_path = blob.to_abs_path(); // This will also replace an already-existing file: if let Err(_) = std::fs::rename(src, &new_path) { @@ -171,12 +167,8 @@ impl<'a> BlobObject<'a> { // only works for files that already are in the blobdir, anyway. std::fs::rename(src, &new_path)?; }; - set_readonly(&new_path).log_err(context).ok(); - let blob = BlobObject { - blobdir: blobdir, - name: format!("$BLOBDIR/{hash}"), - }; + set_readonly(&new_path).log_err(context).ok(); context.emit_event(EventType::NewBlobFile(blob.as_name().to_string())); Ok(blob) }) @@ -193,11 +185,8 @@ impl<'a> BlobObject<'a> { context: &'a Context, data: &[u8], ) -> Result> { - let blobdir = context.get_blobdir(); - - let hash = blake3::hash(&data).to_hex(); - let hash = hash.as_str(); - let new_path = context.get_blobdir().join(hash); + let blob = BlobObject::new_from_hash(context.get_blobdir(), blake3::hash(&data))?; + let new_path = blob.to_abs_path(); if let Err(e) = std::fs::write(&new_path, &data) { if new_path.exists() { @@ -212,13 +201,19 @@ impl<'a> BlobObject<'a> { bail!("Failed to write file: {e:#}"); } } + set_readonly(&new_path).log_err(context).ok(); + context.emit_event(EventType::NewBlobFile(blob.as_name().to_string())); + Ok(blob) + } + fn new_from_hash(blobdir: &Path, hash: blake3::Hash) -> Result> { + let hash = hash.to_hex(); + let hash = hash.as_str().get(0..31).context("Too short hash")?; let blob = BlobObject { blobdir: blobdir, name: format!("$BLOBDIR/{hash}"), }; - context.emit_event(EventType::NewBlobFile(blob.as_name().to_string())); Ok(blob) } @@ -1139,8 +1134,7 @@ mod tests { let avatar_blob = t.get_config(Config::Selfavatar).await.unwrap().unwrap(); let avatar_path = Path::new(&avatar_blob); assert!( - avatar_blob - .ends_with("d98cd30ed8f2129bf3968420208849d4663bb4e9e124e208b45f43d68acda18e"), + avatar_blob.ends_with("d98cd30ed8f2129bf3968420208849d"), "The avatar filename should be its hash, put instead it's {avatar_blob}" ); let scaled_avatar_size = file_size(&avatar_path).await; @@ -1198,8 +1192,10 @@ mod tests { .await .unwrap(); let avatar_cfg = t.get_config(Config::Selfavatar).await.unwrap().unwrap(); - assert!(avatar_cfg - .ends_with("9e7f409ac5c92b942cc4f31cee2770ab3f69cf621fb5eb4e430ff6d21b3f37fd")); + assert!( + avatar_cfg.ends_with("9e7f409ac5c92b942cc4f31cee2770a"), + "Avatar file name {avatar_cfg} should end with its hash" + ); check_image_size( avatar_cfg, @@ -1602,10 +1598,7 @@ mod tests { let path = t.get_blobdir().join("anyfile.dat"); fs::write(&path, b"bla").await?; let blob = BlobObject::create_and_deduplicate(&t, &path).await?; - assert_eq!( - blob.name, - "$BLOBDIR/ce940175885d7b78f7b7e9f1396611ff3e6828ebba2ca0d8b6e0f860ef2baf66" - ); + assert_eq!(blob.name, "$BLOBDIR/ce940175885d7b78f7b7e9f1396611f"); assert_eq!(path.exists(), false); // The file should be read-only: @@ -1638,10 +1631,7 @@ mod tests { let t = TestContext::new().await; let blob = BlobObject::create_and_deduplicate_blob(&t, b"bla").await?; - assert_eq!( - blob.name, - "$BLOBDIR/ce940175885d7b78f7b7e9f1396611ff3e6828ebba2ca0d8b6e0f860ef2baf66" - ); + assert_eq!(blob.name, "$BLOBDIR/ce940175885d7b78f7b7e9f1396611f"); // The file should be read-only: fs::write(&blob.to_abs_path(), b"bla blub") diff --git a/src/chat.rs b/src/chat.rs index c386ef30f5..af08205e96 100644 --- a/src/chat.rs +++ b/src/chat.rs @@ -7356,7 +7356,7 @@ mod tests { // the file bob receives should not contain BIDI-control characters assert_eq!( - Some("$BLOBDIR/30c0f9c6a167fc2a91285c85be7ea341569b3b39fcc5f77fd34534cade971d20"), + Some("$BLOBDIR/30c0f9c6a167fc2a91285c85be7ea34"), msg.param.get(Param::File), ); assert_eq!( diff --git a/src/imex/transfer.rs b/src/imex/transfer.rs index 793c1f89bc..a41c7e40ba 100644 --- a/src/imex/transfer.rs +++ b/src/imex/transfer.rs @@ -432,7 +432,7 @@ mod tests { let path = msg.get_file(&ctx1).unwrap(); assert_eq!( // That's the hash of the file: - path.with_file_name("ac1d2d284757656a8d41dc40aae4136ff2bbb95c582d8adb8fdde38d224ec3c1"), + path.with_file_name("ac1d2d284757656a8d41dc40aae4136"), path ); assert_eq!("hello.txt", msg.get_filename().unwrap()); diff --git a/src/receive_imf/tests.rs b/src/receive_imf/tests.rs index f21470e340..fa6bd94ee0 100644 --- a/src/receive_imf/tests.rs +++ b/src/receive_imf/tests.rs @@ -1666,7 +1666,7 @@ async fn test_pdf_filename_simple() { assert_eq!( file_path, // That's the blake3 hash of the file content: - "$BLOBDIR/24a6af459cec5d733374aeaa19a6133bd4830864e407a7c0395678f97a7ec8a1" + "$BLOBDIR/24a6af459cec5d733374aeaa19a6133" ); assert_eq!(msg.param.get(Param::Filename).unwrap(), "simple.pdf"); } @@ -3269,7 +3269,7 @@ async fn test_weird_and_duplicated_filenames() -> Result<()> { msg.save_file(t, &path2).await.unwrap(); assert_eq!( path.file_name().unwrap().to_str().unwrap(), - "79402cb76f44c5761888f9036992a768fe8f6cdb67846c5adcf0140cbb478180", + "79402cb76f44c5761888f9036992a76", "The hash of the content should always be the same" ); assert_eq!(fs::read_to_string(&path).await.unwrap(), content); From aa772d77a985c2b2c139dcc585f7196d23470267 Mon Sep 17 00:00:00 2001 From: Hocuri Date: Fri, 27 Dec 2024 10:35:01 +0100 Subject: [PATCH 16/23] Keep the code repairing Param::Filename extensions for now I'm not sure whether we still need it and the tests pass without it, but also I don't want to introduce a new bug by changing stuff, and it's just 8 lines, anyway. --- src/chat.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/chat.rs b/src/chat.rs index af08205e96..375f40c5f2 100644 --- a/src/chat.rs +++ b/src/chat.rs @@ -2741,7 +2741,6 @@ async fn prepare_msg_blob(context: &Context, msg: &mut Message) -> Result<()> { } } msg.param.set(Param::File, blob.as_name()); - // TODO not sure if we still need the next part if let (Some(filename), Some(blob_ext)) = (msg.param.get(Param::Filename), blob.suffix()) { let stem = match filename.rsplit_once('.') { Some((stem, _)) => stem, From 2a25af868a3229a400b3fbf772e77e37b94926d8 Mon Sep 17 00:00:00 2001 From: Hocuri Date: Fri, 27 Dec 2024 10:45:19 +0100 Subject: [PATCH 17/23] Some renames, leave `set_file_from_bytes()` being pub for now --- src/blob.rs | 20 ++++++++++---------- src/message.rs | 5 ++--- src/mimeparser.rs | 3 ++- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/blob.rs b/src/blob.rs index ca447c838a..28c9d31546 100644 --- a/src/blob.rs +++ b/src/blob.rs @@ -157,7 +157,7 @@ impl<'a> BlobObject<'a> { .with_context(|| format!("failed to open file {}", src.display()))?; hasher.update_reader(&mut src_file)?; drop(src_file); - let blob = BlobObject::new_from_hash(blobdir, hasher.finalize())?; + let blob = BlobObject::from_hash(blobdir, hasher.finalize())?; let new_path = blob.to_abs_path(); // This will also replace an already-existing file: @@ -174,18 +174,18 @@ impl<'a> BlobObject<'a> { }) } - pub async fn create_and_deduplicate_blob( + pub async fn create_and_deduplicate_from_bytes( context: &'a Context, data: &[u8], ) -> Result> { - task::block_in_place(|| BlobObject::create_and_deduplicate_blob_inner(context, data)) + task::block_in_place(|| BlobObject::create_and_deduplicate_from_bytes_inner(context, data)) } - fn create_and_deduplicate_blob_inner( + fn create_and_deduplicate_from_bytes_inner( context: &'a Context, data: &[u8], ) -> Result> { - let blob = BlobObject::new_from_hash(context.get_blobdir(), blake3::hash(&data))?; + let blob = BlobObject::from_hash(context.get_blobdir(), blake3::hash(&data))?; let new_path = blob.to_abs_path(); if let Err(e) = std::fs::write(&new_path, &data) { @@ -207,7 +207,7 @@ impl<'a> BlobObject<'a> { Ok(blob) } - fn new_from_hash(blobdir: &Path, hash: blake3::Hash) -> Result> { + fn from_hash(blobdir: &Path, hash: blake3::Hash) -> Result> { let hash = hash.to_hex(); let hash = hash.as_str().get(0..31).context("Too short hash")?; let blob = BlobObject { @@ -654,7 +654,7 @@ impl<'a> BlobObject<'a> { encode_img(&img, ofmt, &mut encoded)?; } - self.name = BlobObject::create_and_deduplicate_blob_inner(context, &encoded) + self.name = BlobObject::create_and_deduplicate_from_bytes_inner(context, &encoded) .context("failed to write recoded blob to file")? .name; } @@ -1630,7 +1630,7 @@ mod tests { async fn test_create_and_deduplicate_blob() -> Result<()> { let t = TestContext::new().await; - let blob = BlobObject::create_and_deduplicate_blob(&t, b"bla").await?; + let blob = BlobObject::create_and_deduplicate_from_bytes(&t, b"bla").await?; assert_eq!(blob.name, "$BLOBDIR/ce940175885d7b78f7b7e9f1396611f"); // The file should be read-only: @@ -1646,7 +1646,7 @@ mod tests { fs::write(&temp_file, b"temporary data").await?; SystemTime::shift(Duration::from_secs(65 * 60)); - let blob2 = BlobObject::create_and_deduplicate_blob(&t, b"bla").await?; + let blob2 = BlobObject::create_and_deduplicate_from_bytes(&t, b"bla").await?; assert_eq!(blob2.name, blob.name); // The modification time of the file should be updated @@ -1657,7 +1657,7 @@ mod tests { assert!(blob2.to_abs_path().exists()); assert_eq!(temp_file.exists(), false); - let blob3 = BlobObject::create_and_deduplicate_blob(&t, b"blabla").await?; + let blob3 = BlobObject::create_and_deduplicate_from_bytes(&t, b"blabla").await?; assert_ne!(blob3.name, blob.name); Ok(()) diff --git a/src/message.rs b/src/message.rs index 2a7c72a27c..e87e2f1ae2 100644 --- a/src/message.rs +++ b/src/message.rs @@ -1103,15 +1103,14 @@ impl Message { } /// Creates a new blob and sets it as a file associated with a message. - /// TODO this is set as pub(crate) now since it's not used in any API - pub(crate) async fn set_file_from_bytes( + pub async fn set_file_from_bytes( &mut self, context: &Context, name: &str, data: &[u8], filemime: Option<&str>, ) -> Result<()> { - let blob = BlobObject::create_and_deduplicate_blob(context, data).await?; + let blob = BlobObject::create_and_deduplicate_from_bytes(context, data).await?; self.param.set(Param::Filename, name); self.param.set(Param::File, blob.as_name()); self.param.set_optional(Param::MimeType, filemime); diff --git a/src/mimeparser.rs b/src/mimeparser.rs index 7bfbf4f491..7d3f9bdaa2 100644 --- a/src/mimeparser.rs +++ b/src/mimeparser.rs @@ -1371,7 +1371,8 @@ impl MimeMessage { /* we have a regular file attachment, write decoded data to new blob object */ - let blob = match BlobObject::create_and_deduplicate_blob(context, decoded_data).await { + let blob = match BlobObject::create_and_deduplicate_from_bytes(context, decoded_data).await + { Ok(blob) => blob, Err(err) => { error!( From efabdcd0ac2231314cde213d250cd46f0f3f0d77 Mon Sep 17 00:00:00 2001 From: Hocuri Date: Fri, 27 Dec 2024 12:49:34 +0100 Subject: [PATCH 18/23] Create blob dir if it doesn't exist --- src/blob.rs | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/blob.rs b/src/blob.rs index 28c9d31546..bb311523b5 100644 --- a/src/blob.rs +++ b/src/blob.rs @@ -185,10 +185,11 @@ impl<'a> BlobObject<'a> { context: &'a Context, data: &[u8], ) -> Result> { - let blob = BlobObject::from_hash(context.get_blobdir(), blake3::hash(&data))?; + let blobdir = context.get_blobdir(); + let blob = BlobObject::from_hash(blobdir, blake3::hash(&data))?; let new_path = blob.to_abs_path(); - if let Err(e) = std::fs::write(&new_path, &data) { + if let Err(_) = std::fs::write(&new_path, &data) { if new_path.exists() { // Looks like the file is read-only and exists already // TODO: Maybe we should check if the file contents are the same, @@ -198,7 +199,9 @@ impl<'a> BlobObject<'a> { let f = std::fs::File::open(&new_path).context("File::open")?; f.set_modified(SystemTime::now()).context("set_modified")?; } else { - bail!("Failed to write file: {e:#}"); + // Try to create the blob directory + std::fs::create_dir_all(blobdir).log_err(context).ok(); + std::fs::write(&new_path, &data).context("fs::write")?; } } @@ -1627,9 +1630,10 @@ mod tests { } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] - async fn test_create_and_deduplicate_blob() -> Result<()> { + async fn test_create_and_deduplicate_from_bytes() -> Result<()> { let t = TestContext::new().await; + fs::remove_dir(t.get_blobdir()).await?; let blob = BlobObject::create_and_deduplicate_from_bytes(&t, b"bla").await?; assert_eq!(blob.name, "$BLOBDIR/ce940175885d7b78f7b7e9f1396611f"); From 8b59fc21254c9cefa6dde9b201bea4b594b5c977 Mon Sep 17 00:00:00 2001 From: Hocuri Date: Fri, 27 Dec 2024 14:32:57 +0100 Subject: [PATCH 19/23] Document and expose via the C ffi --- deltachat-ffi/deltachat.h | 17 +++++++++++++++++ deltachat-ffi/src/lib.rs | 30 ++++++++++++++++++++++++++++++ src/blob.rs | 21 +++++++++++++++++---- src/message.rs | 11 ++++++++--- 4 files changed, 72 insertions(+), 7 deletions(-) diff --git a/deltachat-ffi/deltachat.h b/deltachat-ffi/deltachat.h index edff3dfb21..bcc1308441 100644 --- a/deltachat-ffi/deltachat.h +++ b/deltachat-ffi/deltachat.h @@ -4726,6 +4726,23 @@ void dc_msg_set_override_sender_name(dc_msg_t* msg, const char* name) void dc_msg_set_file (dc_msg_t* msg, const char* file, const char* filemime); +/** + * Sets the file associated with a message. + * + * The actual current name of the file is ignored, instead `name` is used. + * In order to deduplicate files that contain the same data, + * the file will be renamed to a hash of the file data. + * The file must not be modified after this function was called. + * + * @memberof dc_msg_t + * @param msg The message object. + * @param file The file to attach. + * @param name The original filename of the attachment. + * @param filemime The MIME type of the file. NULL if you don't know or don't care. + */ +void dc_msg_set_file_and_deduplicate(dc_msg_t* msg, const char* file, const char* name, const char* filemime); + + /** * Set the dimensions associated with message object. * Typically this is the width and the height of an image or video associated using dc_msg_set_file(). diff --git a/deltachat-ffi/src/lib.rs b/deltachat-ffi/src/lib.rs index c9c0cf8433..9f50c8c98f 100644 --- a/deltachat-ffi/src/lib.rs +++ b/deltachat-ffi/src/lib.rs @@ -3813,6 +3813,36 @@ pub unsafe extern "C" fn dc_msg_set_file( ) } +#[no_mangle] +pub unsafe extern "C" fn dc_msg_set_file_and_deduplicate( + msg: *mut dc_msg_t, + file: *const libc::c_char, + name: *const libc::c_char, + filemime: *const libc::c_char, +) { + if msg.is_null() || file.is_null() || name.is_null() { + eprintln!("ignoring careless call to dc_msg_set_file()"); + return; + } + let ffi_msg = &mut *msg; + let ctx = &*ffi_msg.context; + + block_on(async move { + ffi_msg + .message + .set_file_and_deduplicate( + ctx, + &as_path(file), + &to_string_lossy(name), + to_opt_string_lossy(filemime).as_deref(), + ) + .await + .context("failed to set file") + .log_err(&*ffi_msg.context) + .ok(); + }); +} + #[no_mangle] pub unsafe extern "C" fn dc_msg_set_dimension( msg: *mut dc_msg_t, diff --git a/src/blob.rs b/src/blob.rs index bb311523b5..242af88bbc 100644 --- a/src/blob.rs +++ b/src/blob.rs @@ -75,7 +75,9 @@ impl<'a> BlobObject<'a> { Ok(blob) } - // Creates a new file, returning a tuple of the name and the handle. + /// Creates a new file, returning a tuple of the name and the handle. + /// This uses `create_new(true)` to avoid race conditions + /// when multiple files with the same name are created. async fn create_new_file( context: &Context, dir: &Path, @@ -140,8 +142,12 @@ impl<'a> BlobObject<'a> { Ok(blob) } - /// TODO document - /// TODO what about race conditions when the same file is created multiple times concurrently + /// Creates a blob object from a file that already exists in the blob directory. + /// In order to deduplicate files that contain the same data, + /// the file will be renamed to a hash of the file data. + /// + /// This is done in a in way which avoids race-conditions when multiple files are + /// concurrently created. pub async fn create_and_deduplicate( context: &'a Context, src: &Path, @@ -160,7 +166,8 @@ impl<'a> BlobObject<'a> { let blob = BlobObject::from_hash(blobdir, hasher.finalize())?; let new_path = blob.to_abs_path(); - // This will also replace an already-existing file: + // This will also replace an already-existing file. + // Renaming is atomic, so this will avoid race conditions. if let Err(_) = std::fs::rename(src, &new_path) { // Try a second time in case there was some temporary error. // There is no need to try and create the blobdir since create_and_deduplicate() @@ -174,6 +181,11 @@ impl<'a> BlobObject<'a> { }) } + /// Creates a new blob object with the file contents in `data`. + /// In order to deduplicate files that contain the same data, + /// the file will be renamed to a hash of the file data. + /// + /// The `data` will be written into the file without race-conditions. pub async fn create_and_deduplicate_from_bytes( context: &'a Context, data: &[u8], @@ -189,6 +201,7 @@ impl<'a> BlobObject<'a> { let blob = BlobObject::from_hash(blobdir, blake3::hash(&data))?; let new_path = blob.to_abs_path(); + // This call to `std::fs::write` is thread safe because all threads write the same data. if let Err(_) = std::fs::write(&new_path, &data) { if new_path.exists() { // Looks like the file is read-only and exists already diff --git a/src/message.rs b/src/message.rs index e87e2f1ae2..b4ddf52f58 100644 --- a/src/message.rs +++ b/src/message.rs @@ -1083,10 +1083,11 @@ impl Message { } /// Sets the file associated with a message. - /// The actual current name of the file is ignored, instead `name` is used. - /// The file (may be moved immediately by core) (and must not be modified again after this method was called) /// - /// TODO document, also in deltachat.h + /// The actual current name of the file is ignored, instead `name` is used. + /// In order to deduplicate files that contain the same data, + /// the file will be renamed to a hash of the file data. + /// The file must not be modified after this function was called. pub async fn set_file_and_deduplicate( &mut self, context: &Context, @@ -1103,6 +1104,10 @@ impl Message { } /// Creates a new blob and sets it as a file associated with a message. + /// + /// In order to deduplicate files that contain the same data, + /// the filename will be a hash of the file data. + /// The file must not be modified after this function was called. pub async fn set_file_from_bytes( &mut self, context: &Context, From d474bfb934b90a3c298a822052a1134768447710 Mon Sep 17 00:00:00 2001 From: Hocuri Date: Fri, 27 Dec 2024 17:34:00 +0100 Subject: [PATCH 20/23] Use the actual file's name if `name` is None --- deltachat-ffi/deltachat.h | 4 +++- deltachat-ffi/src/lib.rs | 4 ++-- src/blob.rs | 4 ++-- src/chat.rs | 17 +++++++++-------- src/download.rs | 2 +- src/imex/transfer.rs | 2 +- src/location.rs | 2 +- src/message.rs | 14 +++++++++++--- src/receive_imf/tests.rs | 2 +- src/summary.rs | 30 +++++++++++++++--------------- 10 files changed, 46 insertions(+), 35 deletions(-) diff --git a/deltachat-ffi/deltachat.h b/deltachat-ffi/deltachat.h index bcc1308441..153cc9962a 100644 --- a/deltachat-ffi/deltachat.h +++ b/deltachat-ffi/deltachat.h @@ -4729,7 +4729,9 @@ void dc_msg_set_file (dc_msg_t* msg, const char* file, /** * Sets the file associated with a message. * - * The actual current name of the file is ignored, instead `name` is used. + * If `name` is non-null, it is used as the file name + * and the actual current name of the file is ignored. + * * In order to deduplicate files that contain the same data, * the file will be renamed to a hash of the file data. * The file must not be modified after this function was called. diff --git a/deltachat-ffi/src/lib.rs b/deltachat-ffi/src/lib.rs index 9f50c8c98f..b14f5bd58c 100644 --- a/deltachat-ffi/src/lib.rs +++ b/deltachat-ffi/src/lib.rs @@ -3820,7 +3820,7 @@ pub unsafe extern "C" fn dc_msg_set_file_and_deduplicate( name: *const libc::c_char, filemime: *const libc::c_char, ) { - if msg.is_null() || file.is_null() || name.is_null() { + if msg.is_null() || file.is_null() { eprintln!("ignoring careless call to dc_msg_set_file()"); return; } @@ -3833,7 +3833,7 @@ pub unsafe extern "C" fn dc_msg_set_file_and_deduplicate( .set_file_and_deduplicate( ctx, &as_path(file), - &to_string_lossy(name), + to_opt_string_lossy(name).as_deref(), to_opt_string_lossy(filemime).as_deref(), ) .await diff --git a/src/blob.rs b/src/blob.rs index 242af88bbc..19f5723e99 100644 --- a/src/blob.rs +++ b/src/blob.rs @@ -1510,7 +1510,7 @@ mod tests { } let mut msg = Message::new(viewtype); - msg.set_file_and_deduplicate(&alice, &file, &file_name, None) + msg.set_file_and_deduplicate(&alice, &file, Some(&file_name), None) .await?; let chat = alice.create_chat(&bob).await; if set_draft { @@ -1569,7 +1569,7 @@ mod tests { .await .context("failed to write file")?; let mut msg = Message::new(Viewtype::Image); - msg.set_file_and_deduplicate(&alice, &file, "file.gif", None) + msg.set_file_and_deduplicate(&alice, &file, Some("file.gif"), None) .await?; let chat = alice.create_chat(&bob).await; let sent = alice.send_msg(chat.id, &mut msg).await; diff --git a/src/chat.rs b/src/chat.rs index 375f40c5f2..d8eb554859 100644 --- a/src/chat.rs +++ b/src/chat.rs @@ -6470,7 +6470,7 @@ mod tests { tokio::fs::write(&file, bytes).await?; let mut msg = Message::new(Viewtype::Sticker); - msg.set_file_and_deduplicate(&alice, &file, filename, None) + msg.set_file_and_deduplicate(&alice, &file, Some(filename), None) .await?; let sent_msg = alice.send_msg(alice_chat.id, &mut msg).await; @@ -6535,7 +6535,7 @@ mod tests { // Images without force_sticker should be turned into [Viewtype::Image] let mut msg = Message::new(Viewtype::Sticker); - msg.set_file_and_deduplicate(&alice, &file, "sticker.jpg", None) + msg.set_file_and_deduplicate(&alice, &file, Some("sticker.jpg"), None) .await .unwrap(); let file = msg.get_file(&alice).unwrap(); @@ -6545,7 +6545,7 @@ mod tests { // Images with `force_sticker = true` should keep [Viewtype::Sticker] let mut msg = Message::new(Viewtype::Sticker); - msg.set_file_and_deduplicate(&alice, &file, "sticker.jpg", None) + msg.set_file_and_deduplicate(&alice, &file, Some("sticker.jpg"), None) .await .unwrap(); msg.force_sticker(); @@ -6556,7 +6556,7 @@ mod tests { // Images with `force_sticker = true` should keep [Viewtype::Sticker] // even on drafted messages let mut msg = Message::new(Viewtype::Sticker); - msg.set_file_and_deduplicate(&alice, &file, "sticker.jpg", None) + msg.set_file_and_deduplicate(&alice, &file, Some("sticker.jpg"), None) .await .unwrap(); msg.force_sticker(); @@ -6599,7 +6599,7 @@ mod tests { let file = alice.get_blobdir().join(file_name); tokio::fs::write(&file, bytes).await?; let mut msg = Message::new(Viewtype::Sticker); - msg.set_file_and_deduplicate(&alice, &file, "sticker.jpg", None) + msg.set_file_and_deduplicate(&alice, &file, Some("sticker.jpg"), None) .await?; // send sticker to bob @@ -7195,7 +7195,8 @@ mod tests { let file = t.get_blobdir().join(name); tokio::fs::write(&file, bytes).await?; let mut msg = Message::new(msg_type); - msg.set_file_and_deduplicate(t, &file, name, None).await?; + msg.set_file_and_deduplicate(t, &file, Some(name), None) + .await?; send_msg(t, chat_id, &mut msg).await } @@ -7349,7 +7350,7 @@ mod tests { let file = alice.get_blobdir().join("harmless_file.\u{202e}txt.exe"); fs::write(&file, "aaa").await?; let mut msg = Message::new(Viewtype::File); - msg.set_file_and_deduplicate(&alice, &file, "harmless_file.\u{202e}txt.exe", None) + msg.set_file_and_deduplicate(&alice, &file, Some("harmless_file.\u{202e}txt.exe"), None) .await?; let msg = bob.recv_msg(&alice.send_msg(chat_id, &mut msg).await).await; @@ -7689,7 +7690,7 @@ mod tests { let file = alice.get_blobdir().join("screenshot.png"); tokio::fs::write(&file, bytes).await?; let mut msg = Message::new(Viewtype::Image); - msg.set_file_and_deduplicate(&alice, &file, "screenshot.png", None) + msg.set_file_and_deduplicate(&alice, &file, Some("screenshot.png"), None) .await?; let alice_chat = alice.create_chat(&bob).await; diff --git a/src/download.rs b/src/download.rs index fa7aa8ca70..da8e9cd77c 100644 --- a/src/download.rs +++ b/src/download.rs @@ -441,7 +441,7 @@ mod tests { tokio::fs::write(&file, include_bytes!("../test-data/webxdc/minimal.xdc")).await?; let mut instance = Message::new(Viewtype::File); instance - .set_file_and_deduplicate(&alice, &file, "minimal.xdc", None) + .set_file_and_deduplicate(&alice, &file, None, None) .await?; let _sent1 = alice.send_msg(chat_id, &mut instance).await; diff --git a/src/imex/transfer.rs b/src/imex/transfer.rs index a41c7e40ba..a00ef58dd8 100644 --- a/src/imex/transfer.rs +++ b/src/imex/transfer.rs @@ -394,7 +394,7 @@ mod tests { let file = ctx0.get_blobdir().join("hello.txt"); fs::write(&file, "i am attachment").await.unwrap(); let mut msg = Message::new(Viewtype::File); - msg.set_file_and_deduplicate(&ctx0, &file, "hello.txt", Some("text/plain")) + msg.set_file_and_deduplicate(&ctx0, &file, Some("hello.txt"), Some("text/plain")) .await .unwrap(); send_msg(&ctx0, self_chat.id, &mut msg).await.unwrap(); diff --git a/src/location.rs b/src/location.rs index 792ecb9916..156f497480 100644 --- a/src/location.rs +++ b/src/location.rs @@ -1074,7 +1074,7 @@ Content-Disposition: attachment; filename="location.kml" let file = alice.get_blobdir().join(file_name); tokio::fs::write(&file, bytes).await?; let mut msg = Message::new(Viewtype::Image); - msg.set_file_and_deduplicate(&alice, &file, "logo.png", None) + msg.set_file_and_deduplicate(&alice, &file, Some("logo.png"), None) .await?; let sent = alice.send_msg(alice_chat.id, &mut msg).await; let alice_msg = Message::load_from_db(&alice, sent.sender_msg_id).await?; diff --git a/src/message.rs b/src/message.rs index b4ddf52f58..2d7051e09f 100644 --- a/src/message.rs +++ b/src/message.rs @@ -1,6 +1,7 @@ //! # Messages and their identifiers. use std::collections::BTreeSet; +use std::ffi::OsStr; use std::path::{Path, PathBuf}; use std::str; @@ -1084,7 +1085,9 @@ impl Message { /// Sets the file associated with a message. /// - /// The actual current name of the file is ignored, instead `name` is used. + /// If `name` is Some, it is used as the file name + /// and the actual current name of the file is ignored. + /// /// In order to deduplicate files that contain the same data, /// the file will be renamed to a hash of the file data. /// The file must not be modified after this function was called. @@ -1092,11 +1095,16 @@ impl Message { &mut self, context: &Context, file: &Path, - name: &str, + name: Option<&str>, filemime: Option<&str>, ) -> Result<()> { let blob = BlobObject::create_and_deduplicate(context, file).await?; - self.param.set(Param::Filename, name); + if let Some(name) = name { + self.param.set(Param::Filename, name); + } else { + let file_name = file.file_name().map(OsStr::to_string_lossy); + self.param.set_optional(Param::Filename, file_name); + } self.param.set(Param::File, blob.as_name()); self.param.set_optional(Param::MimeType, filemime); diff --git a/src/receive_imf/tests.rs b/src/receive_imf/tests.rs index fa6bd94ee0..71229bcba9 100644 --- a/src/receive_imf/tests.rs +++ b/src/receive_imf/tests.rs @@ -3252,7 +3252,7 @@ async fn test_weird_and_duplicated_filenames() -> Result<()> { let mut msg_alice = Message::new(Viewtype::File); msg_alice - .set_file_and_deduplicate(&alice, &attachment, filename_sent, None) + .set_file_and_deduplicate(&alice, &attachment, None, None) .await?; let alice_chat = alice.create_chat(&bob).await; let sent = alice.send_msg(alice_chat.id, &mut msg_alice).await; diff --git a/src/summary.rs b/src/summary.rs index 177c6a0fb9..4bbe4206f6 100644 --- a/src/summary.rs +++ b/src/summary.rs @@ -319,7 +319,7 @@ mod tests { let file = write_file_to_blobdir(&d).await; let mut msg = Message::new(Viewtype::Image); - msg.set_file_and_deduplicate(&d, &file, "foo.jpg", None) + msg.set_file_and_deduplicate(&d, &file, Some("foo.jpg"), None) .await .unwrap(); assert_summary_texts(&msg, ctx, "šŸ“· Image").await; // file names are not added for images @@ -327,14 +327,14 @@ mod tests { let file = write_file_to_blobdir(&d).await; let mut msg = Message::new(Viewtype::Image); msg.set_text(some_text.to_string()); - msg.set_file_and_deduplicate(&d, &file, "foo.jpg", None) + msg.set_file_and_deduplicate(&d, &file, Some("foo.jpg"), None) .await .unwrap(); assert_summary_texts(&msg, ctx, "šŸ“· bla bla").await; // type is visible by emoji if text is set let file = write_file_to_blobdir(&d).await; let mut msg = Message::new(Viewtype::Video); - msg.set_file_and_deduplicate(&d, &file, "foo.mp4", None) + msg.set_file_and_deduplicate(&d, &file, Some("foo.mp4"), None) .await .unwrap(); assert_summary_texts(&msg, ctx, "šŸŽ„ Video").await; // file names are not added for videos @@ -342,14 +342,14 @@ mod tests { let file = write_file_to_blobdir(&d).await; let mut msg = Message::new(Viewtype::Video); msg.set_text(some_text.to_string()); - msg.set_file_and_deduplicate(&d, &file, "foo.mp4", None) + msg.set_file_and_deduplicate(&d, &file, Some("foo.mp4"), None) .await .unwrap(); assert_summary_texts(&msg, ctx, "šŸŽ„ bla bla").await; // type is visible by emoji if text is set let file = write_file_to_blobdir(&d).await; let mut msg = Message::new(Viewtype::Gif); - msg.set_file_and_deduplicate(&d, &file, "foo.gif", None) + msg.set_file_and_deduplicate(&d, &file, Some("foo.gif"), None) .await .unwrap(); assert_summary_texts(&msg, ctx, "GIF").await; // file names are not added for GIFs @@ -357,21 +357,21 @@ mod tests { let file = write_file_to_blobdir(&d).await; let mut msg = Message::new(Viewtype::Gif); msg.set_text(some_text.to_string()); - msg.set_file_and_deduplicate(&d, &file, "foo.gif", None) + msg.set_file_and_deduplicate(&d, &file, Some("foo.gif"), None) .await .unwrap(); assert_summary_texts(&msg, ctx, "GIF \u{2013} bla bla").await; // file names are not added for GIFs let file = write_file_to_blobdir(&d).await; let mut msg = Message::new(Viewtype::Sticker); - msg.set_file_and_deduplicate(&d, &file, "foo.png", None) + msg.set_file_and_deduplicate(&d, &file, Some("foo.png"), None) .await .unwrap(); assert_summary_texts(&msg, ctx, "Sticker").await; // file names are not added for stickers let file = write_file_to_blobdir(&d).await; let mut msg = Message::new(Viewtype::Voice); - msg.set_file_and_deduplicate(&d, &file, "foo.mp3", None) + msg.set_file_and_deduplicate(&d, &file, Some("foo.mp3"), None) .await .unwrap(); assert_summary_texts(&msg, ctx, "šŸŽ¤ Voice message").await; // file names are not added for voice messages @@ -379,14 +379,14 @@ mod tests { let file = write_file_to_blobdir(&d).await; let mut msg = Message::new(Viewtype::Voice); msg.set_text(some_text.clone()); - msg.set_file_and_deduplicate(&d, &file, "foo.mp3", None) + msg.set_file_and_deduplicate(&d, &file, Some("foo.mp3"), None) .await .unwrap(); assert_summary_texts(&msg, ctx, "šŸŽ¤ bla bla").await; let file = write_file_to_blobdir(&d).await; let mut msg = Message::new(Viewtype::Audio); - msg.set_file_and_deduplicate(&d, &file, "foo.mp3", None) + msg.set_file_and_deduplicate(&d, &file, Some("foo.mp3"), None) .await .unwrap(); assert_summary_texts(&msg, ctx, "šŸŽµ foo.mp3").await; // file name is added for audio @@ -394,7 +394,7 @@ mod tests { let file = write_file_to_blobdir(&d).await; let mut msg = Message::new(Viewtype::Audio); msg.set_text(some_text.clone()); - msg.set_file_and_deduplicate(&d, &file, "foo.mp3", None) + msg.set_file_and_deduplicate(&d, &file, Some("foo.mp3"), None) .await .unwrap(); assert_summary_texts(&msg, ctx, "šŸŽµ foo.mp3 \u{2013} bla bla").await; // file name and text added for audio @@ -413,7 +413,7 @@ mod tests { let file = write_file_to_blobdir(&d).await; let mut msg = Message::new(Viewtype::File); - msg.set_file_and_deduplicate(&d, &file, "foo.bar", None) + msg.set_file_and_deduplicate(&d, &file, Some("foo.bar"), None) .await .unwrap(); assert_summary_texts(&msg, ctx, "šŸ“Ž foo.bar").await; // file name is added for files @@ -421,7 +421,7 @@ mod tests { let file = write_file_to_blobdir(&d).await; let mut msg = Message::new(Viewtype::File); msg.set_text(some_text.clone()); - msg.set_file_and_deduplicate(&d, &file, "foo.bar", None) + msg.set_file_and_deduplicate(&d, &file, Some("foo.bar"), None) .await .unwrap(); assert_summary_texts(&msg, ctx, "šŸ“Ž foo.bar \u{2013} bla bla").await; // file name is added for files @@ -429,7 +429,7 @@ mod tests { let file = write_file_to_blobdir(&d).await; let mut msg = Message::new(Viewtype::VideochatInvitation); msg.set_text(some_text.clone()); - msg.set_file_and_deduplicate(&d, &file, "foo.bar", None) + msg.set_file_and_deduplicate(&d, &file, Some("foo.bar"), None) .await .unwrap(); assert_summary_texts(&msg, ctx, "Video chat invitation").await; // text is not added for videochat invitations @@ -473,7 +473,7 @@ mod tests { let file = write_file_to_blobdir(&d).await; let mut msg = Message::new(Viewtype::File); msg.set_text(some_text.clone()); - msg.set_file_and_deduplicate(&d, &file, "foo.bar", None) + msg.set_file_and_deduplicate(&d, &file, Some("foo.bar"), None) .await .unwrap(); msg.param.set_int(Param::Forwarded, 1); From d0a31304eb4941726ba9f79dd246755033f91588 Mon Sep 17 00:00:00 2001 From: Hocuri Date: Fri, 27 Dec 2024 17:40:39 +0100 Subject: [PATCH 21/23] Clippy --- src/blob.rs | 22 +++++++++++----------- src/receive_imf/tests.rs | 2 +- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/blob.rs b/src/blob.rs index 19f5723e99..4085a1579d 100644 --- a/src/blob.rs +++ b/src/blob.rs @@ -168,7 +168,7 @@ impl<'a> BlobObject<'a> { // This will also replace an already-existing file. // Renaming is atomic, so this will avoid race conditions. - if let Err(_) = std::fs::rename(src, &new_path) { + if std::fs::rename(src, &new_path).is_err() { // Try a second time in case there was some temporary error. // There is no need to try and create the blobdir since create_and_deduplicate() // only works for files that already are in the blobdir, anyway. @@ -198,11 +198,11 @@ impl<'a> BlobObject<'a> { data: &[u8], ) -> Result> { let blobdir = context.get_blobdir(); - let blob = BlobObject::from_hash(blobdir, blake3::hash(&data))?; + let blob = BlobObject::from_hash(blobdir, blake3::hash(data))?; let new_path = blob.to_abs_path(); // This call to `std::fs::write` is thread safe because all threads write the same data. - if let Err(_) = std::fs::write(&new_path, &data) { + if std::fs::write(&new_path, data).is_err() { if new_path.exists() { // Looks like the file is read-only and exists already // TODO: Maybe we should check if the file contents are the same, @@ -214,7 +214,7 @@ impl<'a> BlobObject<'a> { } else { // Try to create the blob directory std::fs::create_dir_all(blobdir).log_err(context).ok(); - std::fs::write(&new_path, &data).context("fs::write")?; + std::fs::write(&new_path, data).context("fs::write")?; } } @@ -227,7 +227,7 @@ impl<'a> BlobObject<'a> { let hash = hash.to_hex(); let hash = hash.as_str().get(0..31).context("Too short hash")?; let blob = BlobObject { - blobdir: blobdir, + blobdir, name: format!("$BLOBDIR/{hash}"), }; Ok(blob) @@ -541,7 +541,7 @@ impl<'a> BlobObject<'a> { file.rewind()?; ImageReader::with_format( std::io::BufReader::new(&file), - ImageFormat::from_path(&self.to_abs_path())?, + ImageFormat::from_path(self.to_abs_path())?, ) } }; @@ -695,9 +695,9 @@ impl<'a> BlobObject<'a> { } fn set_readonly(new_path: &Path) -> Result<()> { - let mut perms = std::fs::metadata(&new_path)?.permissions(); + let mut perms = std::fs::metadata(new_path)?.permissions(); perms.set_readonly(true); - std::fs::set_permissions(&new_path, perms)?; + std::fs::set_permissions(new_path, perms)?; Ok(()) } @@ -1153,7 +1153,7 @@ mod tests { avatar_blob.ends_with("d98cd30ed8f2129bf3968420208849d"), "The avatar filename should be its hash, put instead it's {avatar_blob}" ); - let scaled_avatar_size = file_size(&avatar_path).await; + let scaled_avatar_size = file_size(avatar_path).await; assert!(scaled_avatar_size < avatar_bytes.len() as u64); check_image_size(avatar_src, 1000, 1000); @@ -1181,9 +1181,9 @@ mod tests { // The new file should be smaller: assert!(new_file_size < scaled_avatar_size); // And the original file should not be touched: - assert_eq!(file_size(&avatar_path).await, scaled_avatar_size); + assert_eq!(file_size(avatar_path).await, scaled_avatar_size); tokio::task::block_in_place(move || { - let img = ImageReader::open(&blob.to_abs_path()) + let img = ImageReader::open(blob.to_abs_path()) .unwrap() .with_guessed_format() .unwrap() diff --git a/src/receive_imf/tests.rs b/src/receive_imf/tests.rs index 71229bcba9..b85ce80c18 100644 --- a/src/receive_imf/tests.rs +++ b/src/receive_imf/tests.rs @@ -3247,7 +3247,7 @@ async fn test_weird_and_duplicated_filenames() -> Result<()> { "a. tar.tar.gz", ] { let attachment = alice.blobdir.join(filename_sent); - let content = format!("File content of tar.gz archive"); + let content = "File content of tar.gz archive".to_string(); tokio::fs::write(&attachment, content.as_bytes()).await?; let mut msg_alice = Message::new(Viewtype::File); From aa9dc2b32f3c9b86f7a838f0c892fbaaee7eab30 Mon Sep 17 00:00:00 2001 From: Hocuri Date: Fri, 27 Dec 2024 17:47:13 +0100 Subject: [PATCH 22/23] clippy: Make functions that are not async not be async --- deltachat-ffi/src/lib.rs | 25 +++++------ src/blob.rs | 87 +++++++++++++++++++-------------------- src/chat.rs | 18 +++----- src/download.rs | 4 +- src/imex/transfer.rs | 1 - src/location.rs | 3 +- src/message.rs | 15 +++---- src/mimefactory.rs | 3 +- src/mimeparser.rs | 3 +- src/peer_channels.rs | 3 -- src/receive_imf/tests.rs | 17 +++----- src/summary.rs | 21 +--------- src/webxdc.rs | 20 ++++----- src/webxdc/integration.rs | 6 +-- 14 files changed, 84 insertions(+), 142 deletions(-) diff --git a/deltachat-ffi/src/lib.rs b/deltachat-ffi/src/lib.rs index b14f5bd58c..c4e209c7d4 100644 --- a/deltachat-ffi/src/lib.rs +++ b/deltachat-ffi/src/lib.rs @@ -3827,20 +3827,17 @@ pub unsafe extern "C" fn dc_msg_set_file_and_deduplicate( let ffi_msg = &mut *msg; let ctx = &*ffi_msg.context; - block_on(async move { - ffi_msg - .message - .set_file_and_deduplicate( - ctx, - &as_path(file), - to_opt_string_lossy(name).as_deref(), - to_opt_string_lossy(filemime).as_deref(), - ) - .await - .context("failed to set file") - .log_err(&*ffi_msg.context) - .ok(); - }); + ffi_msg + .message + .set_file_and_deduplicate( + ctx, + &as_path(file), + to_opt_string_lossy(name).as_deref(), + to_opt_string_lossy(filemime).as_deref(), + ) + .context("failed to set file") + .log_err(&*ffi_msg.context) + .ok(); } #[no_mangle] diff --git a/src/blob.rs b/src/blob.rs index 4085a1579d..3749249d52 100644 --- a/src/blob.rs +++ b/src/blob.rs @@ -148,10 +148,11 @@ impl<'a> BlobObject<'a> { /// /// This is done in a in way which avoids race-conditions when multiple files are /// concurrently created. - pub async fn create_and_deduplicate( - context: &'a Context, - src: &Path, - ) -> Result> { + pub fn create_and_deduplicate(context: &'a Context, src: &Path) -> Result> { + // `create_and_deduplicate{_from_bytes}()` do blocking I/O, but can still be called + // from an async context thanks to `block_in_place()`. + // Tokio's "async" I/O functions are also just thin wrappers around the blocking I/O syscalls, + // so we are doing essentially the same here. task::block_in_place(|| { let blobdir = context.get_blobdir(); if !(src.starts_with(blobdir) || src.starts_with("$BLOBDIR/")) { @@ -186,41 +187,39 @@ impl<'a> BlobObject<'a> { /// the file will be renamed to a hash of the file data. /// /// The `data` will be written into the file without race-conditions. - pub async fn create_and_deduplicate_from_bytes( + /// + /// This function does blocking I/O, but it can still be called from an async context + /// because `block_in_place()` is used to leave the async runtime if necessary. + pub fn create_and_deduplicate_from_bytes( context: &'a Context, data: &[u8], ) -> Result> { - task::block_in_place(|| BlobObject::create_and_deduplicate_from_bytes_inner(context, data)) - } + task::block_in_place(|| { + let blobdir = context.get_blobdir(); + let blob = BlobObject::from_hash(blobdir, blake3::hash(data))?; + let new_path = blob.to_abs_path(); - fn create_and_deduplicate_from_bytes_inner( - context: &'a Context, - data: &[u8], - ) -> Result> { - let blobdir = context.get_blobdir(); - let blob = BlobObject::from_hash(blobdir, blake3::hash(data))?; - let new_path = blob.to_abs_path(); - - // This call to `std::fs::write` is thread safe because all threads write the same data. - if std::fs::write(&new_path, data).is_err() { - if new_path.exists() { - // Looks like the file is read-only and exists already - // TODO: Maybe we should check if the file contents are the same, - // or at least if the length is the same, and overwrite if not. - - // Set the file to be modified "now", so that it's not deleted during housekeeping - let f = std::fs::File::open(&new_path).context("File::open")?; - f.set_modified(SystemTime::now()).context("set_modified")?; - } else { - // Try to create the blob directory - std::fs::create_dir_all(blobdir).log_err(context).ok(); - std::fs::write(&new_path, data).context("fs::write")?; + // This call to `std::fs::write` is thread safe because all threads write the same data. + if std::fs::write(&new_path, data).is_err() { + if new_path.exists() { + // Looks like the file is read-only and exists already + // TODO: Maybe we should check if the file contents are the same, + // or at least if the length is the same, and overwrite if not. + + // Set the file to be modified "now", so that it's not deleted during housekeeping + let f = std::fs::File::open(&new_path).context("File::open")?; + f.set_modified(SystemTime::now()).context("set_modified")?; + } else { + // Try to create the blob directory + std::fs::create_dir_all(blobdir).log_err(context).ok(); + std::fs::write(&new_path, data).context("fs::write")?; + } } - } - set_readonly(&new_path).log_err(context).ok(); - context.emit_event(EventType::NewBlobFile(blob.as_name().to_string())); - Ok(blob) + set_readonly(&new_path).log_err(context).ok(); + context.emit_event(EventType::NewBlobFile(blob.as_name().to_string())); + Ok(blob) + }) } fn from_hash(blobdir: &Path, hash: blake3::Hash) -> Result> { @@ -670,7 +669,7 @@ impl<'a> BlobObject<'a> { encode_img(&img, ofmt, &mut encoded)?; } - self.name = BlobObject::create_and_deduplicate_from_bytes_inner(context, &encoded) + self.name = BlobObject::create_and_deduplicate_from_bytes(context, &encoded) .context("failed to write recoded blob to file")? .name; } @@ -1510,8 +1509,7 @@ mod tests { } let mut msg = Message::new(viewtype); - msg.set_file_and_deduplicate(&alice, &file, Some(&file_name), None) - .await?; + msg.set_file_and_deduplicate(&alice, &file, Some(&file_name), None)?; let chat = alice.create_chat(&bob).await; if set_draft { chat.id.set_draft(&alice, Some(&mut msg)).await.unwrap(); @@ -1569,8 +1567,7 @@ mod tests { .await .context("failed to write file")?; let mut msg = Message::new(Viewtype::Image); - msg.set_file_and_deduplicate(&alice, &file, Some("file.gif"), None) - .await?; + msg.set_file_and_deduplicate(&alice, &file, Some("file.gif"), None)?; let chat = alice.create_chat(&bob).await; let sent = alice.send_msg(chat.id, &mut msg).await; let bob_msg = bob.recv_msg(&sent).await; @@ -1613,7 +1610,7 @@ mod tests { let path = t.get_blobdir().join("anyfile.dat"); fs::write(&path, b"bla").await?; - let blob = BlobObject::create_and_deduplicate(&t, &path).await?; + let blob = BlobObject::create_and_deduplicate(&t, &path)?; assert_eq!(blob.name, "$BLOBDIR/ce940175885d7b78f7b7e9f1396611f"); assert_eq!(path.exists(), false); @@ -1624,19 +1621,19 @@ mod tests { assert_eq!(fs::read(&blob.to_abs_path()).await?, b"bla"); fs::write(&path, b"bla").await?; - let blob2 = BlobObject::create_and_deduplicate(&t, &path).await?; + let blob2 = BlobObject::create_and_deduplicate(&t, &path)?; assert_eq!(blob2.name, blob.name); let path_outside_blobdir = t.dir.path().join("anyfile.dat"); fs::write(&path_outside_blobdir, b"bla").await?; - let blob_res = BlobObject::create_and_deduplicate(&t, &path_outside_blobdir).await; + let blob_res = BlobObject::create_and_deduplicate(&t, &path_outside_blobdir); assert!( blob_res.is_err(), "Files outside the blobdir should not be allowed in create_and_deduplicate()" ); fs::write(&path, b"blabla").await?; - let blob3 = BlobObject::create_and_deduplicate(&t, &path).await?; + let blob3 = BlobObject::create_and_deduplicate(&t, &path)?; assert_ne!(blob3.name, blob.name); Ok(()) @@ -1647,7 +1644,7 @@ mod tests { let t = TestContext::new().await; fs::remove_dir(t.get_blobdir()).await?; - let blob = BlobObject::create_and_deduplicate_from_bytes(&t, b"bla").await?; + let blob = BlobObject::create_and_deduplicate_from_bytes(&t, b"bla")?; assert_eq!(blob.name, "$BLOBDIR/ce940175885d7b78f7b7e9f1396611f"); // The file should be read-only: @@ -1663,7 +1660,7 @@ mod tests { fs::write(&temp_file, b"temporary data").await?; SystemTime::shift(Duration::from_secs(65 * 60)); - let blob2 = BlobObject::create_and_deduplicate_from_bytes(&t, b"bla").await?; + let blob2 = BlobObject::create_and_deduplicate_from_bytes(&t, b"bla")?; assert_eq!(blob2.name, blob.name); // The modification time of the file should be updated @@ -1674,7 +1671,7 @@ mod tests { assert!(blob2.to_abs_path().exists()); assert_eq!(temp_file.exists(), false); - let blob3 = BlobObject::create_and_deduplicate_from_bytes(&t, b"blabla").await?; + let blob3 = BlobObject::create_and_deduplicate_from_bytes(&t, b"blabla")?; assert_ne!(blob3.name, blob.name); Ok(()) diff --git a/src/chat.rs b/src/chat.rs index d8eb554859..063abd8457 100644 --- a/src/chat.rs +++ b/src/chat.rs @@ -6470,8 +6470,7 @@ mod tests { tokio::fs::write(&file, bytes).await?; let mut msg = Message::new(Viewtype::Sticker); - msg.set_file_and_deduplicate(&alice, &file, Some(filename), None) - .await?; + msg.set_file_and_deduplicate(&alice, &file, Some(filename), None)?; let sent_msg = alice.send_msg(alice_chat.id, &mut msg).await; let mime = sent_msg.payload(); @@ -6536,7 +6535,6 @@ mod tests { // Images without force_sticker should be turned into [Viewtype::Image] let mut msg = Message::new(Viewtype::Sticker); msg.set_file_and_deduplicate(&alice, &file, Some("sticker.jpg"), None) - .await .unwrap(); let file = msg.get_file(&alice).unwrap(); let sent_msg = alice.send_msg(alice_chat.id, &mut msg).await; @@ -6546,7 +6544,6 @@ mod tests { // Images with `force_sticker = true` should keep [Viewtype::Sticker] let mut msg = Message::new(Viewtype::Sticker); msg.set_file_and_deduplicate(&alice, &file, Some("sticker.jpg"), None) - .await .unwrap(); msg.force_sticker(); let sent_msg = alice.send_msg(alice_chat.id, &mut msg).await; @@ -6557,7 +6554,6 @@ mod tests { // even on drafted messages let mut msg = Message::new(Viewtype::Sticker); msg.set_file_and_deduplicate(&alice, &file, Some("sticker.jpg"), None) - .await .unwrap(); msg.force_sticker(); alice_chat @@ -6599,8 +6595,7 @@ mod tests { let file = alice.get_blobdir().join(file_name); tokio::fs::write(&file, bytes).await?; let mut msg = Message::new(Viewtype::Sticker); - msg.set_file_and_deduplicate(&alice, &file, Some("sticker.jpg"), None) - .await?; + msg.set_file_and_deduplicate(&alice, &file, Some("sticker.jpg"), None)?; // send sticker to bob let sent_msg = alice.send_msg(alice_chat.get_id(), &mut msg).await; @@ -7195,8 +7190,7 @@ mod tests { let file = t.get_blobdir().join(name); tokio::fs::write(&file, bytes).await?; let mut msg = Message::new(msg_type); - msg.set_file_and_deduplicate(t, &file, Some(name), None) - .await?; + msg.set_file_and_deduplicate(t, &file, Some(name), None)?; send_msg(t, chat_id, &mut msg).await } @@ -7350,8 +7344,7 @@ mod tests { let file = alice.get_blobdir().join("harmless_file.\u{202e}txt.exe"); fs::write(&file, "aaa").await?; let mut msg = Message::new(Viewtype::File); - msg.set_file_and_deduplicate(&alice, &file, Some("harmless_file.\u{202e}txt.exe"), None) - .await?; + msg.set_file_and_deduplicate(&alice, &file, Some("harmless_file.\u{202e}txt.exe"), None)?; let msg = bob.recv_msg(&alice.send_msg(chat_id, &mut msg).await).await; // the file bob receives should not contain BIDI-control characters @@ -7690,8 +7683,7 @@ mod tests { let file = alice.get_blobdir().join("screenshot.png"); tokio::fs::write(&file, bytes).await?; let mut msg = Message::new(Viewtype::Image); - msg.set_file_and_deduplicate(&alice, &file, Some("screenshot.png"), None) - .await?; + msg.set_file_and_deduplicate(&alice, &file, Some("screenshot.png"), None)?; let alice_chat = alice.create_chat(&bob).await; let sent_msg = alice.send_msg(alice_chat.get_id(), &mut msg).await; diff --git a/src/download.rs b/src/download.rs index da8e9cd77c..c8a75227a0 100644 --- a/src/download.rs +++ b/src/download.rs @@ -440,9 +440,7 @@ mod tests { let file = alice.get_blobdir().join("minimal.xdc"); tokio::fs::write(&file, include_bytes!("../test-data/webxdc/minimal.xdc")).await?; let mut instance = Message::new(Viewtype::File); - instance - .set_file_and_deduplicate(&alice, &file, None, None) - .await?; + instance.set_file_and_deduplicate(&alice, &file, None, None)?; let _sent1 = alice.send_msg(chat_id, &mut instance).await; alice diff --git a/src/imex/transfer.rs b/src/imex/transfer.rs index a00ef58dd8..0fe28893b1 100644 --- a/src/imex/transfer.rs +++ b/src/imex/transfer.rs @@ -395,7 +395,6 @@ mod tests { fs::write(&file, "i am attachment").await.unwrap(); let mut msg = Message::new(Viewtype::File); msg.set_file_and_deduplicate(&ctx0, &file, Some("hello.txt"), Some("text/plain")) - .await .unwrap(); send_msg(&ctx0, self_chat.id, &mut msg).await.unwrap(); diff --git a/src/location.rs b/src/location.rs index 156f497480..2b32b19409 100644 --- a/src/location.rs +++ b/src/location.rs @@ -1074,8 +1074,7 @@ Content-Disposition: attachment; filename="location.kml" let file = alice.get_blobdir().join(file_name); tokio::fs::write(&file, bytes).await?; let mut msg = Message::new(Viewtype::Image); - msg.set_file_and_deduplicate(&alice, &file, Some("logo.png"), None) - .await?; + msg.set_file_and_deduplicate(&alice, &file, Some("logo.png"), None)?; let sent = alice.send_msg(alice_chat.id, &mut msg).await; let alice_msg = Message::load_from_db(&alice, sent.sender_msg_id).await?; assert_eq!(alice_msg.has_location(), false); diff --git a/src/message.rs b/src/message.rs index 2d7051e09f..1a1503e237 100644 --- a/src/message.rs +++ b/src/message.rs @@ -1091,14 +1091,14 @@ impl Message { /// In order to deduplicate files that contain the same data, /// the file will be renamed to a hash of the file data. /// The file must not be modified after this function was called. - pub async fn set_file_and_deduplicate( + pub fn set_file_and_deduplicate( &mut self, context: &Context, file: &Path, name: Option<&str>, filemime: Option<&str>, ) -> Result<()> { - let blob = BlobObject::create_and_deduplicate(context, file).await?; + let blob = BlobObject::create_and_deduplicate(context, file)?; if let Some(name) = name { self.param.set(Param::Filename, name); } else { @@ -1116,14 +1116,14 @@ impl Message { /// In order to deduplicate files that contain the same data, /// the filename will be a hash of the file data. /// The file must not be modified after this function was called. - pub async fn set_file_from_bytes( + pub fn set_file_from_bytes( &mut self, context: &Context, name: &str, data: &[u8], filemime: Option<&str>, ) -> Result<()> { - let blob = BlobObject::create_and_deduplicate_from_bytes(context, data).await?; + let blob = BlobObject::create_and_deduplicate_from_bytes(context, data)?; self.param.set(Param::Filename, name); self.param.set(Param::File, blob.as_name()); self.param.set_optional(Param::MimeType, filemime); @@ -1140,7 +1140,6 @@ impl Message { ); let vcard = contact::make_vcard(context, contacts).await?; self.set_file_from_bytes(context, "vcard.vcf", vcard.as_bytes(), None) - .await } /// Updates message state from the vCard attachment. @@ -2598,8 +2597,7 @@ mod tests { let file_bytes = include_bytes!("../test-data/image/screenshot.png"); let mut msg = Message::new(Viewtype::Image); - msg.set_file_from_bytes(bob, "a.jpg", file_bytes, None) - .await?; + msg.set_file_from_bytes(bob, "a.jpg", file_bytes, None)?; let sent_msg = bob.send_msg(bob_chat_id, &mut msg).await; let msg = alice.recv_msg(&sent_msg).await; assert_eq!(msg.download_state, DownloadState::Available); @@ -2668,8 +2666,7 @@ mod tests { let file_bytes = include_bytes!("../test-data/image/screenshot.png"); let mut msg = Message::new(Viewtype::Image); - msg.set_file_from_bytes(bob, "a.jpg", file_bytes, None) - .await?; + msg.set_file_from_bytes(bob, "a.jpg", file_bytes, None)?; let sent_msg = bob.send_msg(bob_chat_id, &mut msg).await; let msg = alice.recv_msg(&sent_msg).await; assert_eq!(msg.download_state, DownloadState::Available); diff --git a/src/mimefactory.rs b/src/mimefactory.rs index 4b1fabe225..a9842ab5ee 100644 --- a/src/mimefactory.rs +++ b/src/mimefactory.rs @@ -2525,8 +2525,7 @@ mod tests { // Long messages are truncated and MimeMessage::decoded_data is set for them. We need // decoded_data to check presence of the necessary headers. msg.set_text("a".repeat(constants::DC_DESIRED_TEXT_LEN + 1)); - msg.set_file_from_bytes(&bob, "foo.bar", "content".as_bytes(), None) - .await?; + msg.set_file_from_bytes(&bob, "foo.bar", "content".as_bytes(), None)?; let sent = bob.send_msg(chat, &mut msg).await; assert!(msg.get_showpadlock()); assert!(sent.payload.contains("\r\nSubject: [...]\r\n")); diff --git a/src/mimeparser.rs b/src/mimeparser.rs index 7d3f9bdaa2..a3e5998546 100644 --- a/src/mimeparser.rs +++ b/src/mimeparser.rs @@ -1371,8 +1371,7 @@ impl MimeMessage { /* we have a regular file attachment, write decoded data to new blob object */ - let blob = match BlobObject::create_and_deduplicate_from_bytes(context, decoded_data).await - { + let blob = match BlobObject::create_and_deduplicate_from_bytes(context, decoded_data) { Ok(blob) => blob, Err(err) => { error!( diff --git a/src/peer_channels.rs b/src/peer_channels.rs index feccc73880..6345fcf2e5 100644 --- a/src/peer_channels.rs +++ b/src/peer_channels.rs @@ -629,7 +629,6 @@ mod tests { include_bytes!("../test-data/webxdc/minimal.xdc"), None, ) - .await .unwrap(); send_msg(alice, alice_chat.id, &mut instance).await.unwrap(); @@ -801,7 +800,6 @@ mod tests { include_bytes!("../test-data/webxdc/minimal.xdc"), None, ) - .await .unwrap(); send_msg(alice, alice_chat.id, &mut instance).await.unwrap(); @@ -985,7 +983,6 @@ mod tests { include_bytes!("../test-data/webxdc/minimal.xdc"), None, ) - .await .unwrap(); send_msg(alice, alice_chat.id, &mut instance).await.unwrap(); let alice_webxdc = alice.get_last_msg().await; diff --git a/src/receive_imf/tests.rs b/src/receive_imf/tests.rs index b85ce80c18..23e8be6c65 100644 --- a/src/receive_imf/tests.rs +++ b/src/receive_imf/tests.rs @@ -3251,9 +3251,7 @@ async fn test_weird_and_duplicated_filenames() -> Result<()> { tokio::fs::write(&attachment, content.as_bytes()).await?; let mut msg_alice = Message::new(Viewtype::File); - msg_alice - .set_file_and_deduplicate(&alice, &attachment, None, None) - .await?; + msg_alice.set_file_and_deduplicate(&alice, &attachment, None, None)?; let alice_chat = alice.create_chat(&bob).await; let sent = alice.send_msg(alice_chat.id, &mut msg_alice).await; println!("{}", sent.payload()); @@ -4704,8 +4702,7 @@ async fn test_create_group_with_big_msg() -> Result<()> { let bob_grp_id = create_group_chat(&bob, ProtectionStatus::Unprotected, "Group").await?; add_contact_to_chat(&bob, bob_grp_id, ba_contact).await?; let mut msg = Message::new(Viewtype::Image); - msg.set_file_from_bytes(&bob, "a.jpg", file_bytes, None) - .await?; + msg.set_file_from_bytes(&bob, "a.jpg", file_bytes, None)?; let sent_msg = bob.send_msg(bob_grp_id, &mut msg).await; assert!(!msg.get_showpadlock()); @@ -4741,8 +4738,7 @@ async fn test_create_group_with_big_msg() -> Result<()> { let bob_grp_id = create_group_chat(&bob, ProtectionStatus::Unprotected, "Group1").await?; add_contact_to_chat(&bob, bob_grp_id, ba_contact).await?; let mut msg = Message::new(Viewtype::Image); - msg.set_file_from_bytes(&bob, "a.jpg", file_bytes, None) - .await?; + msg.set_file_from_bytes(&bob, "a.jpg", file_bytes, None)?; let sent_msg = bob.send_msg(bob_grp_id, &mut msg).await; assert!(msg.get_showpadlock()); @@ -5207,8 +5203,7 @@ async fn test_prefer_references_to_downloaded_msgs() -> Result<()> { let file_bytes = include_bytes!("../../test-data/image/screenshot.gif"); let mut msg = Message::new(Viewtype::File); - msg.set_file_from_bytes(alice, "file", file_bytes, None) - .await?; + msg.set_file_from_bytes(alice, "file", file_bytes, None)?; let mut sent = alice.send_msg(alice_chat_id, &mut msg).await; sent.payload = sent .payload @@ -5220,8 +5215,7 @@ async fn test_prefer_references_to_downloaded_msgs() -> Result<()> { assert_eq!(received.chat_id, bob.get_chat(alice).await.id); let mut msg = Message::new(Viewtype::File); - msg.set_file_from_bytes(alice, "file", file_bytes, None) - .await?; + msg.set_file_from_bytes(alice, "file", file_bytes, None)?; let sent = alice.send_msg(alice_chat_id, &mut msg).await; let received = bob.recv_msg(&sent).await; assert_eq!(received.download_state, DownloadState::Available); @@ -5280,7 +5274,6 @@ async fn test_receive_vcard() -> Result<()> { .as_bytes(), None, ) - .await .unwrap(); let alice_bob_chat = alice.create_chat(bob).await; diff --git a/src/summary.rs b/src/summary.rs index 4bbe4206f6..d2f55f3053 100644 --- a/src/summary.rs +++ b/src/summary.rs @@ -320,7 +320,6 @@ mod tests { let file = write_file_to_blobdir(&d).await; let mut msg = Message::new(Viewtype::Image); msg.set_file_and_deduplicate(&d, &file, Some("foo.jpg"), None) - .await .unwrap(); assert_summary_texts(&msg, ctx, "šŸ“· Image").await; // file names are not added for images @@ -328,14 +327,12 @@ mod tests { let mut msg = Message::new(Viewtype::Image); msg.set_text(some_text.to_string()); msg.set_file_and_deduplicate(&d, &file, Some("foo.jpg"), None) - .await .unwrap(); assert_summary_texts(&msg, ctx, "šŸ“· bla bla").await; // type is visible by emoji if text is set let file = write_file_to_blobdir(&d).await; let mut msg = Message::new(Viewtype::Video); msg.set_file_and_deduplicate(&d, &file, Some("foo.mp4"), None) - .await .unwrap(); assert_summary_texts(&msg, ctx, "šŸŽ„ Video").await; // file names are not added for videos @@ -343,14 +340,12 @@ mod tests { let mut msg = Message::new(Viewtype::Video); msg.set_text(some_text.to_string()); msg.set_file_and_deduplicate(&d, &file, Some("foo.mp4"), None) - .await .unwrap(); assert_summary_texts(&msg, ctx, "šŸŽ„ bla bla").await; // type is visible by emoji if text is set let file = write_file_to_blobdir(&d).await; let mut msg = Message::new(Viewtype::Gif); msg.set_file_and_deduplicate(&d, &file, Some("foo.gif"), None) - .await .unwrap(); assert_summary_texts(&msg, ctx, "GIF").await; // file names are not added for GIFs @@ -358,21 +353,18 @@ mod tests { let mut msg = Message::new(Viewtype::Gif); msg.set_text(some_text.to_string()); msg.set_file_and_deduplicate(&d, &file, Some("foo.gif"), None) - .await .unwrap(); assert_summary_texts(&msg, ctx, "GIF \u{2013} bla bla").await; // file names are not added for GIFs let file = write_file_to_blobdir(&d).await; let mut msg = Message::new(Viewtype::Sticker); msg.set_file_and_deduplicate(&d, &file, Some("foo.png"), None) - .await .unwrap(); assert_summary_texts(&msg, ctx, "Sticker").await; // file names are not added for stickers let file = write_file_to_blobdir(&d).await; let mut msg = Message::new(Viewtype::Voice); msg.set_file_and_deduplicate(&d, &file, Some("foo.mp3"), None) - .await .unwrap(); assert_summary_texts(&msg, ctx, "šŸŽ¤ Voice message").await; // file names are not added for voice messages @@ -380,14 +372,12 @@ mod tests { let mut msg = Message::new(Viewtype::Voice); msg.set_text(some_text.clone()); msg.set_file_and_deduplicate(&d, &file, Some("foo.mp3"), None) - .await .unwrap(); assert_summary_texts(&msg, ctx, "šŸŽ¤ bla bla").await; let file = write_file_to_blobdir(&d).await; let mut msg = Message::new(Viewtype::Audio); msg.set_file_and_deduplicate(&d, &file, Some("foo.mp3"), None) - .await .unwrap(); assert_summary_texts(&msg, ctx, "šŸŽµ foo.mp3").await; // file name is added for audio @@ -395,14 +385,12 @@ mod tests { let mut msg = Message::new(Viewtype::Audio); msg.set_text(some_text.clone()); msg.set_file_and_deduplicate(&d, &file, Some("foo.mp3"), None) - .await .unwrap(); assert_summary_texts(&msg, ctx, "šŸŽµ foo.mp3 \u{2013} bla bla").await; // file name and text added for audio let mut msg = Message::new(Viewtype::File); let bytes = include_bytes!("../test-data/webxdc/with-minimal-manifest.xdc"); msg.set_file_from_bytes(ctx, "foo.xdc", bytes, None) - .await .unwrap(); chat_id.set_draft(ctx, Some(&mut msg)).await.unwrap(); assert_eq!(msg.viewtype, Viewtype::Webxdc); @@ -414,7 +402,6 @@ mod tests { let file = write_file_to_blobdir(&d).await; let mut msg = Message::new(Viewtype::File); msg.set_file_and_deduplicate(&d, &file, Some("foo.bar"), None) - .await .unwrap(); assert_summary_texts(&msg, ctx, "šŸ“Ž foo.bar").await; // file name is added for files @@ -422,7 +409,6 @@ mod tests { let mut msg = Message::new(Viewtype::File); msg.set_text(some_text.clone()); msg.set_file_and_deduplicate(&d, &file, Some("foo.bar"), None) - .await .unwrap(); assert_summary_texts(&msg, ctx, "šŸ“Ž foo.bar \u{2013} bla bla").await; // file name is added for files @@ -430,14 +416,11 @@ mod tests { let mut msg = Message::new(Viewtype::VideochatInvitation); msg.set_text(some_text.clone()); msg.set_file_and_deduplicate(&d, &file, Some("foo.bar"), None) - .await .unwrap(); assert_summary_texts(&msg, ctx, "Video chat invitation").await; // text is not added for videochat invitations let mut msg = Message::new(Viewtype::Vcard); - msg.set_file_from_bytes(ctx, "foo.vcf", b"", None) - .await - .unwrap(); + msg.set_file_from_bytes(ctx, "foo.vcf", b"", None).unwrap(); chat_id.set_draft(ctx, Some(&mut msg)).await.unwrap(); // If a vCard can't be parsed, the message becomes `Viewtype::File`. assert_eq!(msg.viewtype, Viewtype::File); @@ -457,7 +440,6 @@ mod tests { END:VCARD", None, ) - .await .unwrap(); chat_id.set_draft(ctx, Some(&mut msg)).await.unwrap(); assert_eq!(msg.viewtype, Viewtype::Vcard); @@ -474,7 +456,6 @@ mod tests { let mut msg = Message::new(Viewtype::File); msg.set_text(some_text.clone()); msg.set_file_and_deduplicate(&d, &file, Some("foo.bar"), None) - .await .unwrap(); msg.param.set_int(Param::Forwarded, 1); assert_eq!( diff --git a/src/webxdc.rs b/src/webxdc.rs index 49e75e9b72..b5fb828b18 100644 --- a/src/webxdc.rs +++ b/src/webxdc.rs @@ -1049,7 +1049,7 @@ mod tests { async fn create_webxdc_instance(t: &TestContext, name: &str, bytes: &[u8]) -> Result { let mut instance = Message::new(Viewtype::File); - instance.set_file_from_bytes(t, name, bytes, None).await?; + instance.set_file_from_bytes(t, name, bytes, None)?; Ok(instance) } @@ -1078,9 +1078,7 @@ mod tests { // sending using bad extension is not working, even when setting Viewtype to webxdc let mut instance = Message::new(Viewtype::Webxdc); - instance - .set_file_from_bytes(&t, "index.html", b"ola!", None) - .await?; + instance.set_file_from_bytes(&t, "index.html", b"ola!", None)?; assert!(send_msg(&t, chat_id, &mut instance).await.is_err()); Ok(()) @@ -1105,14 +1103,12 @@ mod tests { // sending invalid .xdc as Viewtype::Webxdc should fail already on sending let mut instance = Message::new(Viewtype::Webxdc); - instance - .set_file_from_bytes( - &t, - "invalid2.xdc", - include_bytes!("../test-data/webxdc/invalid-no-zip-but-7z.xdc"), - None, - ) - .await?; + instance.set_file_from_bytes( + &t, + "invalid2.xdc", + include_bytes!("../test-data/webxdc/invalid-no-zip-but-7z.xdc"), + None, + )?; assert!(send_msg(&t, chat_id, &mut instance).await.is_err()); Ok(()) diff --git a/src/webxdc/integration.rs b/src/webxdc/integration.rs index b07600def6..fad7d20962 100644 --- a/src/webxdc/integration.rs +++ b/src/webxdc/integration.rs @@ -190,8 +190,7 @@ mod tests { "mapstest.xdc", include_bytes!("../../test-data/webxdc/mapstest-integration-unset.xdc"), None, - ) - .await?; + )?; t.send_msg(self_chat.id, &mut msg).await; assert_integration(&t, "with some icon").await?; // still the default integration @@ -202,8 +201,7 @@ mod tests { "mapstest.xdc", include_bytes!("../../test-data/webxdc/mapstest-integration-set.xdc"), None, - ) - .await?; + )?; let sent = t.send_msg(self_chat.id, &mut msg).await; let info = msg.get_webxdc_info(&t).await?; assert!(info.summary.contains("Used as map")); From f492186bd00736b23a5610557b08ff2ba6d00c89 Mon Sep 17 00:00:00 2001 From: Hocuri Date: Mon, 6 Jan 2025 20:10:56 +0100 Subject: [PATCH 23/23] Fix mistake I made when rebasing --- src/mimefactory.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mimefactory.rs b/src/mimefactory.rs index a9842ab5ee..bd2323ca61 100644 --- a/src/mimefactory.rs +++ b/src/mimefactory.rs @@ -1512,7 +1512,7 @@ pub(crate) fn wrapped_base64_encode(buf: &[u8]) -> String { .join("\r\n") } -async fn build_body_file(context: &Context, msg: &Message) -> Result<(PartBuilder, String)> { +async fn build_body_file(context: &Context, msg: &Message) -> Result { let file_name = msg.get_filename().context("msg has no file")?; let suffix = Path::new(&file_name) .extension()