Skip to content

Commit

Permalink
feat: Remove original file stem from filenames in the blobstorage (#4309
Browse files Browse the repository at this point in the history
)

This way filenames in the blobstorage are just random hex numbers. This also allows us to get rid of
the `sanitize-filename` dependency.

This also requires `Param::Filename` to be set to "debug_logging*.xdc" for messages containing
logging webxdc-s, otherwise they are not detected properly. This is done in "fix:
Message::set_file_from_bytes(): Set Param::Filename", so don't forget to update senders as well.
  • Loading branch information
iequidoo committed Apr 24, 2024
1 parent 2c31fb5 commit e1d8a30
Show file tree
Hide file tree
Showing 12 changed files with 43 additions and 118 deletions.
1 change: 0 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,6 @@ regex = { workspace = true }
reqwest = { version = "0.12.2", features = ["json"] }
rusqlite = { workspace = true, features = ["sqlcipher"] }
rust-hsluv = "0.1"
sanitize-filename = "0.5"
serde_json = "1"
serde = { version = "1.0", features = ["derive"] }
sha-1 = "0.10"
Expand Down
1 change: 0 additions & 1 deletion node/test/test.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -444,7 +444,6 @@ describe('Offline Tests with unconfigured account', function () {
context.setChatProfileImage(chatId, imagePath)
const blobPath = context.getChat(chatId).getProfileImage()
expect(blobPath.startsWith(blobs)).to.be.true
expect(blobPath.includes('image')).to.be.true
expect(blobPath.endsWith('.jpeg')).to.be.true

context.setChatProfileImage(chatId, null)
Expand Down
3 changes: 0 additions & 3 deletions python/tests/test_1_online.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,13 +181,11 @@ def send_and_receive_message():
msg = send_and_receive_message()
assert msg.text == "withfile"
assert open(msg.file_path).read() == "some data"
msg.file_path.index(basename)
assert msg.file_path.endswith(ext)

msg2 = send_and_receive_message()
assert msg2.text == "withfile"
assert open(msg2.file_path).read() == "some data"
msg2.file_path.index(basename)
assert msg2.file_path.endswith(ext)
assert msg.file_path != msg2.file_path

Expand All @@ -214,7 +212,6 @@ def test_send_file_html_attachment(tmp_path, acfactory, lp):
msg = ac2.get_message_by_id(ev.data2)

assert open(msg.file_path).read() == content
msg.file_path.index(basename)
assert msg.file_path.endswith(ext)


Expand Down
1 change: 0 additions & 1 deletion python/tests/test_2_increation.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,6 @@ def test_no_increation_copies_to_blobdir(self, tmp_path, acfactory, lp):
src = tmp_path / "file.txt"
src.write_text("hello there\n")
msg = chat.send_file(str(src))
assert msg.file_path.startswith(os.path.join(ac1.get_blobdir(), "file"))
assert msg.file_path.endswith(".txt")

def test_forward_increation(self, acfactory, data, lp):
Expand Down
123 changes: 32 additions & 91 deletions src/blob.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,8 @@ impl<'a> BlobObject<'a> {
data: &[u8],
) -> Result<BlobObject<'a>> {
let blobdir = context.get_blobdir();
let (stem, ext) = BlobObject::sanitise_name(suggested_name);
let (name, mut file) = BlobObject::create_new_file(context, blobdir, &stem, &ext).await?;
let ext = BlobObject::get_extension(suggested_name);
let (name, mut file) = BlobObject::create_new_file(context, blobdir, &ext).await?;
file.write_all(data).await.context("file write failure")?;

// workaround a bug in async-std
Expand All @@ -77,12 +77,11 @@ impl<'a> BlobObject<'a> {
async fn create_new_file(
context: &Context,
dir: &Path,
stem: &str,
ext: &str,
) -> Result<(String, fs::File)> {
let mut attempt = 0;
loop {
let name = format!("{}-{:016x}{}", stem, rand::random::<u64>(), ext);
let name = format!("{:016x}{}", rand::random::<u64>(), ext);
attempt += 1;
let path = dir.join(&name);
match fs::OpenOptions::new()
Expand Down Expand Up @@ -113,9 +112,9 @@ impl<'a> BlobObject<'a> {
let mut src_file = fs::File::open(src)
.await
.with_context(|| format!("failed to open file {}", src.display()))?;
let (stem, ext) = BlobObject::sanitise_name(&src.to_string_lossy());
let ext = BlobObject::get_extension(&src.to_string_lossy());
let (name, mut dst_file) =
BlobObject::create_new_file(context, context.get_blobdir(), &stem, &ext).await?;
BlobObject::create_new_file(context, context.get_blobdir(), &ext).await?;
let name_for_err = name.clone();
if let Err(err) = io::copy(&mut src_file, &mut dst_file).await {
// Attempt to remove the failed file, swallow errors resulting from that.
Expand All @@ -139,10 +138,8 @@ impl<'a> BlobObject<'a> {
///
/// If the source file is not a path to into the blob directory
/// the file will be copied into the blob directory first. If the
/// source file is already in the blobdir it will not be copied
/// and only be created if it is a valid blobname, that is no
/// subdirectory is used and [BlobObject::sanitise_name] does not
/// modify the filename.
/// source file is already in the blobdir (but not in a subdirectory)
/// it will not be copied and only be created if it is a valid blobname.
///
/// Paths into the blob directory may be either defined by an absolute path
/// or by the relative prefix `$BLOBDIR`.
Expand All @@ -159,8 +156,7 @@ impl<'a> BlobObject<'a> {
/// Returns a [BlobObject] for an existing blob from a path.
///
/// The path must designate a file directly in the blobdir and
/// must use a valid blob name. That is after sanitisation the
/// name must still be the same, that means it must be valid UTF-8
/// must use a valid blob name. That means it must be valid UTF-8
/// and not have any special characters in it.
pub fn from_path(context: &'a Context, path: &Path) -> Result<BlobObject<'a>> {
let rel_path = path
Expand Down Expand Up @@ -234,21 +230,8 @@ impl<'a> BlobObject<'a> {
}
}

/// Create a safe name based on a messy input string.
///
/// The safe name will be a valid filename on Unix and Windows and
/// not contain any path separators. The input can contain path
/// segments separated by either Unix or Windows path separators,
/// the rightmost non-empty segment will be used as name,
/// sanitised for special characters.
///
/// The resulting name is returned as a tuple, the first part
/// being the stem or basename and the second being an extension,
/// including the dot. E.g. "foo.txt" is returned as `("foo",
/// ".txt")` while "bar" is returned as `("bar", "")`.
///
/// The extension part will always be lowercased.
fn sanitise_name(name: &str) -> (String, String) {
/// Get a file extension if any, including the dot, in lower case, otherwise an empty string.
fn get_extension(name: &str) -> String {
let mut name = name.to_string();
for part in name.rsplit('/') {
if !part.is_empty() {
Expand All @@ -262,20 +245,12 @@ impl<'a> BlobObject<'a> {
break;
}
}
let opts = sanitize_filename::Options {
truncate: true,
windows: true,
replacement: "",
};

let clean = sanitize_filename::sanitize_with_options(name, opts);
// Let's take the tricky filename
// "file.with_lots_of_characters_behind_point_and_double_ending.tar.gz" as an example.
// Split it into "file" and "with_lots_of_characters_behind_point_and_double_ending.tar.gz":
let mut iter = clean.splitn(2, '.');

let stem: String = iter.next().unwrap_or_default().chars().take(64).collect();
// stem == "file"
let mut iter = name.splitn(2, '.');
iter.next();

let ext_chars = iter.next().unwrap_or_default().chars();
let ext: String = ext_chars
Expand All @@ -288,11 +263,10 @@ impl<'a> BlobObject<'a> {
// ext == "d_point_and_double_ending.tar.gz"

if ext.is_empty() {
(stem, "".to_string())
ext
} else {
(stem, format!(".{ext}").to_lowercase())
// Return ("file", ".d_point_and_double_ending.tar.gz")
// which is not perfect but acceptable.
format!(".{ext}").to_lowercase()
// Return ".d_point_and_double_ending.tar.gz", which is not perfect but acceptable.
}
}

Expand Down Expand Up @@ -760,7 +734,7 @@ mod tests {
async fn test_create() {
let t = TestContext::new().await;
let blob = BlobObject::create(&t, "foo", b"hello").await.unwrap();
let re = Regex::new("^foo-[[:xdigit:]]{16}$").unwrap();
let re = Regex::new("^[[:xdigit:]]{16}$").unwrap();
assert!(re.is_match(blob.as_file_name()));
let fname = t.get_blobdir().join(blob.as_file_name());
let data = fs::read(fname).await.unwrap();
Expand All @@ -779,23 +753,23 @@ mod tests {
async fn test_lowercase_ext() {
let t = TestContext::new().await;
let blob = BlobObject::create(&t, "foo.TXT", b"hello").await.unwrap();
let re = Regex::new("^\\$BLOBDIR/foo-[[:xdigit:]]{16}.txt$").unwrap();
let re = Regex::new("^\\$BLOBDIR/[[:xdigit:]]{16}.txt$").unwrap();
assert!(re.is_match(blob.as_name()));
}

#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn test_as_file_name() {
let t = TestContext::new().await;
let blob = BlobObject::create(&t, "foo.txt", b"hello").await.unwrap();
let re = Regex::new("^foo-[[:xdigit:]]{16}.txt$").unwrap();
let re = Regex::new("^[[:xdigit:]]{16}.txt$").unwrap();
assert!(re.is_match(blob.as_file_name()));
}

#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn test_as_rel_path() {
let t = TestContext::new().await;
let blob = BlobObject::create(&t, "foo.txt", b"hello").await.unwrap();
let re = Regex::new("^foo-[[:xdigit:]]{16}.txt$").unwrap();
let re = Regex::new("^[[:xdigit:]]{16}.txt$").unwrap();
assert!(re.is_match(blob.as_rel_path().to_str().unwrap()));
}

Expand All @@ -811,7 +785,7 @@ mod tests {
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn test_create_dup() {
let t = TestContext::new().await;
let re = Regex::new("^foo-[[:xdigit:]]{16}.txt$").unwrap();
let re = Regex::new("^[[:xdigit:]]{16}.txt$").unwrap();

let blob = BlobObject::create(&t, "foo.txt", b"hello").await.unwrap();
assert!(re.is_match(blob.as_rel_path().to_str().unwrap()));
Expand All @@ -832,7 +806,7 @@ mod tests {
let blob = BlobObject::create(&t, "foo.tar.gz", b"hello")
.await
.unwrap();
let re = Regex::new("^foo-[[:xdigit:]]{16}.tar.gz$").unwrap();
let re = Regex::new("^[[:xdigit:]]{16}.tar.gz$").unwrap();
assert!(re.is_match(blob.as_file_name()));
let foo_path = t.get_blobdir().join(blob.as_file_name());
assert!(foo_path.exists());
Expand All @@ -847,7 +821,6 @@ mod tests {
} else {
let name = fname.to_str().unwrap();
println!("{name}");
assert!(name.starts_with("foo"));
assert!(name.ends_with(".tar.gz"));
}
}
Expand All @@ -868,7 +841,7 @@ mod tests {
let src = t.dir.path().join("src");
fs::write(&src, b"boo").await.unwrap();
let blob = BlobObject::create_and_copy(&t, src.as_ref()).await.unwrap();
let re = Regex::new("^\\$BLOBDIR/src-[[:xdigit:]]{16}$").unwrap();
let re = Regex::new("^\\$BLOBDIR/[[:xdigit:]]{16}$").unwrap();
assert!(re.is_match(blob.as_name()));
let data = fs::read(blob.to_abs_path()).await.unwrap();
assert_eq!(data, b"boo");
Expand All @@ -890,7 +863,7 @@ mod tests {
let blob = BlobObject::new_from_path(&t, src_ext.as_ref())
.await
.unwrap();
let re = Regex::new("^\\$BLOBDIR/external-[[:xdigit:]]{16}$").unwrap();
let re = Regex::new("^\\$BLOBDIR/[[:xdigit:]]{16}$").unwrap();
assert!(re.is_match(blob.as_name()));
let data = fs::read(blob.to_abs_path()).await.unwrap();
assert_eq!(data, b"boo");
Expand All @@ -903,20 +876,6 @@ mod tests {
assert_eq!(data, b"boo");
}

#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn test_create_from_name_long() {
let t = TestContext::new().await;
let src_ext = t.dir.path().join("autocrypt-setup-message-4137848473.html");
fs::write(&src_ext, b"boo").await.unwrap();
let blob = BlobObject::new_from_path(&t, src_ext.as_ref())
.await
.unwrap();
let re =
Regex::new("^\\$BLOBDIR/autocrypt-setup-message-4137848473-[[:xdigit:]]{16}.html$")
.unwrap();
assert!(re.is_match(blob.as_name()));
}

#[test]
fn test_is_blob_name() {
assert!(BlobObject::is_acceptible_blob_name("foo"));
Expand All @@ -928,42 +887,24 @@ mod tests {
}

#[test]
fn test_sanitise_name() {
let (stem, ext) =
BlobObject::sanitise_name("Я ЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯ.txt");
fn test_get_extension() {
let ext = BlobObject::get_extension("Я ЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯ.txt");
assert_eq!(ext, ".txt");
assert!(!stem.is_empty());

// the extensions are kept together as between stem and extension a number may be added -
// and `foo.tar.gz` should become `foo-1234.tar.gz` and not `foo.tar-1234.gz`
let (stem, ext) = BlobObject::sanitise_name("wot.tar.gz");
assert_eq!(stem, "wot");
let ext = BlobObject::get_extension("wot.tar.gz");
assert_eq!(ext, ".tar.gz");

let (stem, ext) = BlobObject::sanitise_name(".foo.bar");
assert_eq!(stem, "");
let ext = BlobObject::get_extension(".foo.bar");
assert_eq!(ext, ".foo.bar");

let (stem, ext) = BlobObject::sanitise_name("foo?.bar");
assert!(stem.contains("foo"));
assert!(!stem.contains('?'));
let ext = BlobObject::get_extension("foo?.bar");
assert_eq!(ext, ".bar");

let (stem, ext) = BlobObject::sanitise_name("no-extension");
assert_eq!(stem, "no-extension");
let ext = BlobObject::get_extension("no-extension");
assert_eq!(ext, "");

let (stem, ext) = BlobObject::sanitise_name("path/ignored\\this: is* forbidden?.c");
let ext = BlobObject::get_extension("path/ignored\\this: is* forbidden?.c");
assert_eq!(ext, ".c");
assert!(!stem.contains("path"));
assert!(!stem.contains("ignored"));
assert!(stem.contains("this"));
assert!(stem.contains("forbidden"));
assert!(!stem.contains('/'));
assert!(!stem.contains('\\'));
assert!(!stem.contains(':'));
assert!(!stem.contains('*'));
assert!(!stem.contains('?'));
}

#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
Expand Down Expand Up @@ -1012,7 +953,7 @@ mod tests {
let avatar_blob = t.get_config(Config::Selfavatar).await.unwrap().unwrap();
let blobdir = t.get_blobdir().to_str().unwrap();
assert!(avatar_blob.starts_with(blobdir));
let re = Regex::new("avatar-[[:xdigit:]]{16}.jpg$").unwrap();
let re = Regex::new("[[:xdigit:]]{16}.jpg$").unwrap();
assert!(re.is_match(&avatar_blob));
let avatar_blob = Path::new(&avatar_blob);
assert!(avatar_blob.exists());
Expand Down Expand Up @@ -1089,7 +1030,7 @@ mod tests {
let avatar_blob = t.get_config(Config::Selfavatar).await.unwrap().unwrap();
let blobdir = t.get_blobdir().to_str().unwrap();
assert!(avatar_blob.starts_with(blobdir));
let re = Regex::new("avatar-[[:xdigit:]]{16}.png$").unwrap();
let re = Regex::new("[[:xdigit:]]{16}.png$").unwrap();
assert!(re.is_match(&avatar_blob));
assert!(Path::new(&avatar_blob).exists());
assert_eq!(
Expand Down
2 changes: 1 addition & 1 deletion src/chat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7082,7 +7082,7 @@ mod tests {
msg.param.get(Param::Filename).unwrap(),
"harmless_file.txt.exe"
);
let re = Regex::new("^\\$BLOBDIR/harmless_file-[[:xdigit:]]{16}.txt.exe$").unwrap();
let re = Regex::new("^\\$BLOBDIR/[[:xdigit:]]{16}.txt.exe$").unwrap();
assert!(re.is_match(msg.param.get(Param::File).unwrap()));
Ok(())
}
Expand Down
19 changes: 8 additions & 11 deletions src/debug_logging.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ use crate::tools::time;
use crate::webxdc::StatusUpdateItem;
use async_channel::{self as channel, Receiver, Sender};
use serde_json::json;
use std::path::PathBuf;
use tokio::task;

#[derive(Debug)]
Expand Down Expand Up @@ -98,7 +97,7 @@ pub async fn maybe_set_logging_xdc(
context,
msg.get_viewtype(),
chat_id,
msg.param.get_path(Param::File, context).unwrap_or_default(),
msg.param.get(Param::Filename),
msg.get_id(),
)
.await?;
Expand All @@ -111,18 +110,16 @@ pub async fn maybe_set_logging_xdc_inner(
context: &Context,
viewtype: Viewtype,
chat_id: ChatId,
file: Option<PathBuf>,
file_name: Option<&str>,
msg_id: MsgId,
) -> anyhow::Result<()> {
if viewtype == Viewtype::Webxdc {
if let Some(file) = file {
if let Some(file_name) = file.file_name().and_then(|name| name.to_str()) {
if file_name.starts_with("debug_logging")
&& file_name.ends_with(".xdc")
&& chat_id.is_self_talk(context).await?
{
set_debug_logging_xdc(context, Some(msg_id)).await?;
}
if let Some(file_name) = file_name {
if file_name.starts_with("debug_logging")
&& file_name.ends_with(".xdc")
&& chat_id.is_self_talk(context).await?
{
set_debug_logging_xdc(context, Some(msg_id)).await?;
}
}
}
Expand Down
Loading

0 comments on commit e1d8a30

Please sign in to comment.