Skip to content

Commit

Permalink
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: Remove original file stem from filenames in the blobstorage (#4309
Browse files Browse the repository at this point in the history
)

This way filenames in the blobstorage are just random hex numbers. This also allows us to get rid of
the `sanitize-filename` dependency.
iequidoo committed Aug 3, 2023
1 parent c44667b commit 48edc0c
Showing 10 changed files with 34 additions and 104 deletions.
1 change: 0 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -75,7 +75,6 @@ regex = "1.8"
reqwest = { version = "0.11.18", features = ["json"] }
rusqlite = { version = "0.29", features = ["sqlcipher"] }
rust-hsluv = "0.1"
sanitize-filename = "0.4"
serde_json = "1.0"
serde = { version = "1.0", features = ["derive"] }
sha-1 = "0.10"
1 change: 0 additions & 1 deletion node/test/test.js
Original file line number Diff line number Diff line change
@@ -446,7 +446,6 @@ describe('Offline Tests with unconfigured account', function () {
context.setChatProfileImage(chatId, imagePath)
const blobPath = context.getChat(chatId).getProfileImage()
expect(blobPath.startsWith(blobs)).to.be.true
expect(blobPath.includes('image')).to.be.true
expect(blobPath.endsWith('.jpeg')).to.be.true

context.setChatProfileImage(chatId, null)
3 changes: 0 additions & 3 deletions python/tests/test_1_online.py
Original file line number Diff line number Diff line change
@@ -182,13 +182,11 @@ def send_and_receive_message():
msg = send_and_receive_message()
assert msg.text == "withfile"
assert open(msg.file_path).read() == "some data"
msg.file_path.index(basename)
assert msg.file_path.endswith(ext)

msg2 = send_and_receive_message()
assert msg2.text == "withfile"
assert open(msg2.file_path).read() == "some data"
msg2.file_path.index(basename)
assert msg2.file_path.endswith(ext)
assert msg.file_path != msg2.file_path

@@ -215,7 +213,6 @@ def test_send_file_html_attachment(tmp_path, acfactory, lp):
msg = ac2.get_message_by_id(ev.data2)

assert open(msg.file_path).read() == content
msg.file_path.index(basename)
assert msg.file_path.endswith(ext)


1 change: 0 additions & 1 deletion python/tests/test_2_increation.py
Original file line number Diff line number Diff line change
@@ -50,7 +50,6 @@ def test_no_increation_copies_to_blobdir(self, tmp_path, acfactory, lp):
src = tmp_path / "file.txt"
src.write_text("hello there\n")
msg = chat.send_file(str(src))
assert msg.file_path.startswith(os.path.join(ac1.get_blobdir(), "file"))
assert msg.file_path.endswith(".txt")

def test_forward_increation(self, acfactory, data, lp):
123 changes: 32 additions & 91 deletions src/blob.rs
Original file line number Diff line number Diff line change
@@ -47,8 +47,8 @@ impl<'a> BlobObject<'a> {
data: &[u8],
) -> Result<BlobObject<'a>> {
let blobdir = context.get_blobdir();
let (stem, ext) = BlobObject::sanitise_name(suggested_name);
let (name, mut file) = BlobObject::create_new_file(context, blobdir, &stem, &ext).await?;
let ext = BlobObject::get_extension(suggested_name);
let (name, mut file) = BlobObject::create_new_file(context, blobdir, &ext).await?;
file.write_all(data).await.context("file write failure")?;

// workaround a bug in async-std
@@ -68,12 +68,11 @@ impl<'a> BlobObject<'a> {
async fn create_new_file(
context: &Context,
dir: &Path,
stem: &str,
ext: &str,
) -> Result<(String, fs::File)> {
let mut attempt = 0;
loop {
let name = format!("{}-{:016x}{}", stem, rand::random::<u64>(), ext);
let name = format!("{:016x}{}", rand::random::<u64>(), ext);
attempt += 1;
let path = dir.join(&name);
match fs::OpenOptions::new()
@@ -104,9 +103,9 @@ impl<'a> BlobObject<'a> {
let mut src_file = fs::File::open(src)
.await
.with_context(|| format!("failed to open file {}", src.display()))?;
let (stem, ext) = BlobObject::sanitise_name(&src.to_string_lossy());
let ext = BlobObject::get_extension(&src.to_string_lossy());
let (name, mut dst_file) =
BlobObject::create_new_file(context, context.get_blobdir(), &stem, &ext).await?;
BlobObject::create_new_file(context, context.get_blobdir(), &ext).await?;
let name_for_err = name.clone();
if let Err(err) = io::copy(&mut src_file, &mut dst_file).await {
// Attempt to remove the failed file, swallow errors resulting from that.
@@ -130,10 +129,8 @@ impl<'a> BlobObject<'a> {
///
/// If the source file is not a path to into the blob directory
/// the file will be copied into the blob directory first. If the
/// source file is already in the blobdir it will not be copied
/// and only be created if it is a valid blobname, that is no
/// subdirectory is used and [BlobObject::sanitise_name] does not
/// modify the filename.
/// source file is already in the blobdir (but not in a subdirectory)
/// it will not be copied and only be created if it is a valid blobname.
///
/// Paths into the blob directory may be either defined by an absolute path
/// or by the relative prefix `$BLOBDIR`.
@@ -150,8 +147,7 @@ impl<'a> BlobObject<'a> {
/// Returns a [BlobObject] for an existing blob from a path.
///
/// The path must designate a file directly in the blobdir and
/// must use a valid blob name. That is after sanitisation the
/// name must still be the same, that means it must be valid UTF-8
/// must use a valid blob name. That means it must be valid UTF-8
/// and not have any special characters in it.
pub fn from_path(context: &'a Context, path: &Path) -> Result<BlobObject<'a>> {
let rel_path = path
@@ -225,21 +221,8 @@ impl<'a> BlobObject<'a> {
}
}

/// Create a safe name based on a messy input string.
///
/// The safe name will be a valid filename on Unix and Windows and
/// not contain any path separators. The input can contain path
/// segments separated by either Unix or Windows path separators,
/// the rightmost non-empty segment will be used as name,
/// sanitised for special characters.
///
/// The resulting name is returned as a tuple, the first part
/// being the stem or basename and the second being an extension,
/// including the dot. E.g. "foo.txt" is returned as `("foo",
/// ".txt")` while "bar" is returned as `("bar", "")`.
///
/// The extension part will always be lowercased.
fn sanitise_name(name: &str) -> (String, String) {
/// Get a file extension if any, including the dot, in lower case, otherwise an empty string.
fn get_extension(name: &str) -> String {
let mut name = name.to_string();
for part in name.rsplit('/') {
if !part.is_empty() {
@@ -253,20 +236,12 @@ impl<'a> BlobObject<'a> {
break;
}
}
let opts = sanitize_filename::Options {
truncate: true,
windows: true,
replacement: "",
};

let clean = sanitize_filename::sanitize_with_options(name, opts);
// Let's take the tricky filename
// "file.with_lots_of_characters_behind_point_and_double_ending.tar.gz" as an example.
// Split it into "file" and "with_lots_of_characters_behind_point_and_double_ending.tar.gz":
let mut iter = clean.splitn(2, '.');

let stem: String = iter.next().unwrap_or_default().chars().take(64).collect();
// stem == "file"
let mut iter = name.splitn(2, '.');
iter.next();

let ext_chars = iter.next().unwrap_or_default().chars();
let ext: String = ext_chars
@@ -279,11 +254,10 @@ impl<'a> BlobObject<'a> {
// ext == "d_point_and_double_ending.tar.gz"

if ext.is_empty() {
(stem, "".to_string())
ext
} else {
(stem, format!(".{ext}").to_lowercase())
// Return ("file", ".d_point_and_double_ending.tar.gz")
// which is not perfect but acceptable.
format!(".{ext}").to_lowercase()
// Return ".d_point_and_double_ending.tar.gz", which is not perfect but acceptable.
}
}

@@ -638,7 +612,7 @@ mod tests {
async fn test_create() {
let t = TestContext::new().await;
let blob = BlobObject::create(&t, "foo", b"hello").await.unwrap();
let re = Regex::new("^foo-[[:xdigit:]]{16}$").unwrap();
let re = Regex::new("^[[:xdigit:]]{16}$").unwrap();
assert!(re.is_match(blob.as_file_name()));
let fname = t.get_blobdir().join(blob.as_file_name());
let data = fs::read(fname).await.unwrap();
@@ -657,23 +631,23 @@ mod tests {
async fn test_lowercase_ext() {
let t = TestContext::new().await;
let blob = BlobObject::create(&t, "foo.TXT", b"hello").await.unwrap();
let re = Regex::new("^\\$BLOBDIR/foo-[[:xdigit:]]{16}.txt$").unwrap();
let re = Regex::new("^\\$BLOBDIR/[[:xdigit:]]{16}.txt$").unwrap();
assert!(re.is_match(blob.as_name()));
}

#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn test_as_file_name() {
let t = TestContext::new().await;
let blob = BlobObject::create(&t, "foo.txt", b"hello").await.unwrap();
let re = Regex::new("^foo-[[:xdigit:]]{16}.txt$").unwrap();
let re = Regex::new("^[[:xdigit:]]{16}.txt$").unwrap();
assert!(re.is_match(blob.as_file_name()));
}

#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn test_as_rel_path() {
let t = TestContext::new().await;
let blob = BlobObject::create(&t, "foo.txt", b"hello").await.unwrap();
let re = Regex::new("^foo-[[:xdigit:]]{16}.txt$").unwrap();
let re = Regex::new("^[[:xdigit:]]{16}.txt$").unwrap();
assert!(re.is_match(blob.as_rel_path().to_str().unwrap()));
}

@@ -689,7 +663,7 @@ mod tests {
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn test_create_dup() {
let t = TestContext::new().await;
let re = Regex::new("^foo-[[:xdigit:]]{16}.txt$").unwrap();
let re = Regex::new("^[[:xdigit:]]{16}.txt$").unwrap();

let blob = BlobObject::create(&t, "foo.txt", b"hello").await.unwrap();
assert!(re.is_match(blob.as_rel_path().to_str().unwrap()));
@@ -710,7 +684,7 @@ mod tests {
let blob = BlobObject::create(&t, "foo.tar.gz", b"hello")
.await
.unwrap();
let re = Regex::new("^foo-[[:xdigit:]]{16}.tar.gz$").unwrap();
let re = Regex::new("^[[:xdigit:]]{16}.tar.gz$").unwrap();
assert!(re.is_match(blob.as_file_name()));
let foo_path = t.get_blobdir().join(blob.as_file_name());
assert!(foo_path.exists());
@@ -725,7 +699,6 @@ mod tests {
} else {
let name = fname.to_str().unwrap();
println!("{name}");
assert!(name.starts_with("foo"));
assert!(name.ends_with(".tar.gz"));
}
}
@@ -746,7 +719,7 @@ mod tests {
let src = t.dir.path().join("src");
fs::write(&src, b"boo").await.unwrap();
let blob = BlobObject::create_and_copy(&t, src.as_ref()).await.unwrap();
let re = Regex::new("^\\$BLOBDIR/src-[[:xdigit:]]{16}$").unwrap();
let re = Regex::new("^\\$BLOBDIR/[[:xdigit:]]{16}$").unwrap();
assert!(re.is_match(blob.as_name()));
let data = fs::read(blob.to_abs_path()).await.unwrap();
assert_eq!(data, b"boo");
@@ -768,7 +741,7 @@ mod tests {
let blob = BlobObject::new_from_path(&t, src_ext.as_ref())
.await
.unwrap();
let re = Regex::new("^\\$BLOBDIR/external-[[:xdigit:]]{16}$").unwrap();
let re = Regex::new("^\\$BLOBDIR/[[:xdigit:]]{16}$").unwrap();
assert!(re.is_match(blob.as_name()));
let data = fs::read(blob.to_abs_path()).await.unwrap();
assert_eq!(data, b"boo");
@@ -781,20 +754,6 @@ mod tests {
assert_eq!(data, b"boo");
}

#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn test_create_from_name_long() {
let t = TestContext::new().await;
let src_ext = t.dir.path().join("autocrypt-setup-message-4137848473.html");
fs::write(&src_ext, b"boo").await.unwrap();
let blob = BlobObject::new_from_path(&t, src_ext.as_ref())
.await
.unwrap();
let re =
Regex::new("^\\$BLOBDIR/autocrypt-setup-message-4137848473-[[:xdigit:]]{16}.html$")
.unwrap();
assert!(re.is_match(blob.as_name()));
}

#[test]
fn test_is_blob_name() {
assert!(BlobObject::is_acceptible_blob_name("foo"));
@@ -806,42 +765,24 @@ mod tests {
}

#[test]
fn test_sanitise_name() {
let (stem, ext) =
BlobObject::sanitise_name("Я ЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯ.txt");
fn test_get_extension() {
let ext = BlobObject::get_extension("Я ЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯ.txt");
assert_eq!(ext, ".txt");
assert!(!stem.is_empty());

// the extensions are kept together as between stem and extension a number may be added -
// and `foo.tar.gz` should become `foo-1234.tar.gz` and not `foo.tar-1234.gz`
let (stem, ext) = BlobObject::sanitise_name("wot.tar.gz");
assert_eq!(stem, "wot");
let ext = BlobObject::get_extension("wot.tar.gz");
assert_eq!(ext, ".tar.gz");

let (stem, ext) = BlobObject::sanitise_name(".foo.bar");
assert_eq!(stem, "");
let ext = BlobObject::get_extension(".foo.bar");
assert_eq!(ext, ".foo.bar");

let (stem, ext) = BlobObject::sanitise_name("foo?.bar");
assert!(stem.contains("foo"));
assert!(!stem.contains('?'));
let ext = BlobObject::get_extension("foo?.bar");
assert_eq!(ext, ".bar");

let (stem, ext) = BlobObject::sanitise_name("no-extension");
assert_eq!(stem, "no-extension");
let ext = BlobObject::get_extension("no-extension");
assert_eq!(ext, "");

let (stem, ext) = BlobObject::sanitise_name("path/ignored\\this: is* forbidden?.c");
let ext = BlobObject::get_extension("path/ignored\\this: is* forbidden?.c");
assert_eq!(ext, ".c");
assert!(!stem.contains("path"));
assert!(!stem.contains("ignored"));
assert!(stem.contains("this"));
assert!(stem.contains("forbidden"));
assert!(!stem.contains('/'));
assert!(!stem.contains('\\'));
assert!(!stem.contains(':'));
assert!(!stem.contains('*'));
assert!(!stem.contains('?'));
}

#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
@@ -856,7 +797,7 @@ mod tests {
let avatar_blob = t.get_config(Config::Selfavatar).await.unwrap().unwrap();
let blobdir = t.get_blobdir().to_str().unwrap();
assert!(avatar_blob.starts_with(blobdir));
let re = Regex::new("avatar-[[:xdigit:]]{16}.jpg$").unwrap();
let re = Regex::new("[[:xdigit:]]{16}.jpg$").unwrap();
assert!(re.is_match(&avatar_blob));
let avatar_blob = Path::new(&avatar_blob);
assert!(avatar_blob.exists());
@@ -925,7 +866,7 @@ mod tests {
let avatar_blob = t.get_config(Config::Selfavatar).await.unwrap().unwrap();
let blobdir = t.get_blobdir().to_str().unwrap();
assert!(avatar_blob.starts_with(blobdir));
let re = Regex::new("avatar-[[:xdigit:]]{16}.png$").unwrap();
let re = Regex::new("[[:xdigit:]]{16}.png$").unwrap();
assert!(re.is_match(&avatar_blob));
assert!(Path::new(&avatar_blob).exists());
assert_eq!(
2 changes: 1 addition & 1 deletion src/chat.rs
Original file line number Diff line number Diff line change
@@ -6245,7 +6245,7 @@ mod tests {
msg.param.get(Param::Filename).unwrap(),
"harmless_file.txt.exe"
);
let re = Regex::new("^\\$BLOBDIR/harmless_file-[[:xdigit:]]{16}.txt.exe$").unwrap();
let re = Regex::new("^\\$BLOBDIR/[[:xdigit:]]{16}.txt.exe$").unwrap();
assert!(re.is_match(msg.param.get(Param::File).unwrap()));
Ok(())
}
2 changes: 1 addition & 1 deletion src/mimeparser.rs
Original file line number Diff line number Diff line change
@@ -3712,7 +3712,7 @@ Message.
mime_message.parts[0].msg,
"this is a classic email – I attached the .EML file".to_string()
);
let re = Regex::new("^\\$BLOBDIR/-[[:xdigit:]]{16}.eml$").unwrap();
let re = Regex::new("^\\$BLOBDIR/[[:xdigit:]]{16}.eml$").unwrap();
assert!(re.is_match(mime_message.parts[0].param.get(Param::File).unwrap()));

assert_eq!(mime_message.parts[0].org_filename, Some(".eml".to_string()));
2 changes: 0 additions & 2 deletions src/param.rs
Original file line number Diff line number Diff line change
@@ -530,8 +530,6 @@ mod tests {
assert!(p.get_blob(Param::File, &t, false).await.is_err());

fs::write(fname, b"boo").await.unwrap();
let blob = p.get_blob(Param::File, &t, true).await.unwrap().unwrap();
assert!(blob.as_file_name().starts_with("foo"));

// Blob in blobdir, expect blob.
let bar_path = t.get_blobdir().join("bar");
2 changes: 0 additions & 2 deletions src/receive_imf/tests.rs
Original file line number Diff line number Diff line change
@@ -1457,7 +1457,6 @@ async fn test_pdf_filename_simple() {
assert_eq!(msg.viewtype, Viewtype::File);
assert_eq!(msg.text, "mail body");
let file_path = msg.param.get(Param::File).unwrap();
assert!(file_path.starts_with("$BLOBDIR/simple"));
assert!(file_path.ends_with(".pdf"));
}

@@ -1473,7 +1472,6 @@ async fn test_pdf_filename_continuation() {
assert_eq!(msg.viewtype, Viewtype::File);
assert_eq!(msg.text, "mail body");
let file_path = msg.param.get(Param::File).unwrap();
assert!(file_path.starts_with("$BLOBDIR/test pdf äöüß"));
assert!(file_path.ends_with(".pdf"));
}

0 comments on commit 48edc0c

Please sign in to comment.