Skip to content

Commit

Permalink
Support for extracting attachments from OneNote section files
Browse files Browse the repository at this point in the history
Includes rudimentary support for getting slices from FMaps and for
interacting with libclamav's context structure.

For now, this will use a Cisco-Talos org fork of the onenote_parser
until the feature to open a OneNote section from a slice (instead
of from a file path) is added upstream.
  • Loading branch information
micahsnyder committed Nov 6, 2023
1 parent 0f9de9e commit 1c7b11e
Show file tree
Hide file tree
Showing 22 changed files with 910 additions and 349 deletions.
402 changes: 149 additions & 253 deletions Cargo.lock

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions libclamav/filetypes.c
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,7 @@ static const struct ftmap_s {
{ "CL_TYPE_EGG", CL_TYPE_EGG },
{ "CL_TYPE_EGGSFX", CL_TYPE_EGGSFX },
{ "CL_TYPE_UDF", CL_TYPE_UDF },
{ "CL_TYPE_ONENOTE", CL_TYPE_ONENOTE },
{ NULL, CL_TYPE_IGNORED }
};
// clang-format on
Expand Down
1 change: 1 addition & 0 deletions libclamav/filetypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ typedef enum cli_file {
CL_TYPE_OOXML_HWP,
CL_TYPE_PS,
CL_TYPE_EGG,
CL_TYPE_ONENOTE,

/* Section for partition types */
CL_TYPE_PART_ANY, /* unknown partition type */
Expand Down
1 change: 1 addition & 0 deletions libclamav/filetypes_int.h
Original file line number Diff line number Diff line change
Expand Up @@ -204,5 +204,6 @@ static const char *ftypes_int[] = {
"0:0:4d4d:TIFF Big Endian:CL_TYPE_ANY:CL_TYPE_GRAPHICS:81:121",
"1:*:377abcaf271c:7zip-SFX:CL_TYPE_ANY:CL_TYPE_7ZSFX:74",
"1:0:3c3f786d6c2076657273696f6e3d22312e3022{0-1024}70726f6769643d22576f72642e446f63756d656e74223f3e:Microsoft Word 2003 XML Document:CL_TYPE_ANY:CL_TYPE_XML_WORD:80",
"0:0:e4525c7b8cd8a74daeb15378d02996d3:Microsoft OneNote Document:CL_TYPE_ANY:CL_TYPE_ONENOTE:200",
NULL};
#endif
1 change: 1 addition & 0 deletions libclamav/libclamav.map
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,7 @@ CLAMAV_PRIVATE {
readdb_parse_ldb_subsignature;
fuzzy_hash_calculate_image;
ffierror_fmt;
cli_magic_scan_buff;

__cli_strcasestr;
__cli_strndup;
Expand Down
1 change: 1 addition & 0 deletions libclamav/others.h
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,7 @@ typedef struct recursion_level_tag {
} recursion_level_t;

typedef void *evidence_t;
typedef void *onedump_t;

/* internal clamav context */
typedef struct cli_ctx_tag {
Expand Down
5 changes: 5 additions & 0 deletions libclamav/scanners.c
Original file line number Diff line number Diff line change
Expand Up @@ -4591,6 +4591,11 @@ cl_error_t cli_magic_scan(cli_ctx *ctx, cli_file_t type)
ret = cli_scanegg(ctx);
break;

case CL_TYPE_ONENOTE:
if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_EGG))
ret = scan_onenote(ctx);
break;

case CL_TYPE_OOXML_WORD:
case CL_TYPE_OOXML_PPT:
case CL_TYPE_OOXML_XL:
Expand Down
2 changes: 2 additions & 0 deletions libclamav_rust/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ base64 = "0.21.0"
sha1 = "0.10.5"
unicode-segmentation = "1.10.1"
bindgen = "0.65"
onenote_parser = { git = "https://github.com/Cisco-Talos/onenote.rs.git", branch = "CLAM-2329-new-from-slice" }
hex-literal = "0.4.1"

[lib]
crate-type = ["staticlib"]
Expand Down
6 changes: 5 additions & 1 deletion libclamav_rust/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ const BINDGEN_FUNCTIONS: &[&str] = &[
"cli_versig2",
"cli_getdsig",
"cli_get_debug_flag",
"cli_magic_scan_buff",
];

// Generate bindings for these types (structs, enums):
Expand All @@ -61,6 +62,7 @@ const BINDGEN_TYPES: &[&str] = &[
"cli_ac_result",
"css_image_extractor_t",
"css_image_handle_t",
"onedump_t",
];

// Find the required functions and types in these headers:
Expand All @@ -70,6 +72,8 @@ const BINDGEN_HEADERS: &[&str] = &[
"../libclamav/others.h",
"../libclamav/dsig.h",
"../libclamav/htmlnorm.h",
"../libclamav/fmap.h",
"../libclamav/scanners.h",
];

// Find the required headers in these directories:
Expand Down Expand Up @@ -135,7 +139,7 @@ fn execute_bindgen() -> Result<(), &'static str> {
// Silence code-style warnings for generated bindings.
.raw_line("#![allow(non_snake_case, non_camel_case_types, non_upper_case_globals)]")
// Make the bindings pretty.
.rustfmt_bindings(true)
.formatter(bindgen::Formatter::Rustfmt)
// Disable the layout tests.
// We're committing to source control. Pointer width, integer size, etc
// are probably not the same when generated as when compiled.
Expand Down
1 change: 1 addition & 0 deletions libclamav_rust/cbindgen.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ include = [
"evidence::evidence_num_indicators_type",
"evidence::evidence_add_indicator",
"evidence::IndicatorType",
"scanners::scan_onenote",
]

# prefix = "CAPI_"
Expand Down
55 changes: 27 additions & 28 deletions libclamav_rust/src/cdiff.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ use crate::validate_str_param;
use flate2::{read::GzDecoder, write::GzEncoder, Compression};
use log::{debug, error, warn};
use sha2::{Digest, Sha256};
use thiserror::Error;

/// Size of a digital signature
const SIG_SIZE: usize = 350;
Expand Down Expand Up @@ -88,8 +87,8 @@ struct Context {
}

/// Possible errors returned by cdiff_apply() and script2cdiff
#[derive(Debug, Error)]
pub enum CdiffError {
#[derive(thiserror::Error, Debug)]
pub enum Error {
#[error("Error in header: {0}")]
Header(#[from] HeaderError),

Expand Down Expand Up @@ -151,7 +150,7 @@ pub enum CdiffError {

/// Errors particular to input handling (i.e., syntax, or side effects from
/// handling input)
#[derive(Error, Debug)]
#[derive(thiserror::Error, Debug)]
pub enum InputError {
#[error("Unsupported command provided: {0}")]
UnknownCommand(String),
Expand Down Expand Up @@ -199,7 +198,7 @@ pub enum InputError {
}

/// Errors encountered while processing
#[derive(Debug, Error)]
#[derive(thiserror::Error, Debug)]
pub enum ProcessingError {
#[error("File {0} not closed before calling action MOVE")]
NotClosedBeforeAction(String),
Expand Down Expand Up @@ -238,7 +237,7 @@ pub enum ProcessingError {
IoError(#[from] std::io::Error),
}

#[derive(Error, Debug)]
#[derive(thiserror::Error, Debug)]
pub enum HeaderError {
#[error("invalid magic")]
BadMagic,
Expand All @@ -253,7 +252,7 @@ pub enum HeaderError {
IoError(#[from] std::io::Error),
}

#[derive(Error, Debug)]
#[derive(thiserror::Error, Debug)]
pub enum SignatureError {
#[error("IO error: {0}")]
IoError(#[from] std::io::Error),
Expand All @@ -265,7 +264,7 @@ pub enum SignatureError {
TooLarge,
}

#[derive(Error, Debug)]
#[derive(thiserror::Error, Debug)]
pub enum InvalidNumber {
#[error("not unicode")]
NotUnicode(#[from] std::str::Utf8Error),
Expand Down Expand Up @@ -462,7 +461,7 @@ pub extern "C" fn _script2cdiff(
/// signature from the sha256 of the contents written.
///
/// This function will panic if any of the &str parameters contain interior NUL bytes
pub fn script2cdiff(script_file_name: &str, builder: &str, server: &str) -> Result<(), CdiffError> {
pub fn script2cdiff(script_file_name: &str, builder: &str, server: &str) -> Result<(), Error> {
// Make a copy of the script file name to use for the cdiff file
let cdiff_file_name_string = script_file_name.to_string();
let mut cdiff_file_name = cdiff_file_name_string.as_str();
Expand All @@ -476,40 +475,40 @@ pub fn script2cdiff(script_file_name: &str, builder: &str, server: &str) -> Resu
// Get right-most hyphen index
let hyphen_index = cdiff_file_name
.rfind('-')
.ok_or(CdiffError::FilenameMissingHyphen)?;
.ok_or(Error::FilenameMissingHyphen)?;

// Get the version, which should be to the right of the hyphen
let version_string = cdiff_file_name
.get((hyphen_index + 1)..)
.ok_or(CdiffError::FilenameMissingVersion)?;
.ok_or(Error::FilenameMissingVersion)?;

// Parse the version into usize
let version = version_string
.to_string()
.parse::<usize>()
.map_err(CdiffError::VersionParse)?;
.map_err(Error::VersionParse)?;

// Add .cdiff suffix
let cdiff_file_name = format!("{}.{}", cdiff_file_name, "cdiff");
debug!("script2cdiff() - writing to: {:?}", &cdiff_file_name);

// Open cdiff_file_name for writing
let mut cdiff_file: File = File::create(&cdiff_file_name)
.map_err(|e| CdiffError::FileCreate(cdiff_file_name.to_owned(), e))?;
.map_err(|e| Error::FileCreate(cdiff_file_name.to_owned(), e))?;

// Open the original script file for reading
let script_file: File = File::open(script_file_name)
.map_err(|e| CdiffError::FileOpen(script_file_name.to_owned(), e))?;
.map_err(|e| Error::FileOpen(script_file_name.to_owned(), e))?;

// Get file length
let script_file_len = script_file
.metadata()
.map_err(|e| CdiffError::FileMeta(script_file_name.to_owned(), e))?
.map_err(|e| Error::FileMeta(script_file_name.to_owned(), e))?
.len();

// Write header to cdiff file
write!(cdiff_file, "ClamAV-Diff:{}:{}:", version, script_file_len)
.map_err(|e| CdiffError::FileWrite(script_file_name.to_owned(), e))?;
.map_err(|e| Error::FileWrite(script_file_name.to_owned(), e))?;

// Set up buffered reader and gz writer
let mut reader = BufReader::new(script_file);
Expand All @@ -521,12 +520,12 @@ pub fn script2cdiff(script_file_name: &str, builder: &str, server: &str) -> Resu
// Get cdiff file writer back from flate2
let mut cdiff_file = gz
.finish()
.map_err(|e| CdiffError::FileWrite(cdiff_file_name.to_owned(), e))?;
.map_err(|e| Error::FileWrite(cdiff_file_name.to_owned(), e))?;

// Get the new cdiff file len
let cdiff_file_len = cdiff_file
.metadata()
.map_err(|e| CdiffError::FileMeta(cdiff_file_name.to_owned(), e))?
.map_err(|e| Error::FileMeta(cdiff_file_name.to_owned(), e))?
.len();
debug!(
"script2cdiff() - wrote {} bytes to {}",
Expand All @@ -536,7 +535,7 @@ pub fn script2cdiff(script_file_name: &str, builder: &str, server: &str) -> Resu
// Calculate SHA2-256 to get the signature
// TODO: Do this while the file is being written
let bytes = std::fs::read(&cdiff_file_name)
.map_err(|e| CdiffError::FileRead(cdiff_file_name.to_owned(), e))?;
.map_err(|e| Error::FileRead(cdiff_file_name.to_owned(), e))?;
let sha256 = {
let mut hasher = Sha256::new();
hasher.update(&bytes);
Expand All @@ -561,12 +560,12 @@ pub fn script2cdiff(script_file_name: &str, builder: &str, server: &str) -> Resu
// Write cdiff footer delimiter
cdiff_file
.write_all(b":")
.map_err(|e| CdiffError::FileWrite(cdiff_file_name.to_owned(), e))?;
.map_err(|e| Error::FileWrite(cdiff_file_name.to_owned(), e))?;

// Write dsig to cdiff footer
cdiff_file
.write_all(dsig.to_bytes())
.map_err(|e| CdiffError::FileWrite(cdiff_file_name, e))?;
.map_err(|e| Error::FileWrite(cdiff_file_name, e))?;

// Exit success
Ok(())
Expand Down Expand Up @@ -609,7 +608,7 @@ pub extern "C" fn _cdiff_apply(fd: i32, mode: u16) -> i32 {
/// A cdiff file contains a footer that is the signed signature of the sha256
/// of the file contents (the header and the body). The footer begins after the
/// first ':' character to the left of EOF.
pub fn cdiff_apply(file: &mut File, mode: ApplyMode) -> Result<(), CdiffError> {
pub fn cdiff_apply(file: &mut File, mode: ApplyMode) -> Result<(), Error> {
let path = std::env::current_dir().unwrap();
debug!("cdiff_apply() - current directory is {}", path.display());

Expand Down Expand Up @@ -649,7 +648,7 @@ pub fn cdiff_apply(file: &mut File, mode: ApplyMode) -> Result<(), CdiffError> {
};
debug!("cdiff_apply() - cli_versig2() result = {}", versig_result);
if versig_result != 0 {
return Err(CdiffError::InvalidDigitalSignature);
return Err(Error::InvalidDigitalSignature);
}

// Read file length from header
Expand Down Expand Up @@ -1042,7 +1041,7 @@ fn process_lines<T>(
ctx: &mut Context,
reader: &mut T,
uncompressed_size: usize,
) -> Result<(), CdiffError>
) -> Result<(), Error>
where
T: BufRead,
{
Expand All @@ -1059,7 +1058,7 @@ where
match linebuf.first() {
// Skip comment lines
Some(b'#') => continue,
_ => process_line(ctx, &linebuf).map_err(|e| CdiffError::Input {
_ => process_line(ctx, &linebuf).map_err(|e| Error::Input {
line: line_no,
err: e,
operation: String::from_utf8_lossy(&linebuf).to_string(),
Expand Down Expand Up @@ -1150,7 +1149,7 @@ fn read_size(file: &mut File) -> Result<(u32, usize), HeaderError> {
}

/// Calculate the sha256 of the first len bytes of a file
fn get_hash(file: &mut File, len: usize) -> Result<[u8; 32], CdiffError> {
fn get_hash(file: &mut File, len: usize) -> Result<[u8; 32], Error> {
let mut hasher = Sha256::new();

// Seek to beginning of file
Expand Down Expand Up @@ -1193,7 +1192,7 @@ mod tests {
use std::path::Path;

/// CdiffTestError enumerates all possible errors returned by this testing library.
#[derive(Error, Debug)]
#[derive(thiserror::Error, Debug)]
pub enum CdiffTestError {
/// Represents all other cases of `std::io::Error`.
#[error(transparent)]
Expand Down Expand Up @@ -1545,7 +1544,7 @@ mod tests {
fn script2cdiff_missing_hyphen() {
assert!(matches!(
script2cdiff("", "", ""),
Err(CdiffError::FilenameMissingHyphen)
Err(Error::FilenameMissingHyphen)
));
}
}
11 changes: 5 additions & 6 deletions libclamav_rust/src/css_image_extract.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,13 @@ use std::{ffi::CStr, mem::ManuallyDrop, os::raw::c_char};

use base64::{engine::general_purpose as base64_engine_standard, Engine as _};
use log::{debug, error, warn};
use thiserror::Error;
use unicode_segmentation::UnicodeSegmentation;

use crate::sys;

/// CdiffError enumerates all possible errors returned by this library.
#[derive(Error, Debug)]
pub enum CssExtractError {
/// Error enumerates all possible errors returned by this library.
#[derive(thiserror::Error, Debug)]
pub enum Error {
#[error("Invalid format")]
Format,

Expand All @@ -56,7 +55,7 @@ pub struct CssImageExtractor<'a> {
}

impl<'a> CssImageExtractor<'a> {
pub fn new(css: &'a str) -> Result<Self, CssExtractError> {
pub fn new(css: &'a str) -> Result<Self, Error> {
Ok(Self { remaining: css })
}

Expand Down Expand Up @@ -152,7 +151,7 @@ impl<'a> CssImageExtractor<'a> {
};

// Trim off " at end.
let c = url_parameter.graphemes(true).rev().next();
let c = url_parameter.graphemes(true).next_back();
if let Some(c) = c {
if c == "\"" {
(url_parameter, _) = url_parameter.split_at(url_parameter.len() - 1);
Expand Down
Loading

0 comments on commit 1c7b11e

Please sign in to comment.