Skip to content

Commit

Permalink
fix(file type check): Modify how we determine the declared extension (#…
Browse files Browse the repository at this point in the history
…435)

* corrections

* fix(file type check):  Modify how we determine the declared extension, from relying on aws added content-type to including extension in filename

* allow jpg

* mirror logic in frontend... reallyv etting the mime type is allowed based off the extension.  consistency first
  • Loading branch information
mdial89f authored Mar 11, 2024
1 parent 935a341 commit 25e2cac
Show file tree
Hide file tree
Showing 5 changed files with 119 additions and 32 deletions.
15 changes: 13 additions & 2 deletions src/services/api/handlers/getUploadUrl.ts
Original file line number Diff line number Diff line change
@@ -1,18 +1,29 @@
import { response } from "../libs/handler";
import { APIGatewayEvent } from "aws-lambda";
import { S3Client, PutObjectCommand } from "@aws-sdk/client-s3";
import { getSignedUrl } from "@aws-sdk/s3-request-presigner";
import { v4 as uuidv4 } from "uuid";
import * as path from "node:path";

checkEnvVariables(["attachmentsBucketName", "attachmentsBucketRegion"]);

const s3 = new S3Client({
region: process.env.attachmentsBucketRegion,
});

export const handler = async () => {
export const handler = async (event: APIGatewayEvent) => {
try {
if (!event.body) {
return response({
statusCode: 400,
body: { message: "Event body required" },
});
}
const body = JSON.parse(event.body);
const bucket = process.env.attachmentsBucketName;
const key = uuidv4();
const fileName = body.fileName;
const extension = path.extname(fileName);
const key = `${uuidv4()}${extension}`; // ex: 123e4567-e89b-12d3-a456-426614174000.pdf
const url = await getSignedUrl(
s3,
new PutObjectCommand({
Expand Down
6 changes: 4 additions & 2 deletions src/services/ui/src/api/submissionService.ts
Original file line number Diff line number Diff line change
Expand Up @@ -161,9 +161,11 @@ export const submit = async <T extends Record<string, unknown>>({
);
// Generate a presigned url for each attachment
const preSignedURLs: PreSignedURL[] = await Promise.all(
attachments.map(() =>
attachments.map((attachment) =>
API.post("os", "/getUploadUrl", {
body: {},
body: {
fileName: attachment.file.name,
},
})
)
);
Expand Down
3 changes: 2 additions & 1 deletion src/services/uploads/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
"license": "CC0-1.0",
"devDependencies": {},
"dependencies": {
"file-type": "^19.0.0"
"file-type": "^19.0.0",
"mime-types": "^2.1.35"
}
}
23 changes: 15 additions & 8 deletions src/services/uploads/src/antivirus.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import {
HeadObjectCommand,
GetObjectCommand,
PutObjectTaggingCommand,
HeadObjectCommandOutput,
} from "@aws-sdk/client-s3";
import { randomUUID } from "crypto";
import fs from "fs";
Expand All @@ -22,12 +23,22 @@ export async function isS3FileTooBig(
const res: HeadObjectCommandOutput = await s3Client.send(
new HeadObjectCommand({ Key: key, Bucket: bucket })
);
return res.ContentLength > constants.MAX_FILE_SIZE;
if (
res.ContentLength === undefined ||
res.ContentLength === null ||
typeof res.ContentLength !== 'number'
) {
utils.generateSystemMessage(
`ContentLength is invalid for S3 Object: s3://${bucket}/${key}`
);
return false; // Or handle accordingly
}
return res.ContentLength > parseInt(constants.MAX_FILE_SIZE);
} catch (e) {
utils.generateSystemMessage(
`Error finding size of S3 Object: s3://${bucket}/${key}`
);
return false;
return true;
}
}

Expand All @@ -39,7 +50,7 @@ async function downloadFileFromS3(
fs.mkdirSync(constants.TMP_DOWNLOAD_PATH);
}

const localPath: string = `${constants.TMP_DOWNLOAD_PATH}${randomUUID()}.tmp`;
const localPath: string = `${constants.TMP_DOWNLOAD_PATH}${randomUUID()}--${s3ObjectKey}`;
const writeStream: fs.WriteStream = fs.createWriteStream(localPath);

utils.generateSystemMessage(
Expand Down Expand Up @@ -88,11 +99,7 @@ const scanAndTagS3Object = async (
s3ObjectBucket
);
utils.generateSystemMessage("Set virusScanStatus");
const metadata = await s3Client.send(new HeadObjectCommand({
Bucket: s3ObjectBucket,
Key: s3ObjectKey,
}));
virusScanStatus = await scanLocalFile(fileLoc, metadata.ContentType);
virusScanStatus = await scanLocalFile(fileLoc);
utils.generateSystemMessage(`virusScanStatus=${virusScanStatus}`);
}

Expand Down
104 changes: 85 additions & 19 deletions src/services/uploads/src/clamav.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,13 @@ import {
import { spawnSync, SpawnSyncReturns } from "child_process";
import path from "path";
import fs from "fs";
import readline from 'readline';
import asyncfs from "fs/promises";
import * as constants from "./constants";
import * as utils from "./utils";
import { FileExtension, MimeType, fileTypeFromFile } from "file-type";
import mimeTypes from 'mime-types';
import {FILE_TYPES} from "shared-types"

const s3Client: S3Client = new S3Client();

Expand Down Expand Up @@ -196,6 +199,40 @@ export const uploadAVDefinitions = async (): Promise<void[]> => {
return await Promise.all(uploadPromises);
};

async function looksLikeCsv(filePath: string, delimiter: string = ',', maxLinesToCheck: number = 10): Promise<boolean> {
const fileStream = fs.createReadStream(filePath);

const rl = readline.createInterface({
input: fileStream,
crlfDelay: Infinity
});

let lineNumber = 0;
let previousNumberOfFields = 0;

for await (const line of rl) {
lineNumber++;

// Skip empty lines
if (line.trim() === '') continue;

const fields = line.split(delimiter);

// Check if the number of fields is consistent across rows
if (lineNumber > 1 && fields.length !== previousNumberOfFields) {
return false;
}

previousNumberOfFields = fields.length;

if (lineNumber >= maxLinesToCheck) {
break;
}
}

return true;
}

/**
* Function to scan the given file. This function requires ClamAV and the definitions to be available.
* This function does not download the file so the file should also be accessible.
Expand All @@ -209,24 +246,43 @@ export const uploadAVDefinitions = async (): Promise<void[]> => {
*/
export const scanLocalFile = async (
pathToFile: string,
contentType: string | undefined
): Promise<string | null> => {
try {
if (!contentType) {
utils.generateSystemMessage("FAILURE - EXTENSION UNKNOWN");
try {
// Calculate the mime type based off the extension.
let mimeTypeFromExtension = mimeTypes.lookup(path.extname(pathToFile));

// Error out if mimeTypes couldn't figure out the mime type.
if (!mimeTypeFromExtension) {
utils.generateSystemMessage("FAILURE - CANNOT DETERMINE MIMETYPE FROM EXTENSION");
return constants.STATUS_UNKNOWN_EXTENSION;
}
let detectedContentType = await getFileTypeFromContents(pathToFile);
if (detectedContentType) {
console.log(`File declared extension: ${contentType}`);
console.log(`File detected extension: ${detectedContentType}`);
let same = areMimeTypesEquivalent(contentType, detectedContentType);
if (!same) {
utils.generateSystemMessage(
`FAILURE - FILE EXTENSION DOES NOT MATCH FILE CONTENTS`
);
return constants.STATUS_EXTENSION_MISMATCH_FILE;
}

// Error out if the extension is not allowed
if (!isAllowedMime(mimeTypeFromExtension)) {
utils.generateSystemMessage("FAILURE - EXTENSION IS NOT OF AN ALLOWED TYPE");
return constants.STATUS_UNKNOWN_EXTENSION;
}

// Caclulate the mime type based off the file's contents.
let mimeTypeFromContents = await getFileTypeFromContents(pathToFile);
// Error out if file-type couldn't determine the mime type.
if(!mimeTypeFromContents) {
utils.generateSystemMessage("FAILURE - CANNOT DETERMINE MIMETYPE FROM CONTENTS");
return constants.STATUS_UNKNOWN_EXTENSION;
}

// Log
console.log(`File mimetype from extension: ${mimeTypeFromExtension}`);
console.log(`File mimetype from contents: ${mimeTypeFromContents}`);

// Check if the mimes are equivalent
let same = areMimeTypesEquivalent(mimeTypeFromExtension, mimeTypeFromContents);
// Error out if we can't determine equivalence
if (!same) {
utils.generateSystemMessage(
`FAILURE - MIMETYPE CALCULATED FROM EXTENSION DOES NOT MATCH MIMETYPE CALCULATED FROM CONTENTS`
);
return constants.STATUS_EXTENSION_MISMATCH_FILE;
}

const avResult: SpawnSyncReturns<Buffer> = spawnSync(
Expand Down Expand Up @@ -257,24 +313,33 @@ export const scanLocalFile = async (
}
};

function isAllowedMime(mime: string): boolean {
return FILE_TYPES.some((fileType) => fileType.mime === mime);
}

async function getFileTypeFromContents(
filePath: string
): Promise<MimeType | null> {
): Promise<MimeType | false> {
try {
const fileBuffer = await fs.promises.readFile(filePath);

// Get the file type from its contents
const type = await fileTypeFromFile(filePath);

if (!type) {
if(path.extname(filePath) == ".csv") {
if((await looksLikeCsv(filePath, ",", 100))) {
return mimeTypes.lookup(".csv")
}
}
console.log("Could not determine file type.");
return null;
return false;
}
console.log(`File type is ${type.mime} with extension ${type.ext}`);
console.log(`getFileTypeFromContents: File determined to be mime:${type.mime} ext:${type.ext}`);
return type.mime;
} catch (error) {
console.error("Error reading file:", error);
return null;
return false;
}
}

Expand All @@ -291,6 +356,7 @@ function areMimeTypesEquivalent(mime1: string, mime2: string): boolean {
return true;
}
for (const baseType in equivalentTypes) {
console.log("Mime types not identical... checking AKAs for equivalence...")
const equivalents = equivalentTypes[baseType];
if (
(mime1 === baseType && equivalents.has(mime2)) ||
Expand Down

0 comments on commit 25e2cac

Please sign in to comment.