diff --git a/src/services/uploads/package.json b/src/services/uploads/package.json index c23c02631..741c8ee54 100644 --- a/src/services/uploads/package.json +++ b/src/services/uploads/package.json @@ -7,5 +7,8 @@ "scripts": {}, "author": "", "license": "CC0-1.0", - "devDependencies": {} + "devDependencies": {}, + "dependencies": { + "file-type": "^19.0.0" + } } diff --git a/src/services/uploads/serverless.yml b/src/services/uploads/serverless.yml index a572c3f0d..0588fdb07 100644 --- a/src/services/uploads/serverless.yml +++ b/src/services/uploads/serverless.yml @@ -29,6 +29,7 @@ provider: - s3:PutObjectVersionTagging - s3:DeleteObject - s3:ListBucket + - s3:HeadObject Resource: - !Sub arn:aws:s3:::${self:service}-${sls:stage}-attachments-${AWS::AccountId}/* - !Sub arn:aws:s3:::${self:service}-${sls:stage}-avscan-${AWS::AccountId}/* diff --git a/src/services/uploads/src/antivirus.ts b/src/services/uploads/src/antivirus.ts index 2e0a2d811..cacf95873 100644 --- a/src/services/uploads/src/antivirus.ts +++ b/src/services/uploads/src/antivirus.ts @@ -88,7 +88,11 @@ const scanAndTagS3Object = async ( s3ObjectBucket ); utils.generateSystemMessage("Set virusScanStatus"); - virusScanStatus = scanLocalFile(fileLoc); + const metadata = await s3Client.send(new HeadObjectCommand({ + Bucket: s3ObjectBucket, + Key: s3ObjectKey, + })); + virusScanStatus = await scanLocalFile(fileLoc, metadata.ContentType); utils.generateSystemMessage(`virusScanStatus=${virusScanStatus}`); } diff --git a/src/services/uploads/src/clamav.ts b/src/services/uploads/src/clamav.ts index 19c6d5d05..4a9476582 100644 --- a/src/services/uploads/src/clamav.ts +++ b/src/services/uploads/src/clamav.ts @@ -11,6 +11,7 @@ import fs from "fs"; import asyncfs from "fs/promises"; import * as constants from "./constants"; import * as utils from "./utils"; +import {FileExtension, MimeType, fileTypeFromFile} from 'file-type'; const s3Client: S3Client = new S3Client(); @@ -206,8 +207,23 @@ export const uploadAVDefinitions = async (): Promise => { * * @param pathToFile Path in the filesystem where the file is stored. */ -export const scanLocalFile = (pathToFile: string): string | null => { +export const scanLocalFile = async (pathToFile: string, contentType: string | undefined): Promise => { try { + if(!contentType){ + utils.generateSystemMessage("FAILURE - EXTENSION UNKNOWN"); + return constants.STATUS_UNKNOWN_EXTENSION; + } + let detectedContentType = await getFileTypeFromContents(pathToFile); + if(detectedContentType){ + console.log(`File declared extension: ${contentType}`); + console.log(`File detected extension: ${detectedContentType}`) + let same = areMimeTypesEquivalent(contentType, detectedContentType) + if(!same){ + utils.generateSystemMessage(`FAILURE - FILE EXTENSION DOES NOT MATCH FILE CONTENTS`); + return constants.STATUS_EXTENSION_MISMATCH_FILE + } + } + const avResult: SpawnSyncReturns = spawnSync( constants.PATH_TO_CLAMAV, [ @@ -241,4 +257,44 @@ export const scanLocalFile = (pathToFile: string): string | null => { console.log(err); return constants.STATUS_ERROR_PROCESSING_FILE; } -}; \ No newline at end of file +}; + +async function getFileTypeFromContents(filePath: string): Promise { + try { + const fileBuffer = await fs.promises.readFile(filePath); + + // Get the file type from its contents + const type = await fileTypeFromFile(filePath); + + if (!type) { + console.log('Could not determine file type.'); + return null; + } + console.log(`File type is ${type.mime} with extension ${type.ext}`); + return type.mime + } catch (error) { + console.error('Error reading file:', error); + return null + } +} + +function areMimeTypesEquivalent(mime1: string, mime2: string): boolean { + const equivalentTypes: { [key: string]: Set } = { + 'application/rtf': new Set(['text/rtf']), + 'application/vnd.ms-excel': new Set(['application/x-cfb']), + 'application/vnd.ms-powerpoint': new Set(['application/x-cfb']), + 'application/msword': new Set(['application/x-cfb']) + }; + mime1 = mime1.toLowerCase(); + mime2 = mime2.toLowerCase(); + if (mime1 === mime2) { + return true; + } + for (const baseType in equivalentTypes) { + const equivalents = equivalentTypes[baseType]; + if ((mime1 === baseType && equivalents.has(mime2)) || (mime2 === baseType && equivalents.has(mime1))) { + return true; + } + } + return false; +} \ No newline at end of file diff --git a/src/services/uploads/src/constants.ts b/src/services/uploads/src/constants.ts index 61325d3bc..9d1dc060d 100644 --- a/src/services/uploads/src/constants.ts +++ b/src/services/uploads/src/constants.ts @@ -39,6 +39,10 @@ export const STATUS_ERROR_PROCESSING_FILE: string = process.env.STATUS_ERROR_PROCESSING_FILE || "ERROR"; export const STATUS_SKIPPED_FILE: string = process.env.STATUS_SKIPPED_FILE || "SKIPPED"; +export const STATUS_EXTENSION_MISMATCH_FILE: string = + process.env.STATUS_EXTENSION_MISMATCH_FILE || "EXTMISMATCH" +export const STATUS_UNKNOWN_EXTENSION: string = + process.env.STATUS_UNKNOWN_EXTENSION || "UKNOWNEXT" export const VIRUS_SCAN_STATUS_KEY: string = process.env.VIRUS_SCAN_STATUS_KEY || "virusScanStatus"; export const VIRUS_SCAN_TIMESTAMP_KEY: string = diff --git a/yarn.lock b/yarn.lock index eef264b51..938c3a4a5 100644 --- a/yarn.lock +++ b/yarn.lock @@ -10183,6 +10183,15 @@ file-type@^16.5.4: strtok3 "^6.2.4" token-types "^4.1.1" +file-type@^19.0.0: + version "19.0.0" + resolved "https://registry.yarnpkg.com/file-type/-/file-type-19.0.0.tgz#62a6cadc43f73ba38c53e1a174943a75fdafafa9" + integrity sha512-s7cxa7/leUWLiXO78DVVfBVse+milos9FitauDLG1pI7lNaJ2+5lzPnr2N24ym+84HVwJL6hVuGfgVE+ALvU8Q== + dependencies: + readable-web-to-node-stream "^3.0.2" + strtok3 "^7.0.0" + token-types "^5.0.1" + file-type@^3.8.0: version "3.9.0" resolved "https://registry.yarnpkg.com/file-type/-/file-type-3.9.0.tgz#257a078384d1db8087bc449d107d52a52672b9e9" @@ -13784,6 +13793,11 @@ peek-readable@^4.1.0: resolved "https://registry.yarnpkg.com/peek-readable/-/peek-readable-4.1.0.tgz#4ece1111bf5c2ad8867c314c81356847e8a62e72" integrity sha512-ZI3LnwUv5nOGbQzD9c2iDG6toheuXSZP5esSHBjopsXH4dg19soufvpUGA3uohi5anFtGb2lhAVdHzH6R/Evvg== +peek-readable@^5.0.0: + version "5.0.0" + resolved "https://registry.yarnpkg.com/peek-readable/-/peek-readable-5.0.0.tgz#7ead2aff25dc40458c60347ea76cfdfd63efdfec" + integrity sha512-YtCKvLUOvwtMGmrniQPdO7MwPjgkFBtFIrmfSbYmYuq3tKDV/mcfAhBth1+C3ru7uXIZasc/pHnb+YDYNkkj4A== + pend@~1.2.0: version "1.2.0" resolved "https://registry.yarnpkg.com/pend/-/pend-1.2.0.tgz#7a57eb550a6783f9115331fcf4663d5c8e007a50" @@ -14420,7 +14434,7 @@ readable-stream@^4.2.0: process "^0.11.10" string_decoder "^1.3.0" -readable-web-to-node-stream@^3.0.0: +readable-web-to-node-stream@^3.0.0, readable-web-to-node-stream@^3.0.2: version "3.0.2" resolved "https://registry.yarnpkg.com/readable-web-to-node-stream/-/readable-web-to-node-stream-3.0.2.tgz#5d52bb5df7b54861fd48d015e93a2cb87b3ee0bb" integrity sha512-ePeK6cc1EcKLEhJFt/AebMCLL+GgSKhuygrZ/GLaKZYEecIgIECf4UaUuaByiGtzckwR4ain9VzUh95T1exYGw== @@ -15425,6 +15439,14 @@ strtok3@^6.2.4: "@tokenizer/token" "^0.3.0" peek-readable "^4.1.0" +strtok3@^7.0.0: + version "7.0.0" + resolved "https://registry.yarnpkg.com/strtok3/-/strtok3-7.0.0.tgz#868c428b4ade64a8fd8fee7364256001c1a4cbe5" + integrity sha512-pQ+V+nYQdC5H3Q7qBZAz/MO6lwGhoC2gOAjuouGf/VO0m7vQRh8QNMl2Uf6SwAtzZ9bOw3UIeBukEGNJl5dtXQ== + dependencies: + "@tokenizer/token" "^0.3.0" + peek-readable "^5.0.0" + styled-components@^5.3.5: version "5.3.11" resolved "https://registry.yarnpkg.com/styled-components/-/styled-components-5.3.11.tgz#9fda7bf1108e39bf3f3e612fcc18170dedcd57a8" @@ -15782,6 +15804,14 @@ token-types@^4.1.1: "@tokenizer/token" "^0.3.0" ieee754 "^1.2.1" +token-types@^5.0.1: + version "5.0.1" + resolved "https://registry.yarnpkg.com/token-types/-/token-types-5.0.1.tgz#aa9d9e6b23c420a675e55413b180635b86a093b4" + integrity sha512-Y2fmSnZjQdDb9W4w4r1tswlMHylzWIeOKpx0aZH9BgGtACHhrk3OkT52AzwcuqTRBZtvvnTjDBh8eynMulu8Vg== + dependencies: + "@tokenizer/token" "^0.3.0" + ieee754 "^1.2.1" + totalist@^3.0.0: version "3.0.1" resolved "https://registry.yarnpkg.com/totalist/-/totalist-3.0.1.tgz#ba3a3d600c915b1a97872348f79c127475f6acf8"