From c27081383a202ec6ba6151fd3cc01cf7ac0f034e Mon Sep 17 00:00:00 2001 From: ragusaa <54862477+ragusaa@users.noreply.github.com> Date: Thu, 28 Sep 2023 11:29:46 -0400 Subject: [PATCH] Fixed passes so that all signatures in test set will build (#38) Various bug fixes in ClamBCLogicalCompiler, ClamBCRemovePointerPHIs, and ClamBCVerifier to enable all signatures in the test set to finish with passes that have been merged. --- clambcc/clambc-compiler.py | 5 + libclambcc/CMakeLists.txt | 2 + .../ClamBCLogicalCompiler.cpp | 164 ++++++++++++++- .../CMakeLists.txt | 72 +++++++ .../ClamBCLogicalCompilerHelper.cpp | 196 ++++++++++++++++++ .../ClamBCRemoveFreezeInsts/CMakeLists.txt | 72 +++++++ .../ClamBCRemoveFreezeInsts.cpp | 124 +++++++++++ .../ClamBCRemovePointerPHIs.cpp | 4 +- libclambcc/ClamBCVerifier/ClamBCVerifier.cpp | 159 ++++++++++++-- libclambcc/Common/clambc.h | 4 + temp_delete_when_merge/build_all.py | 127 ++++++++++++ 11 files changed, 901 insertions(+), 28 deletions(-) create mode 100644 libclambcc/ClamBCLogicalCompilerHelper/CMakeLists.txt create mode 100644 libclambcc/ClamBCLogicalCompilerHelper/ClamBCLogicalCompilerHelper.cpp create mode 100644 libclambcc/ClamBCRemoveFreezeInsts/CMakeLists.txt create mode 100644 libclambcc/ClamBCRemoveFreezeInsts/ClamBCRemoveFreezeInsts.cpp create mode 100755 temp_delete_when_merge/build_all.py diff --git a/clambcc/clambc-compiler.py b/clambcc/clambc-compiler.py index 5a8e758554..e0ada64402 100755 --- a/clambcc/clambc-compiler.py +++ b/clambcc/clambc-compiler.py @@ -582,6 +582,11 @@ def optimize(clangLLVM: ClangLLVM, inFile: str, outFile: str, sigFile: str, inpu f' -clambc-sigfile={sigFile}' ) + print ("NEED TO ADD NEW SLP OPTIONS") + print ("NEED TO ADD ClamBCLogicalCompilerHelper") +#OPTIONS_STR+=" --vectorize-slp=false" +#OPTIONS_STR+=" --vectorize-loops=false" + if standardCompiler: cmd += f" -clambc-standard-compiler" diff --git a/libclambcc/CMakeLists.txt b/libclambcc/CMakeLists.txt index 2cf8b12adf..319997d104 100644 
--- a/libclambcc/CMakeLists.txt +++ b/libclambcc/CMakeLists.txt @@ -2,6 +2,7 @@ add_subdirectory(ClamBCLogicalCompiler) +add_subdirectory(ClamBCLogicalCompilerHelper) add_subdirectory(ClamBCRemoveUndefs) add_subdirectory(ClamBCPreserveABIs) add_subdirectory(ClamBCAnalyzer) @@ -9,4 +10,5 @@ add_subdirectory(Common) add_subdirectory(ClamBCVerifier) add_subdirectory(ClamBCRemovePointerPHIs) add_subdirectory(ClamBCLowering) +add_subdirectory(ClamBCRemoveFreezeInsts) diff --git a/libclambcc/ClamBCLogicalCompiler/ClamBCLogicalCompiler.cpp b/libclambcc/ClamBCLogicalCompiler/ClamBCLogicalCompiler.cpp index ee7171e09c..7f14e09649 100644 --- a/libclambcc/ClamBCLogicalCompiler/ClamBCLogicalCompiler.cpp +++ b/libclambcc/ClamBCLogicalCompiler/ClamBCLogicalCompiler.cpp @@ -604,13 +604,22 @@ class LogicalCompiler { Value *V = LI.getOperand(0); ConstantExpr *CE = dyn_cast(V); - if (!CE || CE->getOpcode() != Instruction::GetElementPtr || - CE->getOperand(0) != GV || CE->getNumOperands() != 3 || - !cast(CE->getOperand(1))->isZero()) { - printDiagnostic("Logical signature: unsupported read", &LI); - return false; + ConstantInt * CI = nullptr; + if (CE) { + if (CE->getOpcode() != Instruction::GetElementPtr || + CE->getOperand(0) != GV || CE->getNumOperands() != 3 || + !cast(CE->getOperand(1))->isZero()) { + printDiagnostic("Logical signature: unsupported read", &LI); + return false; + } + CI = cast(CE->getOperand(2)); + } else { + /* In this case, we are directly loading the global, + * instead of using a getelementptr. + * It is likely that this would have been changed by O3. 
+ */ + CI = ConstantInt::get(LI.getParent()->getParent()->getParent()->getContext(), APInt(64, 0)); } - ConstantInt *CI = cast(CE->getOperand(2)); Map[&LI] = LogicalNode::getSubSig(allNodes, CI->getValue().getZExtValue()); return true; } @@ -934,6 +943,25 @@ class LogicalCompiler } Instruction *pInst = llvm::cast(I); +#if 0 + /*Look through all operands of the instruction and add the + * constants to the logical map, so that we won't fail to create + * the siganture if O3 changes a logical expression to a constant + * at compile time. + */ + for (size_t i = 0; i < pInst->getNumOperands(); i++){ + if (ConstantInt * pci = llvm::dyn_cast(pInst->getOperand(i))){ + if (pci->isOne()){ + LogicalNode * ln = LogicalNode::getTrue(allNodes) ; + Map[pci] = ln; + } else if (pci->isZero()){ + LogicalNode * ln = LogicalNode::getTrue(allNodes) ; + Map[pci] = ln; + } + } + } +#endif + switch (I->getOpcode()) { case Instruction::Load: valid &= processLoad(*cast(I)); @@ -968,18 +996,132 @@ class LogicalCompiler LogicalMap::iterator CondNode = Map.find(SI->getCondition()); LogicalMap::iterator TrueNode = Map.find(SI->getTrueValue()); LogicalMap::iterator FalseNode = Map.find(SI->getFalseValue()); + + +#if 0 + if (Map.end() == TrueNode){ + Value * pv = SI->getTrueValue(); + if (ConstantInt * pci = llvm::dyn_cast(pv)){ + if (pci->isOne()){ + LogicalNode * ln = LogicalNode::getTrue(allNodes) ; + Map[SI->getTrueValue()] = ln; + TrueNode = Map.find(SI->getTrueValue()); + } + } + } + +#endif + + +#if 0 if (CondNode == Map.end() || TrueNode == Map.end() || FalseNode == Map.end()) { + assert (0 && "FTT"); printDiagnostic("Logical signature: select operands must be logical" " expressions", SI); return false; } +#else + /*O3 creates blocks that look like the following, which are legitimate blocks. + * This is essentially an AND of all the %cmp.i instructions. 
+ * Since the cmp instructions all have false at the end, comparisons will be skipped + * after one is found to be false, without having a bunch of branch instructions. + * + * We are going to handle these cases by only adding an 'and' or an 'or' if there is + * an actual logical operation, not for constants. + * + + entry: + %0 = load i32, ptr @__clambc_match_counts, align 16 + %cmp.i116.not = icmp eq i32 %0, 0 + %1 = load i32, ptr getelementptr inbounds ([64 x i32], ptr @__clambc_match_counts, i64 0, i64 1), align 4 + %cmp.i112.not = icmp eq i32 %1, 0 + %or.cond = select i1 %cmp.i116.not, i1 %cmp.i112.not, i1 false + %2 = load i32, ptr getelementptr inbounds ([64 x i32], ptr @__clambc_match_counts, i64 0, i64 2), align 8 + %cmp.i108.not = icmp eq i32 %2, 0 + %or.cond1 = select i1 %or.cond, i1 %cmp.i108.not, i1 false + %3 = load i32, ptr getelementptr inbounds ([64 x i32], ptr @__clambc_match_counts, i64 0, i64 3), align 4 + %cmp.i104.not = icmp eq i32 %3, 0 + + + .... + + br i1 %or.cond15, label %lor.rhs, label %lor.end + + lor.rhs: ; preds = %entry + %17 = load i32, ptr getelementptr inbounds ([64 x i32], ptr @__clambc_match_counts, i64 0, i64 17), align 4 + %cmp.i = icmp ne i32 %17, 0 + br label %lor.end + + lor.end: ; preds = %lor.rhs, %entry + %18 = phi i1 [ true, %entry ], [ %cmp.i, %lor.rhs ] + ret i1 %18 + + */ + if (CondNode == Map.end() || (TrueNode == Map.end() && FalseNode == Map.end())){ + printDiagnostic("Logical signature: select condition must be logical" + " expression", + SI); + return false; + } +#endif // select cond, trueval, falseval -> cond && trueval || !cond && falseval - LogicalNode *N = LogicalNode::getAnd(CondNode->second, - TrueNode->second); - LogicalNode *NotCond = LogicalNode::getNot(CondNode->second); - LogicalNode *N2 = LogicalNode::getAnd(NotCond, FalseNode->second); - Map[SI] = LogicalNode::getOr(N, N2); + LogicalNode *N = nullptr; + LogicalNode *NotCond = nullptr; + LogicalNode *N2 = nullptr; + + if (TrueNode != Map.end()){ + N 
= LogicalNode::getAnd(CondNode->second, + TrueNode->second); + } else if (ConstantInt * pci = llvm::dyn_cast<ConstantInt>(SI->getTrueValue())){ /* dyn_cast, not cast: a non-constant operand must fall through to the diagnostic below instead of asserting */ + if (pci->isOne()){ + N = LogicalNode::getNode(*(CondNode->second)); + } else if (not pci->isZero()) { + printDiagnostic("Logical signature: Select true value must either be" + " a logical expression or a constant true/false integer.", + SI); + return false; + } + } else { + printDiagnostic("Logical signature: Select true value must either be" + " a logical expression or a constant true/false integer.", + SI); + return false; + } + + NotCond = LogicalNode::getNot(CondNode->second); + if (FalseNode != Map.end()){ + N2 = LogicalNode::getAnd(NotCond, FalseNode->second); + } else if (ConstantInt * pci = llvm::dyn_cast<ConstantInt>(SI->getFalseValue())){ /* dyn_cast, not cast: see the true-value branch above */ + if (pci->isOne()){ + N2 = NotCond; + } else if (not pci->isZero()){ + printDiagnostic("Logical signature: Select false value must either be" + " a logical expression or a constant true/false integer.", + SI); + return false; + } + } else { + printDiagnostic("Logical signature: Select false value must either be" + " a logical expression or a constant true/false integer.", + SI); + return false; + } + + LogicalNode * res = nullptr; + if (N && N2){ + res = LogicalNode::getOr(N, N2); + } else if (N){ + res = N; + } else if (N2){ + res = N2; + } else { + /*SHOULD be impossible, but will add a check just in case.*/ + printDiagnostic("Logical signature: Malformed select statement.", + SI); + return false; + } + Map[SI] = res; break; } case Instruction::Ret: { diff --git a/libclambcc/ClamBCLogicalCompilerHelper/CMakeLists.txt b/libclambcc/ClamBCLogicalCompilerHelper/CMakeLists.txt new file mode 100644 index 0000000000..27fdcc6be0 --- /dev/null +++ b/libclambcc/ClamBCLogicalCompilerHelper/CMakeLists.txt @@ -0,0 +1,72 @@ +# Copyright (C) 2021 Cisco Systems, Inc. and/or its affiliates. All rights reserved. 
+ +# +# The clambclogicalcompilerhelper object library +# +add_library(clambclogicalcompilerhelper_obj OBJECT) +target_sources(clambclogicalcompilerhelper_obj + PRIVATE + ClamBCLogicalCompilerHelper.cpp +) + +target_include_directories(clambclogicalcompilerhelper_obj + PRIVATE + ${CMAKE_BINARY_DIR} # For clambc-version.h (generated file) + . # For Common/clambc.h + .. # For clambc.h #TODO: change all passes to use "Common" and then delete this line. + ${LLVM_INCLUDE_DIRS} +) + +set_target_properties(clambclogicalcompilerhelper_obj PROPERTIES COMPILE_FLAGS "${WARNCXXFLAGS}") + +# +# For testing +# +#target_compile_definitions(clambclogicalcompilerhelper_obj -DLOG_BEFORE_AFTER=1) + +# +# The clambclogicalcompilerhelper shared library. +# +add_library( clambclogicalcompilerhelper SHARED ) +target_link_libraries( clambclogicalcompilerhelper + PUBLIC + clambclogicalcompilerhelper_obj ) +set_target_properties( clambclogicalcompilerhelper PROPERTIES + VERSION ${LIBCLAMBC_VERSION} + SOVERSION ${LIBCLAMBC_SOVERSION} ) + +target_link_directories(clambclogicalcompilerhelper PRIVATE ${LLVM_LIBRARY_DIRS}) +target_link_libraries(clambclogicalcompilerhelper PUBLIC ${LLVM_LIBS}) + +if(WIN32) + install(TARGETS clambclogicalcompilerhelper DESTINATION .) 
+ + # Also install shared library (DLL) dependencies + install(CODE [[ + file(GET_RUNTIME_DEPENDENCIES + LIBRARIES + $ + RESOLVED_DEPENDENCIES_VAR _r_deps + UNRESOLVED_DEPENDENCIES_VAR _u_deps + DIRECTORIES + ${LLVM_LIBRARY_DIRS} + ) + foreach(_file ${_r_deps}) + string(TOLOWER ${_file} _file_lower) + if(NOT ${_file_lower} MATCHES "c:[\\/]windows[\\/]system32.*") + file(INSTALL + DESTINATION "${CMAKE_INSTALL_PREFIX}" + TYPE SHARED_LIBRARY + FOLLOW_SYMLINK_CHAIN + FILES "${_file}" + ) + endif() + endforeach() + #message("UNRESOLVED_DEPENDENCIES_VAR: ${_u_deps}") + ]]) +else() + install(TARGETS clambclogicalcompilerhelper DESTINATION ${CMAKE_INSTALL_LIBDIR}) +endif() + + + diff --git a/libclambcc/ClamBCLogicalCompilerHelper/ClamBCLogicalCompilerHelper.cpp b/libclambcc/ClamBCLogicalCompilerHelper/ClamBCLogicalCompilerHelper.cpp new file mode 100644 index 0000000000..3fea785783 --- /dev/null +++ b/libclambcc/ClamBCLogicalCompilerHelper/ClamBCLogicalCompilerHelper.cpp @@ -0,0 +1,196 @@ +/* + * Compile LLVM bytecode to logical signatures. + * + * Copyright (C) 2009-2010 Sourcefire, Inc. + * + * Authors: Török Edvin + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. 
+ */ + +#include "Common/ClamBCModule.h" +#include "Common/clambc.h" +#include "Common/bytecode_api.h" +#include "Common/ClamBCDiagnostics.h" +#include "Common/ClamBCCommon.h" +#include "Common/ClamBCUtilities.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Since the logical compiler requires 'setvirusname' to only be called with a string constant, + * we are going to undo the PHI nodes added by O3 that would have to + * + * + * Consider the code + + return.sink.split: ; preds = %if.end39, %for.end + %.str.1.sink = phi ptr [ @.str, %for.end ], [ @.str.1, %if.end39 ] + %call.i70 = call i32 @setvirusname(ptr noundef nonnull %.str.1.sink, i32 noundef 0) #6 + br label %return + + We will just add the calls to setvirusname to the predecessor basic blocks. + * + * + */ + + +#define DEBUG_TYPE "lsigcompilerhelper" + +using namespace llvm; + +namespace ClamBCLogicalCompilerHelper +{ + + class ClamBCLogicalCompilerHelper : public PassInfoMixin + { + public: + ClamBCLogicalCompilerHelper() {} + + virtual PreservedAnalyses run(Module & m, ModuleAnalysisManager & MAM); + virtual void getAnalysisUsage(AnalysisUsage &AU) const + { + } + + protected: + llvm::Module *pMod; + bool bChanged = false; + + virtual void populateArgs(const CallInst * pci, std::vector & args){ + for (auto i = pci->arg_begin(), e = pci->arg_end(); i != e; i++){ + args.push_back(llvm::dyn_cast(i)); + } + } + virtual void processPHI(PHINode * phi, Function * pCalledFunction, std::vector & args); + + virtual void fixupSetVirusNameCalls(); + }; + + + /* + * Add calls to setvirusname for each constant string, rather allowing a phinode to + * choose the string. This is a requirement for ClamBCLogicalCompiler. 
+ */ + void ClamBCLogicalCompilerHelper::processPHI(PHINode * phi, Function * pCalledFunction, std::vector & args){ + + for (size_t i = 0; i < phi->getNumIncomingValues(); i++){ + BasicBlock * pBB = phi->getIncomingBlock(i); + Value * pVal = phi->getIncomingValue(i); + + Instruction * pTerm = pBB->getTerminator(); + args[0] = pVal; + + CallInst::Create(pCalledFunction->getFunctionType(), pCalledFunction, args, "ClamBCLogicalCompilerHelper_callInst", pTerm); + } + + } + + + /* + * Find all calls to setvirusname, and make sure they aren't loading the + * first argument from a variable. + */ + void ClamBCLogicalCompilerHelper::fixupSetVirusNameCalls(){ + + std::vector erase; + Function *svn = pMod->getFunction("setvirusname"); + if (nullptr == svn){ + return; + } + for (auto iter : svn->users()) { + if (CallInst * pci = llvm::dyn_cast(iter)){ + Value * operand = pci->getOperand(0); + + if (PHINode * phi = llvm::dyn_cast(operand)){ + bChanged = true; /* only report a change when a call is actually rewritten */ + std::vector args; + populateArgs(pci, args); + processPHI(phi, svn, args); + erase.push_back(pci); + /*This leaves a block with only a branch instruction (essentially empty). 
+ * I don't think that is an issue, but consider removing it.*/ + erase.push_back(phi); + } + } + } + + for (size_t i = 0; i < erase.size(); i++){ + erase[i]->eraseFromParent(); + } + + } + + + PreservedAnalyses ClamBCLogicalCompilerHelper::run(Module & mod, ModuleAnalysisManager & mam) + { + pMod = &mod; + + fixupSetVirusNameCalls(); + + if (bChanged){ + return PreservedAnalyses::none(); + } + + return PreservedAnalyses::all(); + } + + // This part is the new way of registering your pass + extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK + llvmGetPassPluginInfo() { + return { + LLVM_PLUGIN_API_VERSION, "ClamBCLogicalCompilerHelper", "v0.1", + [](PassBuilder &PB) { + PB.registerPipelineParsingCallback( + [](StringRef Name, ModulePassManager &FPM, + ArrayRef) { + if(Name == "clambc-lcompiler-helper"){ + FPM.addPass(ClamBCLogicalCompilerHelper()); + return true; + } + return false; + } + ); + } + }; + } + +} // namespace + + diff --git a/libclambcc/ClamBCRemoveFreezeInsts/CMakeLists.txt b/libclambcc/ClamBCRemoveFreezeInsts/CMakeLists.txt new file mode 100644 index 0000000000..2fa8b0ba2c --- /dev/null +++ b/libclambcc/ClamBCRemoveFreezeInsts/CMakeLists.txt @@ -0,0 +1,72 @@ +# Copyright (C) 2021 Cisco Systems, Inc. and/or its affiliates. All rights reserved. + +# +# The clambcremovefreezeinsts object library +# +add_library(clambcremovefreezeinsts_obj OBJECT) +target_sources(clambcremovefreezeinsts_obj + PRIVATE + ClamBCRemoveFreezeInsts.cpp +) + +target_include_directories(clambcremovefreezeinsts_obj + PRIVATE + ${CMAKE_BINARY_DIR} # For clambc-version.h (generated file) + . # For Common/clambc.h + .. # For clambc.h #TODO: change all passes to use "Common" and then delete this line. + ${LLVM_INCLUDE_DIRS} +) + +set_target_properties(clambcremovefreezeinsts_obj PROPERTIES COMPILE_FLAGS "${WARNCXXFLAGS}") + +# +# For testing +# +#target_compile_definitions(clambcremovefreezeinsts_obj -DLOG_BEFORE_AFTER=1) + +# +# The clambcremovefreezeinsts shared library. 
+# +add_library( clambcremovefreezeinsts SHARED ) +target_link_libraries( clambcremovefreezeinsts + PUBLIC + clambcremovefreezeinsts_obj ) +set_target_properties( clambcremovefreezeinsts PROPERTIES + VERSION ${LIBCLAMBC_VERSION} + SOVERSION ${LIBCLAMBC_SOVERSION} ) + +target_link_directories(clambcremovefreezeinsts PRIVATE ${LLVM_LIBRARY_DIRS}) +target_link_libraries(clambcremovefreezeinsts PUBLIC ${LLVM_LIBS}) + +if(WIN32) + install(TARGETS clambcremovefreezeinsts DESTINATION .) + + # Also install shared library (DLL) dependencies + install(CODE [[ + file(GET_RUNTIME_DEPENDENCIES + LIBRARIES + $ + RESOLVED_DEPENDENCIES_VAR _r_deps + UNRESOLVED_DEPENDENCIES_VAR _u_deps + DIRECTORIES + ${LLVM_LIBRARY_DIRS} + ) + foreach(_file ${_r_deps}) + string(TOLOWER ${_file} _file_lower) + if(NOT ${_file_lower} MATCHES "c:[\\/]windows[\\/]system32.*") + file(INSTALL + DESTINATION "${CMAKE_INSTALL_PREFIX}" + TYPE SHARED_LIBRARY + FOLLOW_SYMLINK_CHAIN + FILES "${_file}" + ) + endif() + endforeach() + #message("UNRESOLVED_DEPENDENCIES_VAR: ${_u_deps}") + ]]) +else() + install(TARGETS clambcremovefreezeinsts DESTINATION ${CMAKE_INSTALL_LIBDIR}) +endif() + + + diff --git a/libclambcc/ClamBCRemoveFreezeInsts/ClamBCRemoveFreezeInsts.cpp b/libclambcc/ClamBCRemoveFreezeInsts/ClamBCRemoveFreezeInsts.cpp new file mode 100644 index 0000000000..84679e9dea --- /dev/null +++ b/libclambcc/ClamBCRemoveFreezeInsts/ClamBCRemoveFreezeInsts.cpp @@ -0,0 +1,124 @@ +/* + * Compile LLVM bytecode to ClamAV bytecode. + * + * Copyright (C) 2020-2023 Sourcefire, Inc. + * + * Authors: Andy Ragusa + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. + */ + +#include "Common/clambc.h" +#include "Common/ClamBCUtilities.h" + +#include +#include +#include + +#include +#include + +#include + +using namespace llvm; +using namespace std; + +namespace +{ + /* + * Freeze Instructions are to guarantee sane behaviour in the case of undefs or poison values. The interpreter + * has no notion of freeze instructions, so we are removing them. The verifier will fail if there are undef or + * poison values in the IR, so this is safe to do. + */ + struct ClamBCRemoveFreezeInsts : public PassInfoMixin + { + protected: + Module *pMod = nullptr; + bool bChanged = false; + + virtual void gatherFreezeInsts(Function *pFunc, std::vector & freezeInsts) { + for (auto fi = pFunc->begin(), fe = pFunc->end(); fi != fe; fi++){ + BasicBlock * pBB = llvm::cast(fi); + for (auto bi = pBB->begin(), be = pBB->end(); bi != be; bi++){ + if (FreezeInst * pfi = llvm::dyn_cast(bi)){ + freezeInsts.push_back(pfi); + } + } + } + + } + + virtual void processFunction(Function *pFunc) { + vector freezeInsts; + gatherFreezeInsts(pFunc, freezeInsts); + + for (size_t i = 0; i < freezeInsts.size(); i++){ + bChanged = true; + + FreezeInst * pfi = freezeInsts[i]; + pfi->replaceAllUsesWith(pfi->getOperand(0)); + pfi->eraseFromParent(); + } + } + + public: + + virtual ~ClamBCRemoveFreezeInsts() {} + + PreservedAnalyses run(Module & m, ModuleAnalysisManager & MAM) + { + pMod = &m; + + for (auto i = pMod->begin(), e = pMod->end(); i != e; i++) { + Function *pFunc = llvm::cast(i); + if (pFunc->isDeclaration()) { + continue; + } + + processFunction(pFunc); + } + + if (bChanged){ + return PreservedAnalyses::none(); + } else{ + return PreservedAnalyses::all(); + } + } + }; // end of struct 
ClamBCRemoveFreezeInsts + +} // end of anonymous namespace + +// This part is the new way of registering your pass +extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK +llvmGetPassPluginInfo() { + return { + LLVM_PLUGIN_API_VERSION, "ClamBCRemoveFreezeInsts", "v0.1", + [](PassBuilder &PB) { + PB.registerPipelineParsingCallback( + [](StringRef Name, ModulePassManager &FPM, + ArrayRef) { + if(Name == "clambc-remove-freeze-insts"){ + FPM.addPass(ClamBCRemoveFreezeInsts()); + return true; + } + return false; + } + ); + } + }; +} + + + diff --git a/libclambcc/ClamBCRemovePointerPHIs/ClamBCRemovePointerPHIs.cpp b/libclambcc/ClamBCRemovePointerPHIs/ClamBCRemovePointerPHIs.cpp index 5b3c6f1107..2596f44194 100644 --- a/libclambcc/ClamBCRemovePointerPHIs/ClamBCRemovePointerPHIs.cpp +++ b/libclambcc/ClamBCRemovePointerPHIs/ClamBCRemovePointerPHIs.cpp @@ -228,7 +228,7 @@ class ClamBCRemovePointerPHIs : public PassInfoMixin std::vector newInsts; Instruction *insPt = findFirstNonPHI(pn->getParent()); - Instruction *gepiNew = GetElementPtrInst::Create(nullptr, pBasePtr, idxNode, "ClamBCRemovePointerPHIs_gepi_", insPt); + Instruction *gepiNew = GetElementPtrInst::Create(pBasePtr->getType(), pBasePtr, idxNode, "ClamBCRemovePointerPHIs_gepi_", insPt); if (pn->getType() != gepiNew->getType()) { gepiNew = CastInst::CreatePointerCast(gepiNew, pn->getType(), "ClamBCRemovePointerPHIs_cast_", insPt); } @@ -299,8 +299,6 @@ class ClamBCRemovePointerPHIs : public PassInfoMixin pFunc = &F; bool ret = false; - llvm::errs() << "<" << __FILE__ << "::" << __FUNCTION__ << "::" << __LINE__ << "\n\n"; - std::vector phis = gatherPHIs(); for (size_t i = 0; i < phis.size(); i++) { PHINode *pn = phis[i]; diff --git a/libclambcc/ClamBCVerifier/ClamBCVerifier.cpp b/libclambcc/ClamBCVerifier/ClamBCVerifier.cpp index 9344706b25..3079bc9d13 100644 --- a/libclambcc/ClamBCVerifier/ClamBCVerifier.cpp +++ b/libclambcc/ClamBCVerifier/ClamBCVerifier.cpp @@ -103,7 +103,7 @@ static cl::opt 
StopOnFirstError("clambc-stopfirst", cl::init(false), cl::desc("Stop on first error in the verifier")); #else -static bool StopOnFirstError = true; +//static bool StopOnFirstError = true; #endif namespace ClamBCVerifier @@ -112,9 +112,11 @@ class ClamBCVerifier : public PassInfoMixin, public InstVisitor { +#if 0 ScalarEvolution *SE; DominatorTree *DT; BasicBlock *AbrtBB; +#endif bool Final; llvm::Module *pMod = nullptr; @@ -135,6 +137,7 @@ class ClamBCVerifier : public PassInfoMixin, } bool visitSelectInst(SelectInst &I) { + llvm::errs() << "<" << __FUNCTION__<< "::" << __LINE__ << ">" << "Selects need tobe removed, so this should be a false\n"; return true; } bool visitBranchInst(BranchInst &BI) @@ -159,8 +162,23 @@ class ClamBCVerifier : public PassInfoMixin, return true; } + /* + * FreezeInst's are used to guarantee a value being set to something fixed + * if it is undef or a poison value. They are a noop otherwise, so we will allow + * them in the verifier, and remove them in a pass to be run after the verifier. + * (a 'verifier' shouldn't be changing the IR). 
+ */ + bool visitFreezeInst(FreezeInst &I){ + return true; + } + bool visitInstruction(Instruction &I) { + + DEBUG_VALUE(&I); +#define DEBUG_NODEREF(val) llvm::errs() << "<" << __FUNCTION__ << "::" << __LINE__ << ">" << val << "\n"; + DEBUG_NODEREF(llvm::isa(&I)); + printDiagnostic("Unhandled instruction in verifier", &I); return false; } @@ -244,6 +262,107 @@ class ClamBCVerifier : public PassInfoMixin, return true; } + virtual bool isHandled(Instruction *pInst){ + bool bRet = llvm::isa(pInst) + || llvm::isa(pInst) + || llvm::isa(pInst) + || llvm::isa(pInst) + || llvm::isa(pInst) + || llvm::isa(pInst) + || llvm::isa(pInst) + || llvm::isa(pInst) + || llvm::isa(pInst) + || llvm::isa(pInst) + || llvm::isa(pInst) + || llvm::isa(pInst) + ; + + return bRet; + + } + + virtual bool isUndefOrPoisonValue(Value * pv){ + return llvm::isa(pv); + } + + virtual bool hasUndefsOrPoisonValues(ConstantExpr *pce, std::set & visited){ + if (visited.end() != visited.find(pce)) { /* already visited; member find is O(log n), std::find over a set is O(n) */ + return false; + } + visited.insert(pce); + + for (size_t i = 0; i < pce->getNumOperands(); i++){ + Value * pv = pce->getOperand(i); + if (isUndefOrPoisonValue(pv)){ + return true; + } + if (ConstantExpr * ce = llvm::dyn_cast(pv)){ + if (hasUndefsOrPoisonValues(ce, visited)){ + return true; + } + } + } + + return false; + } + + virtual bool hasUndefsOrPoisonValues(ConstantExpr *pce){ + std::set visited; + return hasUndefsOrPoisonValues(pce, visited); + } + + /*PoisonValue is derived from UndefValue, so we only have to check for that one.*/ + virtual bool hasUndefsOrPoisonValues(Instruction *pInst){ + for (size_t i = 0; i < pInst->getNumOperands(); i++){ + Value * pVal = pInst->getOperand(i); + if (llvm::isa(pVal)){ + continue; + } + + if (isUndefOrPoisonValue(pVal)){ + return true; + } + + if (ConstantExpr * pce = llvm::dyn_cast(pVal)){ + if (hasUndefsOrPoisonValues(pce)){ + return true; + } + } + + } + return false; + } + + virtual bool walk (Function * pFunc){ + bool bRet = true; 
+ for (auto fi = pFunc->begin(), fe = pFunc->end(); fi != fe; fi++){ + BasicBlock * pBB = llvm::cast(fi); + for (auto bi = pBB->begin(), be = pBB->end(); bi != be; bi++){ + Instruction * pInst = llvm::cast(bi); + if (hasUndefsOrPoisonValues(pInst)){ + printDiagnostic("Poison value or Undef value found in instruction.", pInst); + return false; + } + + if (PHINode * pn = llvm::dyn_cast(pInst)){ + bRet = visitPHINode(*pn); + } else if (CallInst * pci = llvm::dyn_cast(pInst)){ + bRet = visitCallInst(*pci); + } else if (SwitchInst * psi = llvm::dyn_cast(pInst)){ + bRet = visitSwitchInst(*psi); + } else { + bRet = isHandled(pInst); + } + + if (!bRet){ + return false; /* a bare break only left the inner loop, letting a later block overwrite the failure */ + } + } + } + + return bRet; + } + public: //static char ID; explicit ClamBCVerifier(): @@ -254,6 +373,8 @@ class ClamBCVerifier : public PassInfoMixin, return "ClamAV Bytecode Verifier"; } + + #if 0 virtual bool runOnFunction(Function &F) #else @@ -261,28 +382,38 @@ class ClamBCVerifier : public PassInfoMixin, #endif { pMod = F.getParent(); +#if 0 AbrtBB = 0; //SE = &getAnalysis().getSE(); SE = &fam.getResult(F); //DT = &getAnalysis().getDomTree(); DT = &fam.getResult(F); +#endif - bool OK = true; - std::vector insns; - // verifying can insert runtime checks, so be safe and create an initial - // list of instructions to process so we are not affected by transforms. - for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ++I) { - insns.push_back(&*I); - } - for (std::vector::iterator I = insns.begin(), E = insns.end(); - I != E; ++I) { - OK &= visit(*I); - if (!OK && StopOnFirstError) - break; + bool OK = walk(&F); +#if 0 + if (OK) { + + std::vector insns; + // verifying can insert runtime checks, so be safe and create an initial + // list of instructions to process so we are not affected by transforms. 
+ for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ++I) { + insns.push_back(&*I); + } + for (std::vector::iterator I = insns.begin(), E = insns.end(); + I != E; ++I) { + OK &= visit(*I); + if (!OK && StopOnFirstError) + break; + } } - if (!OK) +#endif + + if (!OK) { ClamBCStop("Verifier rejected bytecode function due to errors", &F); + } + return PreservedAnalyses::all(); } virtual void getAnalysisUsage(AnalysisUsage &AU) const diff --git a/libclambcc/Common/clambc.h b/libclambcc/Common/clambc.h index bda3761d9c..3e828bd6f6 100644 --- a/libclambcc/Common/clambc.h +++ b/libclambcc/Common/clambc.h @@ -144,5 +144,9 @@ enum bc_global { #define DEBUG_VALUE(__value__) llvm::errs() << "<" << __FUNCTION__ << "::" << __LINE__ << ">" << *__value__ << "\n"; #endif +#ifndef DEBUG_NONPOINTER +#define DEBUG_NONPOINTER(__value__) llvm::errs() << "<" << __FUNCTION__ << "::" << __LINE__ << ">" << __value__ << "\n"; +#endif + #define BC_START_TID 69 #endif diff --git a/temp_delete_when_merge/build_all.py b/temp_delete_when_merge/build_all.py new file mode 100755 index 0000000000..f05cebc33f --- /dev/null +++ b/temp_delete_when_merge/build_all.py @@ -0,0 +1,127 @@ +#!/usr/bin/python3 + +import os + +os.system("rm -f *.ll") + +SIG_DIR='sigs' + +COMPILE_CMD = """clang-16 \ + -S \ + -fno-discard-value-names \ + -fno-vectorize \ + --language=c \ + -emit-llvm \ + -Werror=unused-command-line-argument \ + -Xclang \ + -disable-O0-optnone \ + %s \ + -o \ + %s \ + -I \ + /home/aragusa/clamav-bytecode-compiler-upstream/build/install/bin/../include \ + -include \ + bytecode.h \ + -D__CLAMBC__""" + +OPTIONS_STR='--disable-loop-unrolling' +OPTIONS_STR+=" --disable-i2p-p2i-opt" +OPTIONS_STR+=" --disable-loop-unrolling" +OPTIONS_STR+=" --disable-promote-alloca-to-lds" +OPTIONS_STR+=" --disable-promote-alloca-to-vector" +OPTIONS_STR+=" --disable-simplify-libcalls" +OPTIONS_STR+=" --disable-tail-calls" +#OPTIONS_STR+=" --polly-vectorizer=none" +#OPTIONS_STR+=" --loop-vectorize" 
+OPTIONS_STR+=" --vectorize-slp=false" +OPTIONS_STR+=" --vectorize-loops=false" +#OPTIONS_STR+=" --disable-loop-vectorization" + + + + +PASS_STR = "function(mem2reg)" +PASS_STR+=',' +PASS_STR+='clambc-remove-undefs' +PASS_STR+=',' +PASS_STR+='clambc-preserve-abis' +PASS_STR+=',default' +#PASS_STR+=',default' +PASS_STR+=',clambc-preserve-abis' #remove fake function calls because O3 has already run +PASS_STR+=',function(clambc-remove-pointer-phis)' +PASS_STR+=',clambc-lowering-notfinal' # perform lowering pass +PASS_STR+=',lowerswitch' + +PASS_STR+=',function(clambc-verifier)' +PASS_STR+=',clambc-remove-freeze-insts' + + + +#print ("TODO: Put verifier back") + +#PASS_STR+=',clambc-lowering-notfinal' # perform lowering pass +#PASS_STR+=',clambc-lcompiler' #compile the logical_trigger function to a + +INSTALL_DIR=os.path.join(os.getcwd(), "..") +LOAD_STR = "--load %s/install/lib/libclambccommon.so " % INSTALL_DIR +LOAD_STR += "--load-pass-plugin %s/install/lib/libclambcremoveundefs.so " % INSTALL_DIR +LOAD_STR += "--load-pass-plugin %s/install/lib/libclambcpreserveabis.so " % INSTALL_DIR +LOAD_STR += "--load-pass-plugin %s/install/lib/libclambcanalyzer.so " % INSTALL_DIR +LOAD_STR += "--load-pass-plugin %s/install/lib/libclambcremovepointerphis.so " % INSTALL_DIR +LOAD_STR += "--load-pass-plugin %s/install/lib/libclambcloweringf.so " % INSTALL_DIR +LOAD_STR += "--load-pass-plugin %s/install/lib/libclambcloweringnf.so " % INSTALL_DIR +LOAD_STR += "--load-pass-plugin %s/install/lib/libclambcverifier.so " % INSTALL_DIR +LOAD_STR += "--load-pass-plugin %s/install/lib/libclambclogicalcompiler.so " % INSTALL_DIR +LOAD_STR += "--load-pass-plugin %s/install/lib/libclambcremovefreezeinsts.so " % INSTALL_DIR +#LOAD_STR += "--load-pass-plugin %s/install/lib/libclambcrebuild.so" % INSTALL_DIR + +#OPT_CMD = 'opt-16 -S %s --passes=\"-mem2reg\" --passes=\"%s\" %s ' % (LOAD_STR, PASS_STR, OPTIONS_STR) +OPT_CMD = 'opt-16 -S %s --passes=\"%s\" %s ' % (LOAD_STR, PASS_STR, OPTIONS_STR) + 
+ +""" +#This is to find undefs. +print ("Take this part out, used to find undefs") +#PASS_STR = 'default' +OPTIONS_STR = '' +OPTIONS_STR+=" --vectorize-slp=false" +OPTIONS_STR+=" --vectorize-loops=false" +OPT_CMD = 'opt-16 -S %s --passes=\"%s\" %s ' % (LOAD_STR, PASS_STR, OPTIONS_STR) +""" + + + +OPT_CMD += "%s -o %s" + + + + + + +def run(cmd): + return os.system(cmd) + + +def compileFile(d, name): + llFile = name[:-1] + "ll" + + cmd = COMPILE_CMD % (os.path.join(d,name), llFile) + if (run(cmd)): + return + + cmd = OPT_CMD % (llFile, llFile + ".optimized.ll") + print (cmd) + + return run(cmd) + + +if '__main__' == __name__: + for s in os.listdir(SIG_DIR): + if (compileFile(SIG_DIR, s)): + print (f"Failed on {s}") + break +# os.system("rm -f *.ll") + + + +