diff --git a/CMakeLists.txt b/CMakeLists.txt index 5f489ac29f..05c0c815fe 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -24,14 +24,14 @@ project( ClamBCC DESCRIPTION "ClamAV Bytecode Compiler." ) set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake" ${CMAKE_MODULE_PATH}) -include(Version) +#include(Version) set(PACKAGE_NAME "${PROJECT_NAME}") set(PACKAGE_VERSION "${PROJECT_VERSION}") set(PACKAGE_STRING "${PROJECT_NAME} ${PROJECT_VERSION}${VERSION_SUFFIX}") set(PACKAGE_BUGREPORT "https://github.com/Cisco-Talos/clamav-bytecode-compiler/issues") set(PACKAGE_URL "https://www.clamav.net/") -HexVersion(PACKAGE_VERSION_NUM ${PROJECT_VERSION_MAJOR} ${PROJECT_VERSION_MINOR} ${PROJECT_VERSION_PATCH}) +#HexVersion(PACKAGE_VERSION_NUM ${PROJECT_VERSION_MAJOR} ${PROJECT_VERSION_MINOR} ${PROJECT_VERSION_PATCH}) # libtool library versioning rules: http://www.gnu.org/software/libtool/manual/html_node/Updating-version-info.html set(LIBCLAMBC_CURRENT 1) @@ -40,7 +40,7 @@ set(LIBCLAMBC_AGE 0) math(EXPR LIBCLAMBC_SOVERSION "${LIBCLAMBC_CURRENT} - ${LIBCLAMBC_AGE}") set(LIBCLAMBC_VERSION "${LIBCLAMBC_SOVERSION}.${LIBCLAMBC_AGE}.${LIBCLAMBC_REVISION}") -HexVersion(LIBCLAMBC_VERSION_NUM ${LIBCLAMBC_CURRENT} ${LIBCLAMBC_REVISION} ${LIBCLAMBC_AGE}) +#HexVersion(LIBCLAMBC_VERSION_NUM ${LIBCLAMBC_CURRENT} ${LIBCLAMBC_REVISION} ${LIBCLAMBC_AGE}) # Git optionally used to add commit info into build to differentiate in bug reports. find_package(Git) @@ -103,10 +103,10 @@ if(ENABLE_TESTS) set(Python3_TEST_PACKAGE "pytest;-v") endif() - find_package(ClamAV REQUIRED) + #find_package(ClamAV REQUIRED) endif() -find_package(LLVM 8 REQUIRED) +find_package(LLVM 16 REQUIRED) # Do not disable assertions based on CMAKE_BUILD_TYPE. foreach(_build_type "Release" "MinSizeRel" "RelWithDebInfo") @@ -187,10 +187,15 @@ configure_file(clambc-version.h.in clambc-version.h) # Build targets! 
# +include(AddLLVM) + # The bytecode compiler optimization passes # This is the core of the bytecode compiler add_subdirectory(libclambcc) +# Examples of plugins for the new and legacy pass managers. +add_subdirectory(examples) + # The bytecode compiler application # This is really just a python script add_subdirectory(clambcc) @@ -212,17 +217,18 @@ add_subdirectory(headers) # `pandoc -s file.tex -o file.md` mostly-works, but w/ the doxygen integration is insufficient. # add_subdirectory(docs) -if(ENABLE_EXAMPLES) - # Example optimization passes; boilerplate to help compiler devs write new passes. - add_subdirectory( examples ) -endif() +#if(ENABLE_EXAMPLES) +# # Example optimization passes; boilerplate to help compiler devs write new passes. +# add_subdirectory( examples ) +#endif() include(CTest) + add_custom_target(check COMMAND ${CMAKE_CTEST_COMMAND}) -if(ENABLE_TESTS) - # Tests to verify compiler works as intended and that signatures behave as intended. - add_subdirectory( test ) -endif() +#if(ENABLE_TESTS) +# # Tests to verify compiler works as intended and that signatures behave as intended. +# add_subdirectory( test ) +#endif() if(WIN32) # Include the license(s) in the installation diff --git a/clambcc/clambc-compiler.py b/clambcc/clambc-compiler.py index 7bf72ab6a8..ac45b6dd60 100755 --- a/clambcc/clambc-compiler.py +++ b/clambcc/clambc-compiler.py @@ -11,13 +11,11 @@ #These are the list of supported versions -#consider changing this to start at 8 and go up to 99. That will cover us -#from having to update this when new versions come out. -CLANG_LLVM_KNOWN_VERSIONS = [8, 9, 10, 11, 12] +CLANG_LLVM_KNOWN_VERSIONS = [16] #This is the min clang/llvm version this has been tested with. -MIN_CLANG_LLVM_VERSION = 8 -PREFERRED_CLANG_LLVM_VERSION = 8 +MIN_CLANG_LLVM_VERSION = 16 +PREFERRED_CLANG_LLVM_VERSION = 16 CLANG_NAME = "clang" LLVM_NAME = "opt" @@ -52,7 +50,7 @@ FOUND_SHARED_OBJ = False SHARED_OBJ_DIR = Path(__file__).parent / '..' 
/ 'lib' -if (SHARED_OBJ_DIR / 'libclambcc.so').exists(): +if (SHARED_OBJ_DIR / 'libclambccommon.so').exists(): SHARED_OBJ_FILE = SHARED_OBJ_DIR / 'libclambcc.so' FOUND_SHARED_OBJ = True @@ -112,7 +110,16 @@ def validate(self) -> bool: def run(cmd: str) -> int: if VERBOSE: print(cmd) - return os.system(cmd) + #return os.system(cmd) + + ret = os.system(cmd) + if ret: + print (cmd) + print (ret) + sys.exit(1) + + return ret + def die(msg: str, exitStatus: int) -> None: @@ -180,17 +187,26 @@ def compileFile(clangLLVM: ClangLLVM, fileName: str, debugBuild: bool, standardC for d in options.defines: defines += f"-D{d} " + print ("TODO: Put clang options in a list") cmd = f"{clangLLVM.getClang()} \ - -S \ - -fno-discard-value-names \ - --language=c \ - -emit-llvm \ - -Werror=unused-command-line-argument \ - -Xclang \ - -disable-O0-optnone \ - -o {outFile} \ - {fileName} \ - " + -S \ + -fno-discard-value-names \ + -Wno-implicit-function-declaration \ + -fno-vectorize \ + --language=c \ + -emit-llvm \ + -Werror=unused-command-line-argument \ + -Xclang \ + -disable-O0-optnone \ + -Xclang -no-opaque-pointers \ + {fileName} \ + -o \ + {outFile} \ + -I \ + {INCDIR} \ + -include \ + bytecode.h \ + -D__CLAMBC__" cmd += f" \ {includePaths} \ @@ -201,11 +217,12 @@ def compileFile(clangLLVM: ClangLLVM, fileName: str, debugBuild: bool, standardC cmd += " -g \ " - if (not standardCompiler): - cmd += f" -I {INCDIR} \ - -include bytecode.h \ - -D__CLAMBC__ \ - " +#TODO: Remove the 'standardCompiler' thing +# if (not standardCompiler): +# cmd += f" -I {INCDIR} \ +# -include bytecode.h \ +# -D__CLAMBC__ \ +# " if options.disableCommonWarnings: cmd += COMMON_WARNING_OPTIONS @@ -261,7 +278,8 @@ def linkIRFiles(clangLLVM: ClangLLVM, linkedFile: str, irFiles: list) -> int: Returns the exit status code for the call to `llvm-link`. 
''' inFiles = " ".join(irFiles) - cmd = f"{clangLLVM.getLLVMLink()} -S -o {linkedFile} {inFiles}" + print ("TODO: Put llvm-link args in a list") + cmd = f"{clangLLVM.getLLVMLink()} -opaque-pointers=0 -S -o {linkedFile} {inFiles}" return run(cmd) @@ -444,13 +462,15 @@ def getOutputString(linked: IRFile, ignore: IRFile) -> str: def createOptimizedTmpFile(clangLLVM: ClangLLVM, linkedFile: str) -> str: name = getOptimizedTmpFileName(linkedFile) - cmd = f"{clangLLVM.getOpt()} \ + cmd = f'{clangLLVM.getOpt()} \ -S \ {linkedFile} \ -o {name} \ - -internalize -internalize-public-api-list=entrypoint \ - -globalopt \ - " + -internalize-public-api-list=entrypoint \ + --passes="internalize,globalopt" \ + ' + + print ("TODO: put this in an array") ret = run(cmd) if None == ret: @@ -491,11 +511,122 @@ def createInputSourceFile(clangLLVM: ClangLLVM, name: str, args: list, options: return res +INTERNALIZE_API_LIST=[ "_Z10entrypointv" + , "entrypoint" + , "__clambc_kind" + , "__clambc_virusname_prefix" + , "__clambc_virusnames" + , "__clambc_filesize" + , "__clambc_match_counts" + , "__clambc_match_offsets" + , "__clambc_pedata" + , "__Copyright" + ] + +OPTIMIZE_OPTIONS = ["-S" + , "--disable-loop-unrolling" + , " --disable-i2p-p2i-opt" + , " --disable-loop-unrolling" + , " --disable-promote-alloca-to-lds" + , " --disable-promote-alloca-to-vector" + , " --disable-simplify-libcalls" + , " --disable-tail-calls" + , " --vectorize-slp=false" + , " --vectorize-loops=false" + , " -internalize-public-api-list=\"%s\"" % ','.join(INTERNALIZE_API_LIST) + ] + +#TODO: Remove this when we properly handle opaque pointers. 
+OPTIMIZE_OPTIONS.append("-opaque-pointers=0") + +OPTIMIZE_PASSES = ["function(mem2reg)" + , 'verify' + , 'clambc-preserve-abis' + , 'verify' + , 'default' + , 'globalopt' + , 'clambc-preserve-abis' #remove fake function calls because O3 has already run + , 'verify' + , 'clambc-remove-unsupported-icmp-intrinsics' + , 'verify' + , 'clambc-remove-usub' + , 'verify' + , 'clambc-remove-fshl' + , 'verify' + , 'clambc-lowering-notfinal' # perform lowering pass + , 'verify' + , 'lowerswitch' + , 'verify' + , 'clambc-remove-icmp-sle' + , 'verify' + , 'function(clambc-verifier)' + , 'verify' + , 'clambc-remove-freeze-insts' + , 'verify' + , 'clambc-lowering-notfinal' # perform lowering pass + , 'verify' + , 'clambc-lcompiler-helper' #compile the logical_trigger function to a + , 'verify' + , 'clambc-lcompiler' #compile the logical_trigger function to a + , 'verify' + , 'internalize' + , 'verify' + , 'clambc-rebuild' + , 'verify' + , 'clambc-trace' + , 'verify' + , 'clambc-outline-endianness-calls' + , 'verify' + , 'clambc-change-malloc-arg-size' + , 'verify' + , 'clambc-extend-phis-to-64-bit' + , 'verify' + , 'clambc-convert-memsets-to-32Bit' + , 'verify' + , 'globalopt' + , 'clambc-prepare-geps-for-writer' + , 'verify' + , 'clambc-writer' + , 'verify' +] + +OPTIMIZE_LOADS=[ f"--load {SHARED_OBJ_DIR}/libclambccommon.so" + , f"--load-pass-plugin {SHARED_OBJ_DIR}/libclambcremoveundefs.so" + , f"--load-pass-plugin {SHARED_OBJ_DIR}/libclambcpreserveabis.so" + , f"--load-pass-plugin {SHARED_OBJ_DIR}/libclambcremoveunsupportedicmpintrinsics.so" + , f"--load-pass-plugin {SHARED_OBJ_DIR}/libclambcremoveusub.so" + , f"--load-pass-plugin {SHARED_OBJ_DIR}/libclambcremovefshl.so" + , f"--load-pass-plugin {SHARED_OBJ_DIR}/libclambcremovepointerphis.so" + , f"--load-pass-plugin {SHARED_OBJ_DIR}/libclambcloweringnf.so" + , f"--load-pass-plugin {SHARED_OBJ_DIR}/libclambcremoveicmpsle.so" + , f"--load-pass-plugin {SHARED_OBJ_DIR}/libclambcverifier.so" + , f"--load-pass-plugin 
{SHARED_OBJ_DIR}/libclambcremovefreezeinsts.so" + , f"--load-pass-plugin {SHARED_OBJ_DIR}/libclambcloweringf.so" + , f"--load-pass-plugin {SHARED_OBJ_DIR}/libclambclogicalcompilerhelper.so" + , f"--load-pass-plugin {SHARED_OBJ_DIR}/libclambclogicalcompiler.so" + , f"--load-pass-plugin {SHARED_OBJ_DIR}/libclambcrebuild.so" + , f"--load-pass-plugin {SHARED_OBJ_DIR}/libclambctrace.so" + , f"--load-pass-plugin {SHARED_OBJ_DIR}/libclambcoutlineendiannesscalls.so" + , f"--load-pass-plugin {SHARED_OBJ_DIR}/libclambcchangemallocargsize.so" + , f"--load-pass-plugin {SHARED_OBJ_DIR}/libclambcextendphisto64bit.so" + , f"--load-pass-plugin {SHARED_OBJ_DIR}/libclambcconvertmemsetsto32bit.so" + , f"--load-pass-plugin {SHARED_OBJ_DIR}/libclambcpreparegepsforwriter.so" + , f"--load-pass-plugin {SHARED_OBJ_DIR}/libclambcanalyzer.so" + , f"--load-pass-plugin {SHARED_OBJ_DIR}/libclambcregalloc.so" + , f"--load-pass-plugin {SHARED_OBJ_DIR}/libclambcwriter.so" +] + + + def optimize(clangLLVM: ClangLLVM, inFile: str, outFile: str, sigFile: str, inputSourceFile: str, standardCompiler: bool) -> int: + """ internalizeAPIList = "_Z10entrypointv,entrypoint,__clambc_kind,__clambc_virusname_prefix,__clambc_virusnames,__clambc_filesize,__clambc_match_counts,__clambc_match_offsets,__clambc_pedata,__Copyright" - if standardCompiler: - internalizeAPIList += ",main" +# if standardCompiler: +# internalizeAPIList += ",main" + + + #TODO: Modify ClamBCRemoveUndefs to not require mem2reg to be run before it. cmd = (f'{clangLLVM.getOpt()} ' @@ -574,16 +705,29 @@ def optimize(clangLLVM: ClangLLVM, inFile: str, outFile: str, sigFile: str, inpu #otherwise the writer gets #unhappy. 
f' -globalopt' - f' -clambc-convert-intrinsics' #convert all memset intrinsics to + f' -clambc-convert-memsets-to-32Bit' #convert all memset intrinsics to #the 32-bit instead of the 64-bit #intrinsic f' -clambc-writer' #write the bytecode f' -clambc-writer-input-source={inputSourceFile}' f' -clambc-sigfile={sigFile}' ) + """ + + + + cmd = f"{clangLLVM.getOpt()} %s -o %s %s %s --passes=\"%s\" -clambc-writer-input-source=%s -clambc-sigfile=%s" % ( + inFile + , outFile + , " ".join(OPTIMIZE_OPTIONS) + , " ".join(OPTIMIZE_LOADS) + , ",".join(OPTIMIZE_PASSES) + , inputSourceFile + , sigFile + ) - if standardCompiler: - cmd += f" -clambc-standard-compiler" +# if standardCompiler: +# cmd += f" -clambc-standard-compiler" return run(cmd) diff --git a/cmake/FindClamAV.cmake b/cmake/FindClamAV.cmake deleted file mode 100644 index 0a23a1bd9d..0000000000 --- a/cmake/FindClamAV.cmake +++ /dev/null @@ -1,94 +0,0 @@ -# -# Find the ClamAV programs and headers needed for the test suite. -# -# If found, will set: -# ClamAV_FOUND, ClamAV_VERSION, and -# - clamscan_EXECUTABLE -# - clambc_EXECUTABLE -# - sigtool_EXECUTABLE -# - clambc_headers_DIRECTORY -# -# If you have a custom install location for ClamAV, you can provide a hint -# by settings -DClamAV_HOME= -# - -find_program(clamscan_EXECUTABLE - NAMES clamscan clamscan.exe - HINTS "${ClamAV_HOME}" - PATH_SUFFIXES "bin" -) -if(NOT clamscan_EXECUTABLE AND NOT ClamAV_FIND_QUIETLY) - message("Unable to find clamscan") -endif() - -find_program(clambc_EXECUTABLE - NAMES clambc clambc.exe - HINTS "${ClamAV_HOME}" - PATH_SUFFIXES "bin" -) -if(NOT clambc_EXECUTABLE_EXECUTABLE AND NOT ClamAV_FIND_QUIETLY) - message("Unable to find clambc") -endif() - -find_program(sigtool_EXECUTABLE - NAMES sigtool sigtool.exe - HINTS "${ClamAV_HOME}" - PATH_SUFFIXES "bin" -) -if(NOT sigtool_EXECUTABLE AND NOT ClamAV_FIND_QUIETLY) - message("Unable to find sigtool") -endif() - -if(clamscan_EXECUTABLE AND clambc_EXECUTABLE AND sigtool_EXECUTABLE) - 
execute_process(COMMAND "${clamscan_EXECUTABLE}" --version - OUTPUT_VARIABLE ClamAV_VERSION_OUTPUT - ERROR_VARIABLE ClamAV_VERSION_ERROR - RESULT_VARIABLE ClamAV_VERSION_RESULT - ) - if(NOT ${ClamAV_VERSION_RESULT} EQUAL 0) - if(NOT ClamAV_FIND_QUIETLY) - message(STATUS "ClamAV not found: Failed to determine version.") - endif() - unset(clamscan_EXECUTABLE) - else() - string(REGEX - MATCH "[0-9]+\\.[0-9]+(\\.[0-9]+)?(-devel)?" - ClamAV_VERSION "${ClamAV_VERSION_OUTPUT}" - ) - set(ClamAV_VERSION "${ClamAV_VERSION}") - set(ClamAV_FOUND 1) - - # Look for the clambc-headers. E.g.: /lib/clambc-headers/0.104.0 - # - # In the future, the clamav-derived headers for compiling signatures will be - # installed with clamav, and this path will be necessary to find them for running - # the test suite. - find_file(clambc_headers_DIRECTORY - clambc-headers/${ClamAV_VERSION} - HINTS "${ClamAV_HOME}" - PATH_SUFFIXES "lib" - ) - - if(NOT ClamAV_FIND_QUIETLY) - message(STATUS "ClamAV found: ${ClamAV_VERSION}") - message(STATUS " clamscan: ${clamscan_EXECUTABLE}") - message(STATUS " clambc: ${clambc_EXECUTABLE}") - message(STATUS " sigtool: ${sigtool_EXECUTABLE}") - message(STATUS " bc headers: ${clambc_headers_DIRECTORY}") - endif() - - if(NOT clambc_headers_DIRECTORY) - set(clambc_headers_DIRECTORY "") - endif() - endif() - - mark_as_advanced(clamscan_EXECUTABLE clambc_EXECUTABLE sigtool_EXECUTABLE ClamAV_VERSION) -else() - if(ClamAV_FIND_REQUIRED) - message(FATAL_ERROR "ClamAV not found.") - else() - if(NOT ClamAV_FIND_QUIETLY) - message(STATUS "${_msg}") - endif() - endif() -endif() diff --git a/cmake/FindClang.cmake b/cmake/FindClang.cmake deleted file mode 100644 index 4db126c9ed..0000000000 --- a/cmake/FindClang.cmake +++ /dev/null @@ -1,177 +0,0 @@ -# Detect Clang libraries -# -# Defines the following variables: -# CLANG_FOUND - True if Clang was found -# CLANG_INCLUDE_DIRS - Where to find Clang includes -# CLANG_LIBRARY_DIRS - Where to find Clang libraries -# CLANG_BUILTIN_DIR 
- Where to find Clang builtin includes -# -# CLANG_CLANG_LIB - Libclang C library -# -# CLANG_CLANGFRONTEND_LIB - Clang Frontend (C++) Library -# CLANG_CLANGDRIVER_LIB - Clang Driver (C++) Library -# ... -# -# CLANG_LIBS - All the Clang C++ libraries -# -# Uses the same include and library paths detected by FindLLVM.cmake -# -# See https://clang.llvm.org/docs/InternalsManual.html for full list of libraries - -#============================================================================= -# Copyright 2014-2015 Kevin Funk -# -# Distributed under the OSI-approved BSD License (the "License"); -# see accompanying file Copyright.txt for details. -# -# This software is distributed WITHOUT ANY WARRANTY; without even the -# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -# See the License for more information. - -#============================================================================= - -set(KNOWN_VERSIONS 11 10 9 8 7 6.0 5.0 4.0 3.9 3.8) - -foreach(version ${KNOWN_VERSIONS}) - if(DEFINED Clang_FIND_VERSION AND Clang_FIND_VERSION VERSION_EQUAL version) - find_package(LLVM ${version} PATHS ${LLVM_ROOT}) - else() - find_package(LLVM PATHS ${LLVM_ROOT}) - endif() -endforeach() - -if (${Clang_FIND_REQUIRED}) - if(NOT DEFINED LLVM_FOUND) - message(SEND_ERROR "Could not find LLVM (or Clang for that matter)") - else() - message("Found LLVM version ${LLVM_VERSION}") - endif() -endif() - -set(CLANG_FOUND FALSE) - -if(LLVM_FOUND AND LLVM_LIBRARY_DIRS) - message("Searching for clang libraries...") - macro(FIND_AND_ADD_CLANG_LIB _libname_) - # message("Searching for ${LLVM_LIBRARY_DIRS}/lib${_libname_}-${Clang_FIND_VERSION}.so.1") - string(TOUPPER ${_libname_} _prettylibname_) - find_library(CLANG_${_prettylibname_}_LIB - NAMES - ${_libname_}-${Clang_FIND_VERSION}.so.1 lib${_libname_}-${Clang_FIND_VERSION}.so.1 - ${_libname_}-${Clang_FIND_VERSION} lib${_libname_}-${Clang_FIND_VERSION} - ${_libname_}.so.1 lib${_libname_}.so.1 - ${_libname_} 
lib${_libname_} - HINTS - ${LLVM_LIBRARY_DIRS} ${ARGN}) - if(CLANG_${_prettylibname_}_LIB) - message("Found ${CLANG_${_prettylibname_}_LIB}") - set(CLANG_LIBS ${CLANG_LIBS} ${CLANG_${_prettylibname_}_LIB}) - endif() - endmacro(FIND_AND_ADD_CLANG_LIB) - - FIND_AND_ADD_CLANG_LIB(clangFrontend) - - # note: On Windows there's 'libclang.dll' instead of 'clang.dll' -> search for 'libclang', too - FIND_AND_ADD_CLANG_LIB(clang NAMES clang libclang clang-${Clang_FIND_VERSION} libclang-${Clang_FIND_VERSION}) # LibClang: high-level C interface - - FIND_AND_ADD_CLANG_LIB(clangDriver) - FIND_AND_ADD_CLANG_LIB(clangCodeGen) - FIND_AND_ADD_CLANG_LIB(clangSema) - FIND_AND_ADD_CLANG_LIB(clangChecker) - FIND_AND_ADD_CLANG_LIB(clangAnalysis) - FIND_AND_ADD_CLANG_LIB(clangRewriteFrontend) - FIND_AND_ADD_CLANG_LIB(clangRewrite) - FIND_AND_ADD_CLANG_LIB(clangAST) - FIND_AND_ADD_CLANG_LIB(clangParse) - FIND_AND_ADD_CLANG_LIB(clangLex) - FIND_AND_ADD_CLANG_LIB(clangBasic) - FIND_AND_ADD_CLANG_LIB(clangARCMigrate) - FIND_AND_ADD_CLANG_LIB(clangEdit) - FIND_AND_ADD_CLANG_LIB(clangFrontendTool) - FIND_AND_ADD_CLANG_LIB(clangSerialization) - FIND_AND_ADD_CLANG_LIB(clangTooling) - FIND_AND_ADD_CLANG_LIB(clangStaticAnalyzerCheckers) - FIND_AND_ADD_CLANG_LIB(clangStaticAnalyzerCore) - FIND_AND_ADD_CLANG_LIB(clangStaticAnalyzerFrontend) - FIND_AND_ADD_CLANG_LIB(clangRewriteCore) -endif() - -if(CLANG_LIBS OR CLANG_CLANG_LIB) - set(CLANG_FOUND TRUE) -else() - message(STATUS "Could not find any Clang libraries in ${LLVM_LIBRARY_DIRS}") -endif() - -if(CLANG_FOUND) - set(CLANG_LIBRARY_DIRS ${LLVM_LIBRARY_DIRS}) - set(CLANG_INCLUDE_DIRS ${LLVM_INCLUDE_DIRS}) - set(CLANG_VERSION ${LLVM_VERSION}) - - # svn version of clang has a svn suffix "8.0.0svn" but installs the header in "8.0.0", without the suffix - string(REPLACE "svn" "" CLANG_VERSION_CLEAN "${CLANG_VERSION}") - # dito for git - string(REPLACE "git" "" CLANG_VERSION_CLEAN "${CLANG_VERSION}") - - find_path(CLANG_BUILTIN_DIR - # cpuid.h because it 
is defined in ClangSupport constructor as valid clang builtin dir indicator - NAMES "cpuid.h" - PATHS "${CLANG_LIBRARY_DIRS}" - "${CLANG_INCLUDE_DIRS}" - PATH_SUFFIXES "clang/${CLANG_VERSION}/include" - "../../../clang/${CLANG_VERSION}/include" - "clang/${CLANG_VERSION_CLEAN}/include" - "../../../clang/${CLANG_VERSION_CLEAN}/include" - NO_DEFAULT_PATH - ) - - if (NOT CLANG_BUILTIN_DIR) - message(FATAL_ERROR "Could not find Clang builtin directory") - endif() - get_filename_component(CLANG_BUILTIN_DIR ${CLANG_BUILTIN_DIR} ABSOLUTE) - - # check whether llvm-config comes from an install prefix - execute_process( - COMMAND ${LLVM_CONFIG_EXECUTABLE} --src-root - OUTPUT_VARIABLE _llvmSourceRoot - OUTPUT_STRIP_TRAILING_WHITESPACE - ) - string(FIND "${LLVM_INCLUDE_DIRS}" "${_llvmSourceRoot}" _llvmIsInstalled) - if (NOT _llvmIsInstalled) - message(STATUS "Detected that llvm-config comes from a build-tree, adding more include directories for Clang") - list(APPEND CLANG_INCLUDE_DIRS - "${LLVM_INSTALL_PREFIX}/tools/clang/include" # build dir - ) - - # check whether the source is from llvm-project.git (currently recommended way to clone the LLVM projects) - # contains all LLVM projects in the top-level directory - get_filename_component(_llvmProjectClangIncludeDir ${_llvmSourceRoot}/../clang/include REALPATH) - if (EXISTS ${_llvmProjectClangIncludeDir}) - message(STATUS " Note: llvm-project.git structure detected, using different include path pointing into source dir") - list(APPEND CLANG_INCLUDE_DIRS "${_llvmProjectClangIncludeDir}") # source dir - else() - list(APPEND CLANG_INCLUDE_DIRS "${_llvmSourceRoot}/tools/clang/include") # source dir - endif() - endif() - - # if the user specified LLVM_ROOT, use that and fail otherwise - if (LLVM_ROOT) - find_program(CLANG_EXECUTABLE NAMES clang HINTS ${LLVM_ROOT}/bin DOC "clang executable" NO_DEFAULT_PATH) - elseif (NOT CLANG_EXECUTABLE) - # find clang, prefer the one with a version suffix, e.g. 
clang-3.5 - # note: FreeBSD installs clang as clang35 and so on - # note: on some distributions, only 'clang' is shipped, so let's always try to fallback on that - string(REPLACE "." "" Clang_FIND_VERSION_CONCAT ${Clang_FIND_VERSION}) - find_program(CLANG_EXECUTABLE NAMES clang-${Clang_FIND_VERSION} clang${Clang_FIND_VERSION_CONCAT} clang DOC "clang executable") - endif() - - message(STATUS "Found Clang (LLVM version: ${CLANG_VERSION})") - message(STATUS " Include dirs: ${CLANG_INCLUDE_DIRS}") - message(STATUS " Clang libraries: ${CLANG_LIBS}") - message(STATUS " Libclang C library: ${CLANG_CLANG_LIB}") - message(STATUS " Builtin include dir: ${CLANG_BUILTIN_DIR}") - message(STATUS " Clang executable: ${CLANG_EXECUTABLE}") -else() - if(Clang_FIND_REQUIRED) - message(FATAL_ERROR "Could NOT find Clang") - endif() -endif() diff --git a/cmake/FindLLVM.cmake b/cmake/FindLLVM.cmake deleted file mode 100644 index 9e94b2d509..0000000000 --- a/cmake/FindLLVM.cmake +++ /dev/null @@ -1,173 +0,0 @@ -# Find the native LLVM includes and libraries -# -# Defines the following variables -# LLVM_INCLUDE_DIRS - where to find llvm include files -# LLVM_LIBRARY_DIRS - where to find llvm libs -# LLVM_CFLAGS - llvm compiler flags -# LLVM_LFLAGS - llvm linker flags -# LLVM_MODULE_LIBS - list of llvm libs for working with modules. -# LLVM_INSTALL_PREFIX - LLVM installation prefix -# LLVM_FOUND - True if llvm found. -# LLVM_VERSION - Version string ("llvm-config --version") -# -# This module reads hints about search locations from variables -# LLVM_ROOT - Preferred LLVM installation prefix (containing bin/, lib/, ...) -# -# Note: One may specify these as environment variables if they are not specified as -# CMake variables or cache entries. - -#============================================================================= -# Copyright 2014 Kevin Funk -# -# Distributed under the OSI-approved BSD License (the "License"); -# see accompanying file Copyright.txt for details. 
-# -# This software is distributed WITHOUT ANY WARRANTY; without even the -# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -# See the License for more information. -#============================================================================= - -if(NOT LLVM_ROOT AND DEFINED ENV{LLVM_ROOT}) - file(TO_CMAKE_PATH "$ENV{LLVM_ROOT}" LLVM_ROOT) -endif() - -# if the user specified LLVM_ROOT, use that and fail otherwise -if(LLVM_ROOT) - find_program(LLVM_CONFIG_EXECUTABLE NAMES llvm-config HINTS ${LLVM_ROOT}/bin DOC "llvm-config executable" NO_DEFAULT_PATH) -elseif(NOT LLVM_CONFIG_EXECUTABLE) - # find llvm-config, prefer the one with a version suffix, e.g. llvm-config-3.5 - # note: FreeBSD installs llvm-config as llvm-config35 and so on - # note: on some distributions, only 'llvm-config' is shipped, so let's always try to fallback on that - string(REPLACE "." "" LLVM_FIND_VERSION_CONCAT_PREFIX ${LLVM_FIND_VERSION}) - list(APPEND LLVM_FIND_VERSION_CONCAT llvm-config${LLVM_FIND_VERSION_CONCAT_PREFIX}) - - foreach(i RANGE 0 9) - list(APPEND LLVM_FIND_VERSION_CONCAT llvm-config${LLVM_FIND_VERSION_CONCAT_PREFIX}${i}) - endforeach() - message("llvm-config list: ${LLVM_FIND_VERSION_CONCAT}") - - find_program(LLVM_CONFIG_EXECUTABLE NAMES llvm-config-${LLVM_FIND_VERSION} ${LLVM_FIND_VERSION_CONCAT} llvm-config DOC "llvm-config executable") - - # other distributions don't ship llvm-config, but only some llvm-config-VERSION binary - # try to deduce installed LLVM version by looking up llvm-nm in PATH and *then* find llvm-config-VERSION via that - if(NOT LLVM_CONFIG_EXECUTABLE) - find_program(_llvmNmExecutable llvm-nm) - if(_llvmNmExecutable) - execute_process(COMMAND ${_llvmNmExecutable} --version OUTPUT_VARIABLE _out) - string(REGEX REPLACE ".*LLVM version ([^ \n]+).*" "\\1" _versionString "${_out}") - find_program(LLVM_CONFIG_EXECUTABLE NAMES llvm-config-${_versionString} DOC "llvm-config executable") - endif() - endif() -endif() - -set(LLVM_FOUND FALSE) 
- -if(LLVM_CONFIG_EXECUTABLE) - # verify that we've found the correct version of llvm-config - execute_process(COMMAND ${LLVM_CONFIG_EXECUTABLE} --version - OUTPUT_VARIABLE LLVM_VERSION - OUTPUT_STRIP_TRAILING_WHITESPACE) - - if(NOT LLVM_VERSION) - set(_LLVM_ERROR_MESSAGE "Failed to parse version from ${LLVM_CONFIG_EXECUTABLE}") - unset(LLVM_CONFIG_EXECUTABLE CACHE) - elseif(LLVM_FIND_VERSION VERSION_GREATER LLVM_VERSION) - set(_LLVM_ERROR_MESSAGE "${LLVM_CONFIG_EXECUTABLE} (version ${LLVM_VERSION}) unsuitable: too old for requested version ${LLVM_FIND_VERSION}") - unset(LLVM_CONFIG_EXECUTABLE CACHE) - else() - set(LLVM_FOUND TRUE) - endif() -else() - set(_LLVM_ERROR_MESSAGE "Could NOT find 'llvm-config' executable") -endif() - -if(LLVM_FOUND) - execute_process( - COMMAND ${LLVM_CONFIG_EXECUTABLE} --includedir - OUTPUT_VARIABLE LLVM_INCLUDE_DIRS - OUTPUT_STRIP_TRAILING_WHITESPACE - ) - - execute_process( - COMMAND ${LLVM_CONFIG_EXECUTABLE} --libdir - OUTPUT_VARIABLE LLVM_LIBRARY_DIRS - OUTPUT_STRIP_TRAILING_WHITESPACE - ) - - execute_process( - COMMAND ${LLVM_CONFIG_EXECUTABLE} --cppflags - OUTPUT_VARIABLE LLVM_CFLAGS - OUTPUT_STRIP_TRAILING_WHITESPACE - ) - - execute_process( - COMMAND ${LLVM_CONFIG_EXECUTABLE} --ldflags - OUTPUT_VARIABLE LLVM_LFLAGS - OUTPUT_STRIP_TRAILING_WHITESPACE - ) - - execute_process( - COMMAND ${LLVM_CONFIG_EXECUTABLE} --libs core bitreader asmparser analysis - OUTPUT_VARIABLE LLVM_MODULE_LIBS - OUTPUT_STRIP_TRAILING_WHITESPACE - ) - - execute_process( - COMMAND ${LLVM_CONFIG_EXECUTABLE} --libfiles - OUTPUT_VARIABLE LLVM_LIBS - OUTPUT_STRIP_TRAILING_WHITESPACE - ) - - execute_process( - COMMAND ${LLVM_CONFIG_EXECUTABLE} --libdir - OUTPUT_VARIABLE LLVM_LIB_DIR - OUTPUT_STRIP_TRAILING_WHITESPACE - ) - - #execute_process stores results in a string, and we need a list. 
- string(REGEX MATCHALL "${LLVM_LIB_DIR}[^ ]*" LLVM_LIBS ${LLVM_LIBS}) - - execute_process( - COMMAND ${LLVM_CONFIG_EXECUTABLE} --prefix - OUTPUT_VARIABLE LLVM_INSTALL_PREFIX - OUTPUT_STRIP_TRAILING_WHITESPACE - ) - - if(NOT ${LLVM_VERSION} VERSION_LESS "3.8.0") - execute_process( - COMMAND ${LLVM_CONFIG_EXECUTABLE} --shared-mode - OUTPUT_VARIABLE _LLVM_SHARED_MODE - OUTPUT_STRIP_TRAILING_WHITESPACE - ) - if(_LLVM_SHARED_MODE STREQUAL "shared") - set(LLVM_SHARED_MODE ON) - else() - set(LLVM_SHARED_MODE OFF) - endif() - else() - set(LLVM_SHARED_MODE OFF) - endif() - - # potentially add include dir from binary dir for non-installed LLVM - execute_process( - COMMAND ${LLVM_CONFIG_EXECUTABLE} --src-root - OUTPUT_VARIABLE _llvmSourceRoot - OUTPUT_STRIP_TRAILING_WHITESPACE - ) - string(FIND "${LLVM_INCLUDE_DIRS}" "${_llvmSourceRoot}" _llvmIsInstalled) - if(NOT _llvmIsInstalled) - list(APPEND LLVM_INCLUDE_DIRS "${LLVM_INSTALL_PREFIX}/include") - endif() -endif() - -if(LLVM_FIND_REQUIRED AND NOT LLVM_FOUND) - message(FATAL_ERROR "Could not find LLVM: ${_LLVM_ERROR_MESSAGE}") -elseif(_LLVM_ERROR_MESSAGE) - message(STATUS "Could not find LLVM: ${_LLVM_ERROR_MESSAGE}") -endif() - -if(LLVM_FOUND) - message(STATUS "Found LLVM (version: ${LLVM_VERSION}): (using ${LLVM_CONFIG_EXECUTABLE})") - message(STATUS " Include dirs: ${LLVM_INCLUDE_DIRS}") - message(STATUS " LLVM libraries: ${LLVM_LIBS}") -endif() diff --git a/cmake/Version.cmake b/cmake/Version.cmake deleted file mode 100644 index 8ac4849c97..0000000000 --- a/cmake/Version.cmake +++ /dev/null @@ -1,11 +0,0 @@ -# Converts a version such as 1.2.255 to 0x0102ff -function(HexVersion version_hex_var major minor patch) - math(EXPR version_dec "${major} * 256 * 256 + ${minor} * 256 + ${patch}") - set(version_hex "0x") - foreach(i RANGE 5 0 -1) - math(EXPR num "(${version_dec} >> (4 * ${i})) & 15") - string(SUBSTRING "0123456789abcdef" ${num} 1 num_hex) - set(version_hex "${version_hex}${num_hex}") - endforeach() - 
set(${version_hex_var} "${version_hex}" PARENT_SCOPE) -endfunction() diff --git a/libclambcc/CMakeLists.txt b/libclambcc/CMakeLists.txt index 4812a8f87c..6ce9fdd8aa 100644 --- a/libclambcc/CMakeLists.txt +++ b/libclambcc/CMakeLists.txt @@ -1,89 +1,26 @@ # Copyright (C) 2021 Cisco Systems, Inc. and/or its affiliates. All rights reserved. -# -# The clambcc object library -# -add_library(clambcc_obj OBJECT) -target_sources(clambcc_obj - PRIVATE - ClamBCLowering/ClamBCLowering.cpp - ClamBCVerifier/ClamBCVerifier.cpp - ClamBCLogicalCompiler/ClamBCLogicalCompiler.cpp - ClamBCRebuild/ClamBCRebuild.cpp - ClamBCTrace/ClamBCTrace.cpp - ClamBCModule/ClamBCModule.cpp - ClamBCWriter/ClamBCWriter.cpp - ClamBCAnalyzer/ClamBCAnalyzer.cpp - Common/ClamBCDiagnostics.cpp - Common/ClamBCUtilities.cpp - Common/ClamBCRegAlloc.cpp - Common/version.c - ClamBCPrepareGEPsForWriter/ClamBCPrepareGEPsForWriter.cpp - ClamBCRemoveSelectInsts/ClamBCRemoveSelectInsts.cpp - ClamBCOutlineEndiannessCalls/ClamBCOutlineEndiannessCalls.cpp - ClamBCChangeMallocArgSize/ClamBCChangeMallocArgSize.cpp - ClamBCRemovePointerPHIs/ClamBCRemovePointerPHIs.cpp - ClamBCConvertIntrinsics/ClamBCConvertIntrinsics.cpp - ClamBCRemoveUndefs/ClamBCRemoveUndefs.cpp - ClamBCPreserveABIs/ClamBCPreserveABIs.cpp - ClamBCExtendPHIsTo64Bit/ClamBCExtendPHIsTo64Bit.cpp -) -target_include_directories(clambcc_obj - PRIVATE - ${CMAKE_BINARY_DIR} # For clambc-version.h (generated file) - . # For Common/clambc.h - Common # For clambc.h #TODO: change all passes to use "Common" and then delete this line. - ${LLVM_INCLUDE_DIRS} -) - -set_target_properties(clambcc_obj PROPERTIES COMPILE_FLAGS "${WARNCXXFLAGS}") - -# -# For testing -# -#target_compile_definitions(clambc_obj -DLOG_BEFORE_AFTER=1) - -# -# The clambcc shared library. 
-# -add_library( clambcc SHARED ) -target_link_libraries( clambcc - PUBLIC - clambcc_obj ) -set_target_properties( clambcc PROPERTIES - VERSION ${LIBCLAMBC_VERSION} - SOVERSION ${LIBCLAMBC_SOVERSION} ) - -target_link_directories(clambcc PRIVATE ${LLVM_LIBRARY_DIRS}) -target_link_libraries(clambcc PUBLIC ${LLVM_LIBS}) - -if(WIN32) - install(TARGETS clambcc DESTINATION .) - - # Also install shared library (DLL) dependencies - install(CODE [[ - file(GET_RUNTIME_DEPENDENCIES - LIBRARIES - $ - RESOLVED_DEPENDENCIES_VAR _r_deps - UNRESOLVED_DEPENDENCIES_VAR _u_deps - DIRECTORIES - ${LLVM_LIBRARY_DIRS} - ) - foreach(_file ${_r_deps}) - string(TOLOWER ${_file} _file_lower) - if(NOT ${_file_lower} MATCHES "c:[\\/]windows[\\/]system32.*") - file(INSTALL - DESTINATION "${CMAKE_INSTALL_PREFIX}" - TYPE SHARED_LIBRARY - FOLLOW_SYMLINK_CHAIN - FILES "${_file}" - ) - endif() - endforeach() - #message("UNRESOLVED_DEPENDENCIES_VAR: ${_u_deps}") - ]]) -else() - install(TARGETS clambcc DESTINATION ${CMAKE_INSTALL_LIBDIR}) -endif() +add_subdirectory(ClamBCLogicalCompiler) +add_subdirectory(ClamBCLogicalCompilerHelper) +add_subdirectory(ClamBCRemoveUndefs) +add_subdirectory(ClamBCPreserveABIs) +add_subdirectory(ClamBCAnalyzer) +add_subdirectory(Common) +add_subdirectory(ClamBCVerifier) +#add_subdirectory(ClamBCRemovePointerPHIs) +add_subdirectory(ClamBCLowering) +add_subdirectory(ClamBCRemoveFreezeInsts) +add_subdirectory(ClamBCWriter) +add_subdirectory(ClamBCTrace) +add_subdirectory(ClamBCOutlineEndiannessCalls) +add_subdirectory(ClamBCChangeMallocArgSize) +add_subdirectory(ClamBCExtendPHIsTo64Bit) +add_subdirectory(ClamBCRebuild) +add_subdirectory(ClamBCRegAlloc) +add_subdirectory(ClamBCConvertMemsetsTo32Bit) +add_subdirectory(ClamBCPrepareGEPsForWriter) +add_subdirectory(ClamBCRemoveICMPSLE) +add_subdirectory(ClamBCRemoveUSUB) +add_subdirectory(ClamBCRemoveUnsupportedICMPIntrinsics) +add_subdirectory(ClamBCRemoveFSHL) diff --git a/libclambcc/ClamBCAnalyzer/CMakeLists.txt 
b/libclambcc/ClamBCAnalyzer/CMakeLists.txt new file mode 100644 index 0000000000..03a46bc545 --- /dev/null +++ b/libclambcc/ClamBCAnalyzer/CMakeLists.txt @@ -0,0 +1,72 @@ +# Copyright (C) 2021 Cisco Systems, Inc. and/or its affiliates. All rights reserved. + +# +# The clambcanalyzer object library +# +add_library(clambcanalyzer_obj OBJECT) +target_sources(clambcanalyzer_obj + PRIVATE + ClamBCAnalyzer.cpp +) + +target_include_directories(clambcanalyzer_obj + PRIVATE + ${CMAKE_BINARY_DIR} # For clambc-version.h (generated file) + . # For Common/clambc.h + .. # For clambc.h #TODO: change all passes to use "Common" and then delete this line. + ${LLVM_INCLUDE_DIRS} +) + +set_target_properties(clambcanalyzer_obj PROPERTIES COMPILE_FLAGS "${WARNCXXFLAGS}") + +# +# For testing +# +#target_compile_definitions(clambcanalyzer_obj -DLOG_BEFORE_AFTER=1) + +# +# The clambcanalyzer shared library. +# +add_library( clambcanalyzer SHARED ) +target_link_libraries( clambcanalyzer + PUBLIC + clambcanalyzer_obj ) +set_target_properties( clambcanalyzer PROPERTIES + VERSION ${LIBCLAMBC_VERSION} + SOVERSION ${LIBCLAMBC_SOVERSION} ) + +target_link_directories(clambcanalyzer PRIVATE ${LLVM_LIBRARY_DIRS}) +target_link_libraries(clambcanalyzer PUBLIC ${LLVM_LIBS}) + +if(WIN32) + install(TARGETS clambcanalyzer DESTINATION .) 
+ + # Also install shared library (DLL) dependencies + install(CODE [[ + file(GET_RUNTIME_DEPENDENCIES + LIBRARIES + $ + RESOLVED_DEPENDENCIES_VAR _r_deps + UNRESOLVED_DEPENDENCIES_VAR _u_deps + DIRECTORIES + ${LLVM_LIBRARY_DIRS} + ) + foreach(_file ${_r_deps}) + string(TOLOWER ${_file} _file_lower) + if(NOT ${_file_lower} MATCHES "c:[\\/]windows[\\/]system32.*") + file(INSTALL + DESTINATION "${CMAKE_INSTALL_PREFIX}" + TYPE SHARED_LIBRARY + FOLLOW_SYMLINK_CHAIN + FILES "${_file}" + ) + endif() + endforeach() + #message("UNRESOLVED_DEPENDENCIES_VAR: ${_u_deps}") + ]]) +else() + install(TARGETS clambcanalyzer DESTINATION ${CMAKE_INSTALL_LIBDIR}) +endif() + + + diff --git a/libclambcc/ClamBCAnalyzer/ClamBCAnalyzer.cpp b/libclambcc/ClamBCAnalyzer/ClamBCAnalyzer.cpp index 0527edaa83..385b32d658 100644 --- a/libclambcc/ClamBCAnalyzer/ClamBCAnalyzer.cpp +++ b/libclambcc/ClamBCAnalyzer/ClamBCAnalyzer.cpp @@ -37,7 +37,7 @@ using namespace llvm; -extern cl::opt WriteDI; +AnalysisKey ClamBCAnalyzer::Key; static unsigned getSpecialIndex(StringRef Name) { @@ -67,58 +67,55 @@ static bool compare_lt_functions(Function *A, Function *B) return NA.compare(NB) < 0; } -bool ClamBCAnalyzer::runOnModule(Module &M) +void ClamBCAnalysis::run(Module & m) { - pMod = &M; + pMod = &m; // Determine bytecode kind, default is 0 (generic). kind = 0; - GlobalVariable *GVKind = M.getGlobalVariable("__clambc_kind"); + GlobalVariable *GVKind = pMod->getGlobalVariable("__clambc_kind"); if (GVKind && GVKind->hasDefinitiveInitializer()) { kind = cast(GVKind->getInitializer())->getValue().getZExtValue(); // GVKind->setLinkage(GlobalValue::InternalLinkage); // Do not set the linkage type to internal, because the optimizer will remove it. 
if (kind >= 65536) { - ClamBCStop("Bytecode kind cannot be higher than 64k\n", &M); + ClamBCStop("Bytecode kind cannot be higher than 64k\n", pMod); } } - GlobalVariable *G = M.getGlobalVariable("__Copyright"); + GlobalVariable *G = pMod->getGlobalVariable("__Copyright"); if (G && G->hasDefinitiveInitializer()) { Constant *C = G->getInitializer(); // std::string c; StringRef c; if (!getConstantStringInfo(C, c)) { - ClamBCStop("Failed to extract copyright string\n", &M); + ClamBCStop("Failed to extract copyright string\n", pMod); } - // copyright = strdup(c.c_str()); copyright = c.str(); - // G->setLinkage(GlobalValue::InternalLinkage); // Do not set the linkage type to internal because the optimizer will remove it. } // Logical signature created by ClamBCLogicalCompiler. - NamedMDNode *Node = M.getNamedMetadata("clambc.logicalsignature"); + NamedMDNode *Node = pMod->getNamedMetadata("clambc.logicalsignature"); logicalSignature = Node ? cast(Node->getOperand(0)->getOperand(0))->getString() : ""; - Node = M.getNamedMetadata("clambc.virusnames"); + Node = pMod->getNamedMetadata("clambc.virusnames"); virusnames = Node ? cast(Node->getOperand(0)->getOperand(0))->getString() : ""; unsigned tid, fid; // unsigned cid; - startTID = tid = clamav::initTypeIDs(typeIDs, M.getContext()); + startTID = tid = clamav::initTypeIDs(typeIDs, pMod->getContext()); // arrays of [2 x i8] .. 
[7 x i8] used for struct padding for (unsigned i = 1; i < 8; i++) { - const Type *Ty = llvm::ArrayType::get(llvm::Type::getInt8Ty(M.getContext()), + const Type *Ty = llvm::ArrayType::get(llvm::Type::getInt8Ty(pMod->getContext()), i); typeIDs[Ty] = tid++; extraTypes.push_back(Ty); } std::vector types; - // cid=1; fid = 1; - for (Module::global_iterator I = M.global_begin(); I != M.global_end(); ++I) { + for (Module::global_iterator I = pMod->global_begin(); I != pMod->global_end(); ++I) { GlobalVariable *gv = llvm::cast(I); std::set insts; std::set globs; @@ -136,14 +133,14 @@ bool ClamBCAnalyzer::runOnModule(Module &M) // globals, so introduce helper globals for nested constant expressions. if (CE->getOpcode() != Instruction::GetElementPtr) { if (CE->getOpcode() == Instruction::BitCast) { - GlobalVariable *GV = new GlobalVariable(M, CE->getType(), true, + GlobalVariable *GV = new GlobalVariable(*pMod, CE->getType(), true, GlobalValue::InternalLinkage, CE, I->getName() + "_bc"); CEMap[CE] = GV; continue; } errs() << "UNSUPPORTED: " << *CE << "\n"; - ClamBCStop("Unsupported constant expression", &M); + ClamBCStop("Unsupported constant expression", pMod); } ConstantInt *C0 = dyn_cast(CE->getOperand(1)); ConstantInt *C1 = dyn_cast(CE->getOperand(2)); @@ -152,7 +149,7 @@ bool ClamBCAnalyzer::runOnModule(Module &M) errs() << "UNSUPPORTED: " << *CE << "\n"; ClamBCStop("Unsupported constant expression, nonzero first" " index", - &M); + pMod); } const DataLayout &dataLayout = pMod->getDataLayout(); @@ -161,18 +158,15 @@ bool ClamBCAnalyzer::runOnModule(Module &M) indices.push_back(CE->getOperand(i)); } Type *IP8Ty = PointerType::getUnqual(Type::getInt8Ty(CE->getContext())); + Type * type = getResultType(CE); - Type *type = CE->getOperand(0)->getType(); - if (llvm::isa(type)) { - type = llvm::cast(type)->getElementType(); - } uint64_t idx = dataLayout.getIndexedOffsetInType(type, indices); Value *Idxs[1]; Idxs[0] = ConstantInt::get(Type::getInt64Ty(CE->getContext()), idx); 
Constant *C = ConstantExpr::getPointerCast(CE->getOperand(0), IP8Ty); ConstantExpr *NewCE = - cast(ConstantExpr::getGetElementPtr(nullptr, C, + cast(ConstantExpr::getGetElementPtr(C->getType(), C, Idxs)); NewCE = cast(ConstantExpr::getPointerCast(NewCE, CE->getType())); @@ -180,7 +174,7 @@ bool ClamBCAnalyzer::runOnModule(Module &M) CE->replaceAllUsesWith(NewCE); } CE = NewCE; - GlobalVariable *GV = new GlobalVariable(M, CE->getType(), true, + GlobalVariable *GV = new GlobalVariable(*pMod, CE->getType(), true, GlobalValue::InternalLinkage, CE, I->getName() + "_" + Twine(v)); @@ -190,6 +184,7 @@ bool ClamBCAnalyzer::runOnModule(Module &M) // Collect types of all globals. const Type *Ty = I->getType(); + Ty = I->getValueType(); if (!typeIDs.count(Ty)) { extraTypes.push_back(Ty); typeIDs[Ty] = tid++; @@ -199,30 +194,34 @@ bool ClamBCAnalyzer::runOnModule(Module &M) // Sort functions. std::vector functions; - for (Module::iterator I = M.begin(), E = M.end(); I != E;) { + for (Module::iterator I = pMod->begin(), E = pMod->end(); I != E;) { Function *F = &*I; ++I; functions.push_back(F); - F->removeFromParent(); } + + for (size_t i = 0; i < functions.size(); i++){ + functions[i]->removeFromParent(); + } + std::sort(functions.begin(), functions.end(), compare_lt_functions); for (std::vector::iterator I = functions.begin(), E = functions.end(); I != E; ++I) { - M.getFunctionList().push_back(*I); + pMod->getFunctionList().push_back(*I); } - Function *ep = M.getFunction("entrypoint"); + Function *ep = pMod->getFunction("entrypoint"); if (!ep) { - ClamBCStop("Bytecode must define an entrypoint (with 0 parameters)!\n", &M); + ClamBCStop("Bytecode must define an entrypoint (with 0 parameters)!\n", pMod); } if (ep->getFunctionType()->getNumParams() != 0) { - ClamBCStop("Bytecode must define an entrypoint with 0 parameters!\n", &M); + ClamBCStop("Bytecode must define an entrypoint with 0 parameters!\n", pMod); } unsigned dbgid = 0; - unsigned MDDbgKind = 
M.getContext().getMDKindID("dbg"); - for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { + unsigned MDDbgKind = pMod->getContext().getMDKindID("dbg"); + for (Module::iterator I = pMod->begin(), E = pMod->end(); I != E; ++I) { Function &F = *I; if (F.isDeclaration()) { // Don't add prototypes of debug intrinsics @@ -261,6 +260,7 @@ bool ClamBCAnalyzer::runOnModule(Module &M) extraTypes.push_back(Ty); typeIDs[Ty] = tid++; } + for (inst_iterator II = inst_begin(F), IE = inst_end(F); II != IE; ++II) { const Type *Ty; // Skip debug intrinsics, so we don't add llvm.dbg.* types @@ -282,6 +282,7 @@ bool ClamBCAnalyzer::runOnModule(Module &M) } else { Ty = II->getType(); } + if (const GetElementPtrInst *GEPI = dyn_cast(&*II)) { const Type *GTy = GEPI->getPointerOperand()->getType(); if (!typeIDs.count(GTy)) { @@ -290,6 +291,20 @@ bool ClamBCAnalyzer::runOnModule(Module &M) typeIDs[GTy] = tid++; } } + + for (size_t i = 0; i < II->getNumOperands(); i++){ + Value * operand = II->getOperand(i); + if (llvm::isa(operand)){ + continue; + } + Type * pt = operand->getType(); + if (0 == typeIDs.count(pt)){ + types.push_back(pt); + extraTypes.push_back(pt); + typeIDs[pt] = tid++; + } + } + if (typeIDs.count(Ty)) { continue; } @@ -316,7 +331,7 @@ bool ClamBCAnalyzer::runOnModule(Module &M) continue; } DEBUGERR << *STy << "\n"; - ClamBCStop("Bytecode cannot use abstract types (only pointers to them)!", &M); + ClamBCStop("Bytecode cannot use abstract types (only pointers to them)!", pMod); } } if (!typeIDs.count(STy)) { @@ -328,21 +343,18 @@ bool ClamBCAnalyzer::runOnModule(Module &M) } if (tid >= 65536) { - ClamBCStop("Attempted to use more than 64k types", &M); + ClamBCStop("Attempted to use more than 64k types", pMod); } printGlobals(startTID); - - return false; } -void ClamBCAnalyzer::printGlobals(uint16_t stid) +void ClamBCAnalysis::printGlobals(uint16_t stid) { llvm::Module &M = *pMod; // Describe types maxApi = 0; - // std::vector apis; - for (Module::iterator I = 
M.begin(), E = M.end(); I != E; ++I) { + for (Module::iterator I = pMod->begin(), E = pMod->end(); I != E; ++I) { llvm::Function *pFunc = llvm::cast(I); // Skip dead declarations if (I->use_empty()) { @@ -392,20 +404,19 @@ void ClamBCAnalyzer::printGlobals(uint16_t stid) for (StringMap::iterator I = globalsMap.begin(), E = globalsMap.end(); I != E; ++I) { - if (GlobalVariable *GV = M.getGlobalVariable(I->getKey())) { + if (GlobalVariable *GV = pMod->getGlobalVariable(I->getKey())) { specialGlobals.insert(GV); globals[GV] = I->getValue(); if (I->getValue() > maxGlobal) maxGlobal = I->getValue(); } } - if (GlobalVariable *GV = M.getGlobalVariable("__clambc_kind")) { + if (GlobalVariable *GV = pMod->getGlobalVariable("__clambc_kind")) { specialGlobals.insert(GV); } - // std::vector globalInits; globalInits.push_back(0); // ConstantPointerNul placeholder - for (Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E; ++I) { + for (Module::global_iterator I = pMod->global_begin(), E = pMod->global_end(); I != E; ++I) { GlobalVariable *pgv = llvm::cast(I); if (specialGlobals.count(pgv)) { continue; @@ -433,8 +444,9 @@ void ClamBCAnalyzer::printGlobals(uint16_t stid) &M); } Constant *C = pgv->getInitializer(); - if (C->use_empty()) + if (C->use_empty()) { continue; + } globalInits.push_back(C); globals[pgv] = i++; if (i >= 32768) { @@ -473,7 +485,7 @@ void ClamBCAnalyzer::printGlobals(uint16_t stid) } // need to use bytecode_api_decl.c.h -void ClamBCAnalyzer::populateAPIMap() +void ClamBCAnalysis::populateAPIMap() { unsigned id = 1; apiMap["test1"] = id++; @@ -585,14 +597,19 @@ void ClamBCAnalyzer::populateAPIMap() apiMap["bzip2_done"] = id++; } -void ClamBCAnalyzer::getAnalysisUsage(AnalysisUsage &AU) const -{ - // Preserve the CFG, we only eliminate PHIs, and introduce some - // loads/stores. 
- AU.setPreservesAll(); +// This part is the new way of registering your pass +extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK +llvmGetPassPluginInfo() { + return { + LLVM_PLUGIN_API_VERSION, "ClamBCAnalysis", "v0.1", + [](PassBuilder &PB) { + PB.registerAnalysisRegistrationCallback( + [](ModuleAnalysisManager &mam) { + mam.registerPass([] () { return ClamBCAnalyzer(); } ); + } + ); + } + }; } -char ClamBCAnalyzer::ID = 0; -static RegisterPass X("clambc-analyzer", - "ClamAV bytecode register allocator"); -const PassInfo *const ClamBCAnalyzerID = &X; + diff --git a/libclambcc/ClamBCAnalyzer/ClamBCAnalyzer.h b/libclambcc/ClamBCAnalyzer/ClamBCAnalyzer.h index aec37d78cc..b83b39f417 100644 --- a/libclambcc/ClamBCAnalyzer/ClamBCAnalyzer.h +++ b/libclambcc/ClamBCAnalyzer/ClamBCAnalyzer.h @@ -24,15 +24,20 @@ #include "Common/clambc.h" -#include -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/StringMap.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/ADT/Twine.h" +#include +#include +#include +#include +#include #include #include -#include "llvm/Support/raw_ostream.h" +#include + +#include +#include +#include + +#include #include #include @@ -45,7 +50,8 @@ //5. Cannot see where banMap has any functions inserted. Do we need it? //6. Evaluate the TODO in runOnModule. 
-class ClamBCAnalyzer : public llvm::ModulePass +//class ClamBCAnalyzer : public llvm::PassInfoMixin //llvm::ModulePass +class ClamBCAnalysis { protected: typedef llvm::DenseMap TypeMapTy; @@ -76,7 +82,8 @@ class ClamBCAnalyzer : public llvm::ModulePass unsigned maxGlobal = 0; std::vector globalInits; std::vector mds; - + bool WriteDI = false; + virtual void printGlobals(uint16_t stid); /* TODO @@ -102,8 +109,8 @@ class ClamBCAnalyzer : public llvm::ModulePass public: static char ID; - explicit ClamBCAnalyzer() - : ModulePass(ID) + explicit ClamBCAnalysis() + //: ModulePass(ID) { populateAPIMap(); @@ -117,14 +124,27 @@ class ClamBCAnalyzer : public llvm::ModulePass globalsMap["__clambc_match_offsets"] = GLOBAL_MATCH_OFFSETS; } - ~ClamBCAnalyzer() {} - virtual bool runOnModule(llvm::Module &m) override; + virtual uint32_t getHighestTID(){ + uint32_t ret = 0; + for (auto i = typeIDs.begin(), e = typeIDs.end(); i != e; i++) { + if (i->second > ret){ + ret = i->second; + } + } + return ret; + } - virtual void getAnalysisUsage(llvm::AnalysisUsage &au) const override; + ~ClamBCAnalysis() {} + virtual void run(llvm::Module & m); virtual uint32_t getTypeID(const llvm::Type *const t) { TypeMapTy::iterator I = typeIDs.find(t); + if (I == typeIDs.end()){ + DEBUG_NONPOINTER("BAD VALUE"); + DEBUG_VALUE(t); + } + assert((I != typeIDs.end()) && "Type ID requested for unknown type"); return I->second; } @@ -245,4 +265,25 @@ class ClamBCAnalyzer : public llvm::ModulePass } }; +class ClamBCAnalyzer : public llvm::AnalysisInfoMixin +{ + protected: + ClamBCAnalysis clamBCAnalysis; + + public: + friend llvm::AnalysisInfoMixin ; + static llvm::AnalysisKey Key; + + ClamBCAnalyzer() : clamBCAnalysis() {} + virtual ~ClamBCAnalyzer(){} + + typedef ClamBCAnalysis Result; + + ClamBCAnalysis & run(llvm::Module & mod, llvm::ModuleAnalysisManager & mam) { + clamBCAnalysis.run(mod); + + return clamBCAnalysis; + } +}; + #endif //CLAMBC_ANALYZER_H_ diff --git 
a/libclambcc/ClamBCChangeMallocArgSize/CMakeLists.txt b/libclambcc/ClamBCChangeMallocArgSize/CMakeLists.txt new file mode 100644 index 0000000000..eea511e129 --- /dev/null +++ b/libclambcc/ClamBCChangeMallocArgSize/CMakeLists.txt @@ -0,0 +1,72 @@ +# Copyright (C) 2021 Cisco Systems, Inc. and/or its affiliates. All rights reserved. + +# +# The clambcchangemallocargsize object library +# +add_library(clambcchangemallocargsize_obj OBJECT) +target_sources(clambcchangemallocargsize_obj + PRIVATE + ClamBCChangeMallocArgSize.cpp +) + +target_include_directories(clambcchangemallocargsize_obj + PRIVATE + ${CMAKE_BINARY_DIR} # For clambc-version.h (generated file) + . # For Common/clambc.h + .. # For clambc.h #TODO: change all passes to use "Common" and then delete this line. + ${LLVM_INCLUDE_DIRS} +) + +set_target_properties(clambcchangemallocargsize_obj PROPERTIES COMPILE_FLAGS "${WARNCXXFLAGS}") + +# +# For testing +# +#target_compile_definitions(clambcchangemallocargsize_obj -DLOG_BEFORE_AFTER=1) + +# +# The clambcchangemallocargsize shared library. +# +add_library( clambcchangemallocargsize SHARED ) +target_link_libraries( clambcchangemallocargsize + PUBLIC + clambcchangemallocargsize_obj ) +set_target_properties( clambcchangemallocargsize PROPERTIES + VERSION ${LIBCLAMBC_VERSION} + SOVERSION ${LIBCLAMBC_SOVERSION} ) + +target_link_directories(clambcchangemallocargsize PRIVATE ${LLVM_LIBRARY_DIRS}) +target_link_libraries(clambcchangemallocargsize PUBLIC ${LLVM_LIBS}) + +if(WIN32) + install(TARGETS clambcchangemallocargsize DESTINATION .) 
+ + # Also install shared library (DLL) dependencies + install(CODE [[ + file(GET_RUNTIME_DEPENDENCIES + LIBRARIES + $ + RESOLVED_DEPENDENCIES_VAR _r_deps + UNRESOLVED_DEPENDENCIES_VAR _u_deps + DIRECTORIES + ${LLVM_LIBRARY_DIRS} + ) + foreach(_file ${_r_deps}) + string(TOLOWER ${_file} _file_lower) + if(NOT ${_file_lower} MATCHES "c:[\\/]windows[\\/]system32.*") + file(INSTALL + DESTINATION "${CMAKE_INSTALL_PREFIX}" + TYPE SHARED_LIBRARY + FOLLOW_SYMLINK_CHAIN + FILES "${_file}" + ) + endif() + endforeach() + #message("UNRESOLVED_DEPENDENCIES_VAR: ${_u_deps}") + ]]) +else() + install(TARGETS clambcchangemallocargsize DESTINATION ${CMAKE_INSTALL_LIBDIR}) +endif() + + + diff --git a/libclambcc/ClamBCChangeMallocArgSize/ClamBCChangeMallocArgSize.cpp b/libclambcc/ClamBCChangeMallocArgSize/ClamBCChangeMallocArgSize.cpp index 65111698bd..d71977b18d 100644 --- a/libclambcc/ClamBCChangeMallocArgSize/ClamBCChangeMallocArgSize.cpp +++ b/libclambcc/ClamBCChangeMallocArgSize/ClamBCChangeMallocArgSize.cpp @@ -1,162 +1,182 @@ #include -#include "llvm/IR/Function.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/Instructions.h" -#include "llvm/Support/raw_ostream.h" +#include +#include +#include +#include -#include "llvm/IR/LegacyPassManager.h" -#include "llvm/Transforms/IPO/PassManagerBuilder.h" +#include +#include + +#include +#include #include "Common/clambc.h" using namespace llvm; -namespace -{ -class ChangeMallocArgSize : public ModulePass -{ - protected: - std::vector changeValues; +namespace ChangeMallocArgSize { + class ChangeMallocArgSize : public PassInfoMixin + { + protected: + std::vector changeValues; - Module* pMod = nullptr; - IntegerType* dstType = nullptr; + Module* pMod = nullptr; + IntegerType* dstType = nullptr; - void addChangeValue(PHINode* pv) - { - if (llvm::isa(pv)) { - return; - } + void addChangeValue(PHINode* pv) + { + if (llvm::isa(pv)) { + return; + } - if (changeValues.end() == std::find(changeValues.begin(), changeValues.end(), pv)) { - 
changeValues.push_back(pv); - } - } + if (changeValues.end() == std::find(changeValues.begin(), changeValues.end(), pv)) { + changeValues.push_back(pv); + } + } - void findSizes(BasicBlock* pBB) - { - for (auto i = pBB->begin(), e = pBB->end(); i != e; i++) { - CallInst* pCall = llvm::dyn_cast(i); - if (pCall) { - if ("malloc" == pCall->getCalledValue()->getName()) { - Value* pv = pCall->getOperand(0); - if (PHINode* pn = llvm::dyn_cast(pv)) { - addChangeValue(pn); + void findSizes(BasicBlock* pBB) + { + for (auto i = pBB->begin(), e = pBB->end(); i != e; i++) { + CallInst* pCall = llvm::dyn_cast(i); + if (pCall) { + Function * pFunc = pCall->getCalledFunction(); + if (pFunc && ("malloc" == pFunc->getName())) { + Value* pv = pCall->getOperand(0); + if (PHINode* pn = llvm::dyn_cast(pv)) { + addChangeValue(pn); + } + } } } } - } - } - void findSizes(Function* pFunc) - { - for (auto i = pFunc->begin(), e = pFunc->end(); i != e; i++) { - findSizes(llvm::cast(i)); - } - } + void findSizes(Function* pFunc) + { + for (auto i = pFunc->begin(), e = pFunc->end(); i != e; i++) { + findSizes(llvm::cast(i)); + } + } - void findSizes() - { - for (auto i = pMod->begin(), e = pMod->end(); i != e; i++) { - findSizes(llvm::cast(i)); - } - } - - /* Yes, I know there is a "getTerminator" function, but I have come across blocks - * that have more than one branch instruction (I think it is a bug in the runtime), but - * until that is resolved, I want to use this function. 
- */ - Instruction* findTerminator(BasicBlock* pb) - { - Instruction* inst = nullptr; - for (auto i = pb->begin(), e = pb->end(); i != e; i++) { - inst = llvm::cast(i); - if (llvm::isa(inst) || llvm::isa(inst)) { - break; + void findSizes() + { + for (auto i = pMod->begin(), e = pMod->end(); i != e; i++) { + findSizes(llvm::cast(i)); + } } - } - assert(inst && "Impossible, there is always a terminator."); - assert(inst == pb->getTerminator() && "How did this happen"); - return inst; - } + /* Yes, I know there is a "getTerminator" function, but I have come across blocks + * that have more than one branch instruction (I think it is a bug in the runtime), but + * until that is resolved, I want to use this function. + */ + Instruction* findTerminator(BasicBlock* pb) + { + Instruction* inst = nullptr; + for (auto i = pb->begin(), e = pb->end(); i != e; i++) { + inst = llvm::cast(i); + if (llvm::isa(inst) || llvm::isa(inst)) { + break; + } + } + assert(inst && "Impossible, there is always a terminator."); + assert(inst == pb->getTerminator() && "How did this happen"); + + return inst; + } - PHINode* getNewPHI(PHINode* pn) - { + PHINode* getNewPHI(PHINode* pn) + { - PHINode* newPN = PHINode::Create(dstType, pn->getNumIncomingValues(), "ChangeMallocArgSize_", pn); - for (size_t i = 0; i < pn->getNumIncomingValues(); i++) { - Value* pv = pn->getIncomingValue(i); - BasicBlock* pb = pn->getIncomingBlock(i); - Instruction* bTerm = findTerminator(pb); + PHINode* newPN = PHINode::Create(dstType, pn->getNumIncomingValues(), "ChangeMallocArgSize_", pn); + for (size_t i = 0; i < pn->getNumIncomingValues(); i++) { + Value* pv = pn->getIncomingValue(i); + BasicBlock* pb = pn->getIncomingBlock(i); + Instruction* bTerm = findTerminator(pb); - Instruction* pNew = CastInst::CreateZExtOrBitCast(pv, dstType, "ChangeMallocArgSize_zext_", bTerm); - newPN->addIncoming(pNew, pb); - } + Instruction* pNew = CastInst::CreateZExtOrBitCast(pv, dstType, "ChangeMallocArgSize_zext_", bTerm); + 
newPN->addIncoming(pNew, pb); + } - return newPN; - } + return newPN; + } - void fixBitWidths() - { + void fixBitWidths() + { - for (size_t i = 0; i < changeValues.size(); i++) { - PHINode* pn = changeValues[i]; + for (size_t i = 0; i < changeValues.size(); i++) { + PHINode* pn = changeValues[i]; - if (dstType != pn->getType()) { - PHINode* pRep = getNewPHI(pn); + if (dstType != pn->getType()) { + PHINode* pRep = getNewPHI(pn); - std::vector insts; + std::vector insts; - for (auto i = pn->user_begin(), e = pn->user_end(); i != e; i++) { - Instruction* inst = llvm::cast(*i); - insts.push_back(inst); - } - for (size_t i = 0; i < insts.size(); i++) { - Instruction* inst = insts[i]; - - if (PHINode* pn2 = llvm::dyn_cast(inst)) { - DEBUGERR << *pn2 << "\n"; - assert(0 && "SHOULD NEVER HAPPEN"); - } else { - auto* val = CastInst::CreateTruncOrBitCast(pRep, pn->getType(), "ChangeMallocArgSize_trunc_", inst); - - for (size_t j = 0; j < inst->getNumOperands(); j++) { - if (inst->getOperand(j) == pn) { - inst->setOperand(j, val); - break; + for (auto i = pn->user_begin(), e = pn->user_end(); i != e; i++) { + Instruction* inst = llvm::cast(*i); + insts.push_back(inst); + } + for (size_t i = 0; i < insts.size(); i++) { + Instruction* inst = insts[i]; + + if (PHINode* pn2 = llvm::dyn_cast(inst)) { + DEBUGERR << *pn2 << "\n"; + assert(0 && "SHOULD NEVER HAPPEN"); + } else { + auto* val = CastInst::CreateTruncOrBitCast(pRep, pn->getType(), "ChangeMallocArgSize_trunc_", inst); + + for (size_t j = 0; j < inst->getNumOperands(); j++) { + if (inst->getOperand(j) == pn) { + inst->setOperand(j, val); + break; + } + } } } + + pn->eraseFromParent(); } } + } - pn->eraseFromParent(); + public: + ChangeMallocArgSize() + { } - } - } - public: - static char ID; - ChangeMallocArgSize() - : ModulePass(ID) - { - } + virtual PreservedAnalyses run(Module & m, ModuleAnalysisManager & MAM) + { + pMod = &m; + DEBUGERR << "TODO: Evaluate whether or not we still need this." 
<< "\n"; + dstType = Type::getInt64Ty(pMod->getContext()); - virtual bool runOnModule(Module& m) override - { - pMod = &m; - dstType = Type::getInt64Ty(pMod->getContext()); + findSizes(); + + fixBitWidths(); - findSizes(); + return PreservedAnalyses::none(); + } + }; // end of struct ChangeMallocArgSize +} // end of ChangeMallocArgSize namespace + +// This part is the new way of registering your pass +extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK +llvmGetPassPluginInfo() { + return { + LLVM_PLUGIN_API_VERSION, "ChangeMallocArgSize", "v0.1", + [](PassBuilder &PB) { + PB.registerPipelineParsingCallback( + [](StringRef Name, ModulePassManager &FPM, + ArrayRef) { + if(Name == "clambc-change-malloc-arg-size"){ + FPM.addPass(ChangeMallocArgSize::ChangeMallocArgSize()); + return true; + } + return false; + } + ); + } + }; +} - fixBitWidths(); - return true; - } -}; // end of struct ChangeMallocArgSize -} // end of anonymous namespace -char ChangeMallocArgSize::ID = 0; -static RegisterPass X("clambc-change-malloc-arg-size", "ChangeMallocArgSize Pass", - false /* Only looks at CFG */, - false /* Analysis Pass */); diff --git a/libclambcc/ClamBCConvertMemsetsTo32Bit/CMakeLists.txt b/libclambcc/ClamBCConvertMemsetsTo32Bit/CMakeLists.txt new file mode 100644 index 0000000000..49847ee586 --- /dev/null +++ b/libclambcc/ClamBCConvertMemsetsTo32Bit/CMakeLists.txt @@ -0,0 +1,72 @@ +# Copyright (C) 2021 Cisco Systems, Inc. and/or its affiliates. All rights reserved. + +# +# The clambcconvertmemsetsto32bit object library +# +add_library(clambcconvertmemsetsto32bit_obj OBJECT) +target_sources(clambcconvertmemsetsto32bit_obj + PRIVATE + ClamBCConvertMemsetsTo32Bit.cpp +) + +target_include_directories(clambcconvertmemsetsto32bit_obj + PRIVATE + ${CMAKE_BINARY_DIR} # For clambc-version.h (generated file) + . # For Common/clambc.h + .. # For clambc.h #TODO: change all passes to use "Common" and then delete this line. 
+ ${LLVM_INCLUDE_DIRS} +) + +set_target_properties(clambcconvertmemsetsto32bit_obj PROPERTIES COMPILE_FLAGS "${WARNCXXFLAGS}") + +# +# For testing +# +#target_compile_definitions(clambcconvertmemsetsto32bit_obj -DLOG_BEFORE_AFTER=1) + +# +# The clambcconvertmemsetsto32bit shared library. +# +add_library( clambcconvertmemsetsto32bit SHARED ) +target_link_libraries( clambcconvertmemsetsto32bit + PUBLIC + clambcconvertmemsetsto32bit_obj ) +set_target_properties( clambcconvertmemsetsto32bit PROPERTIES + VERSION ${LIBCLAMBC_VERSION} + SOVERSION ${LIBCLAMBC_SOVERSION} ) + +target_link_directories(clambcconvertmemsetsto32bit PRIVATE ${LLVM_LIBRARY_DIRS}) +target_link_libraries(clambcconvertmemsetsto32bit PUBLIC ${LLVM_LIBS}) + +if(WIN32) + install(TARGETS clambcconvertmemsetsto32bit DESTINATION .) + + # Also install shared library (DLL) dependencies + install(CODE [[ + file(GET_RUNTIME_DEPENDENCIES + LIBRARIES + $ + RESOLVED_DEPENDENCIES_VAR _r_deps + UNRESOLVED_DEPENDENCIES_VAR _u_deps + DIRECTORIES + ${LLVM_LIBRARY_DIRS} + ) + foreach(_file ${_r_deps}) + string(TOLOWER ${_file} _file_lower) + if(NOT ${_file_lower} MATCHES "c:[\\/]windows[\\/]system32.*") + file(INSTALL + DESTINATION "${CMAKE_INSTALL_PREFIX}" + TYPE SHARED_LIBRARY + FOLLOW_SYMLINK_CHAIN + FILES "${_file}" + ) + endif() + endforeach() + #message("UNRESOLVED_DEPENDENCIES_VAR: ${_u_deps}") + ]]) +else() + install(TARGETS clambcconvertmemsetsto32bit DESTINATION ${CMAKE_INSTALL_LIBDIR}) +endif() + + + diff --git a/libclambcc/ClamBCConvertIntrinsics/ClamBCConvertIntrinsics.cpp b/libclambcc/ClamBCConvertMemsetsTo32Bit/ClamBCConvertMemsetsTo32Bit.cpp similarity index 57% rename from libclambcc/ClamBCConvertIntrinsics/ClamBCConvertIntrinsics.cpp rename to libclambcc/ClamBCConvertMemsetsTo32Bit/ClamBCConvertMemsetsTo32Bit.cpp index 981d97029c..a1d8eafaae 100644 --- a/libclambcc/ClamBCConvertIntrinsics/ClamBCConvertIntrinsics.cpp +++ b/libclambcc/ClamBCConvertMemsetsTo32Bit/ClamBCConvertMemsetsTo32Bit.cpp @@ -1,16 
+1,18 @@ #include -#include "llvm/IR/Function.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/Instructions.h" -#include "llvm/Support/raw_ostream.h" +#include +#include +#include +#include -#include "llvm/IR/DerivedTypes.h" +#include +#include +#include +#include #include - -#include "llvm/IR/LegacyPassManager.h" -#include "llvm/Transforms/IPO/PassManagerBuilder.h" +#include +#include #include "Common/clambc.h" @@ -18,21 +20,19 @@ using namespace llvm; -namespace -{ +namespace ClamBCConvertMemsetsTo32Bit { -class ConvertIntrinsics : public ModulePass +class ClamBCConvertMemsetsTo32Bit : public PassInfoMixin { public: static char ID; - ConvertIntrinsics() - : ModulePass(ID) {} + ClamBCConvertMemsetsTo32Bit() {} - virtual ~ConvertIntrinsics() {} + virtual ~ClamBCConvertMemsetsTo32Bit() {} - virtual bool runOnModule(Module& mod) + PreservedAnalyses run(Module & mod, ModuleAnalysisManager & MAM) { bChanged = false; pMod = &mod; @@ -46,7 +46,11 @@ class ConvertIntrinsics : public ModulePass delLst[i]->eraseFromParent(); } - return bChanged; + if (bChanged){ + return PreservedAnalyses::none(); + } + + return PreservedAnalyses::all(); } protected: @@ -67,7 +71,8 @@ class ConvertIntrinsics : public ModulePass { for (auto i = pBB->begin(), e = pBB->end(); i != e; i++) { if (CallInst* pci = llvm::dyn_cast(i)) { - if (Function* f = llvm::dyn_cast(pci->getCalledValue())) { + Function * f = pci->getCalledFunction(); + if (nullptr != f) { if ("llvm.memset.p0i8.i64" == f->getName()) { convertMemset(pci); } @@ -81,39 +86,25 @@ class ConvertIntrinsics : public ModulePass std::vector args; Type* i32Ty = Type::getInt32Ty(pMod->getContext()); - for (size_t i = 0; i < pci->getNumArgOperands(); i++) { + for (size_t i = 0; i < pci->arg_size(); i++) + { Value* pv = pci->getArgOperand(i); if (2 == i) { if (ConstantInt* ci = llvm::dyn_cast(pv)) { pv = ConstantInt::get(i32Ty, ci->getValue().getLimitedValue()); } else { - pv = CastInst::CreateTruncOrBitCast(pv, i32Ty, 
"ConvertIntrinsics_trunc_", pci); + pv = CastInst::CreateTruncOrBitCast(pv, i32Ty, "ClamBCConvertMemsetsTo32Bit_trunc_", pci); } } args.push_back(pv); } - Constant* f = getNewMemset(); - CallInst::Create(getMemsetType(), f, args, "", pci); + FunctionCallee f = pMod->getOrInsertFunction("llvm.memset.p0i8.i32", getMemsetType()); + CallInst::Create(f, args, "", pci); delLst.push_back(pci); } - llvm::Constant* getNewMemset() - { - static llvm::Constant* ret = nullptr; - - if (nullptr == ret) { - - FunctionType* retType = getMemsetType(); - ret = pMod->getOrInsertFunction("llvm.memset.p0i8.i32", retType); - - assert(ret && "Could not get memset"); - } - - return ret; - } - llvm::FunctionType* getMemsetType() { static FunctionType* retType = nullptr; @@ -129,7 +120,24 @@ class ConvertIntrinsics : public ModulePass } // end of anonymous namespace -char ConvertIntrinsics::ID = 0; -static RegisterPass XX("clambc-convert-intrinsics", "Convert Intrinsics to 32-bit", - false /* Only looks at CFG */, - false /* Analysis Pass */); +// This part is the new way of registering your pass +extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK +llvmGetPassPluginInfo() { + return { + LLVM_PLUGIN_API_VERSION, "ClamBCConvertMemsetsTo32Bit", "v0.1", + [](PassBuilder &PB) { + PB.registerPipelineParsingCallback( + [](StringRef Name, ModulePassManager &FPM, + ArrayRef) { + if(Name == "clambc-convert-memsets-to-32Bit"){ + FPM.addPass(ClamBCConvertMemsetsTo32Bit::ClamBCConvertMemsetsTo32Bit()); + return true; + } + return false; + } + ); + } + }; +} + + diff --git a/libclambcc/ClamBCExtendPHIsTo64Bit/CMakeLists.txt b/libclambcc/ClamBCExtendPHIsTo64Bit/CMakeLists.txt new file mode 100644 index 0000000000..0e96bf6fad --- /dev/null +++ b/libclambcc/ClamBCExtendPHIsTo64Bit/CMakeLists.txt @@ -0,0 +1,72 @@ +# Copyright (C) 2021 Cisco Systems, Inc. and/or its affiliates. All rights reserved. 
+ +# +# The clambcextendphisto64bit object library +# +add_library(clambcextendphisto64bit_obj OBJECT) +target_sources(clambcextendphisto64bit_obj + PRIVATE + ClamBCExtendPHIsTo64Bit.cpp +) + +target_include_directories(clambcextendphisto64bit_obj + PRIVATE + ${CMAKE_BINARY_DIR} # For clambc-version.h (generated file) + . # For Common/clambc.h + .. # For clambc.h #TODO: change all passes to use "Common" and then delete this line. + ${LLVM_INCLUDE_DIRS} +) + +set_target_properties(clambcextendphisto64bit_obj PROPERTIES COMPILE_FLAGS "${WARNCXXFLAGS}") + +# +# For testing +# +#target_compile_definitions(clambcextendphisto64bit_obj -DLOG_BEFORE_AFTER=1) + +# +# The clambcextendphisto64bit shared library. +# +add_library( clambcextendphisto64bit SHARED ) +target_link_libraries( clambcextendphisto64bit + PUBLIC + clambcextendphisto64bit_obj ) +set_target_properties( clambcextendphisto64bit PROPERTIES + VERSION ${LIBCLAMBC_VERSION} + SOVERSION ${LIBCLAMBC_SOVERSION} ) + +target_link_directories(clambcextendphisto64bit PRIVATE ${LLVM_LIBRARY_DIRS}) +target_link_libraries(clambcextendphisto64bit PUBLIC ${LLVM_LIBS}) + +if(WIN32) + install(TARGETS clambcextendphisto64bit DESTINATION .) 
+ + # Also install shared library (DLL) dependencies + install(CODE [[ + file(GET_RUNTIME_DEPENDENCIES + LIBRARIES + $ + RESOLVED_DEPENDENCIES_VAR _r_deps + UNRESOLVED_DEPENDENCIES_VAR _u_deps + DIRECTORIES + ${LLVM_LIBRARY_DIRS} + ) + foreach(_file ${_r_deps}) + string(TOLOWER ${_file} _file_lower) + if(NOT ${_file_lower} MATCHES "c:[\\/]windows[\\/]system32.*") + file(INSTALL + DESTINATION "${CMAKE_INSTALL_PREFIX}" + TYPE SHARED_LIBRARY + FOLLOW_SYMLINK_CHAIN + FILES "${_file}" + ) + endif() + endforeach() + #message("UNRESOLVED_DEPENDENCIES_VAR: ${_u_deps}") + ]]) +else() + install(TARGETS clambcextendphisto64bit DESTINATION ${CMAKE_INSTALL_LIBDIR}) +endif() + + + diff --git a/libclambcc/ClamBCExtendPHIsTo64Bit/ClamBCExtendPHIsTo64Bit.cpp b/libclambcc/ClamBCExtendPHIsTo64Bit/ClamBCExtendPHIsTo64Bit.cpp index 96566027b8..c9181e7b00 100644 --- a/libclambcc/ClamBCExtendPHIsTo64Bit/ClamBCExtendPHIsTo64Bit.cpp +++ b/libclambcc/ClamBCExtendPHIsTo64Bit/ClamBCExtendPHIsTo64Bit.cpp @@ -19,14 +19,11 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, * MA 02110-1301, USA. 
*/ -#include "../Common/bytecode_api.h" -#include "clambc.h" -#include "ClamBCModule.h" -#include "ClamBCAnalyzer/ClamBCAnalyzer.h" +#include "Common/bytecode_api.h" +#include "Common/clambc.h" #include "Common/ClamBCUtilities.h" #include -//#include "ClamBCTargetMachine.h" #include #include #include @@ -37,7 +34,6 @@ #include #include #include -//#include "llvm/Config/config.h" #include #include #include @@ -50,12 +46,16 @@ #include +#include +#include + using namespace llvm; -class ClamBCExtendPHIsTo64Bit : public ModulePass +class ClamBCExtendPHIsTo64Bit : public PassInfoMixin { protected: llvm::Module *pMod = nullptr; + bool bChanged = false; virtual void convertPHIs(Function *pFunc) { @@ -115,17 +115,17 @@ class ClamBCExtendPHIsTo64Bit : public ModulePass Instruction *cast = CastInst::CreateIntegerCast(newNode, origType, true, "ClamBCConvertPHINodes_", insPt); pn->replaceAllUsesWith(cast); pn->eraseFromParent(); + bChanged = true; } public: static char ID; - explicit ClamBCExtendPHIsTo64Bit() - : ModulePass(ID) {} + explicit ClamBCExtendPHIsTo64Bit() {} virtual ~ClamBCExtendPHIsTo64Bit() {} - virtual bool runOnModule(Module &m) + virtual PreservedAnalyses run(Module & m, ModuleAnalysisManager & MAM) { pMod = &m; @@ -135,16 +135,38 @@ class ClamBCExtendPHIsTo64Bit : public ModulePass convertPHIs(pFunc); } - return true; + if (bChanged){ + /* Since we changed the IR here invalidate all the previous analysis. + * We only want to invalidate the analysis when we change something, + * since it is expensive to compute. 
+ */ + return PreservedAnalyses::none(); + } + /*We didn't change anything, so keep the previous analysis.*/ + return PreservedAnalyses::all(); } }; -char ClamBCExtendPHIsTo64Bit::ID = 0; -static RegisterPass X("clambc-extend-phis-to-64bit", "ClamBCExtendPHIsTo64Bit Pass", - false /* Only looks at CFG */, - false /* Analysis Pass */); - -llvm::ModulePass *createClamBCExtendPHIsTo64Bit() -{ - return new ClamBCExtendPHIsTo64Bit(); +// This part is the new way of registering your pass +extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK +llvmGetPassPluginInfo() { + return { + LLVM_PLUGIN_API_VERSION, "ClamBCExtendPHIsTo64Bit", "v0.1", + [](PassBuilder &PB) { + PB.registerPipelineParsingCallback( + [](StringRef Name, ModulePassManager &FPM, + ArrayRef) { + if(Name == "clambc-extend-phis-to-64-bit"){ + FPM.addPass(ClamBCExtendPHIsTo64Bit()); + return true; + } + return false; + } + ); + } + }; } + + + + diff --git a/libclambcc/ClamBCLogicalCompiler/CMakeLists.txt b/libclambcc/ClamBCLogicalCompiler/CMakeLists.txt new file mode 100644 index 0000000000..ade02f4c28 --- /dev/null +++ b/libclambcc/ClamBCLogicalCompiler/CMakeLists.txt @@ -0,0 +1,72 @@ +# Copyright (C) 2021 Cisco Systems, Inc. and/or its affiliates. All rights reserved. + +# +# The clambclogicalcompiler object library +# +add_library(clambclogicalcompiler_obj OBJECT) +target_sources(clambclogicalcompiler_obj + PRIVATE + ClamBCLogicalCompiler.cpp +) + +target_include_directories(clambclogicalcompiler_obj + PRIVATE + ${CMAKE_BINARY_DIR} # For clambc-version.h (generated file) + . # For Common/clambc.h + .. # For clambc.h #TODO: change all passes to use "Common" and then delete this line. + ${LLVM_INCLUDE_DIRS} +) + +set_target_properties(clambclogicalcompiler_obj PROPERTIES COMPILE_FLAGS "${WARNCXXFLAGS}") + +# +# For testing +# +#target_compile_definitions(clambclogicalcompiler_obj -DLOG_BEFORE_AFTER=1) + +# +# The clambclogicalcompiler shared library. 
+# +add_library( clambclogicalcompiler SHARED ) +target_link_libraries( clambclogicalcompiler + PUBLIC + clambclogicalcompiler_obj ) +set_target_properties( clambclogicalcompiler PROPERTIES + VERSION ${LIBCLAMBC_VERSION} + SOVERSION ${LIBCLAMBC_SOVERSION} ) + +target_link_directories(clambclogicalcompiler PRIVATE ${LLVM_LIBRARY_DIRS}) +target_link_libraries(clambclogicalcompiler PUBLIC ${LLVM_LIBS}) + +if(WIN32) + install(TARGETS clambclogicalcompiler DESTINATION .) + + # Also install shared library (DLL) dependencies + install(CODE [[ + file(GET_RUNTIME_DEPENDENCIES + LIBRARIES + $ + RESOLVED_DEPENDENCIES_VAR _r_deps + UNRESOLVED_DEPENDENCIES_VAR _u_deps + DIRECTORIES + ${LLVM_LIBRARY_DIRS} + ) + foreach(_file ${_r_deps}) + string(TOLOWER ${_file} _file_lower) + if(NOT ${_file_lower} MATCHES "c:[\\/]windows[\\/]system32.*") + file(INSTALL + DESTINATION "${CMAKE_INSTALL_PREFIX}" + TYPE SHARED_LIBRARY + FOLLOW_SYMLINK_CHAIN + FILES "${_file}" + ) + endif() + endforeach() + #message("UNRESOLVED_DEPENDENCIES_VAR: ${_u_deps}") + ]]) +else() + install(TARGETS clambclogicalcompiler DESTINATION ${CMAKE_INSTALL_LIBDIR}) +endif() + + + diff --git a/libclambcc/ClamBCLogicalCompiler/ClamBCLogicalCompiler.cpp b/libclambcc/ClamBCLogicalCompiler/ClamBCLogicalCompiler.cpp index 18adfb04ae..76ad3d38ab 100644 --- a/libclambcc/ClamBCLogicalCompiler/ClamBCLogicalCompiler.cpp +++ b/libclambcc/ClamBCLogicalCompiler/ClamBCLogicalCompiler.cpp @@ -20,56 +20,53 @@ * MA 02110-1301, USA. 
*/ -#include "ClamBCModule.h" +#include "Common/clambc.h" +#include "Common/bytecode_api.h" +#include "Common/ClamBCDiagnostics.h" +#include "Common/ClamBCCommon.h" +#include "Common/ClamBCUtilities.h" + #include -#include "../Common/bytecode_api.h" -#include "clambc.h" -#include "ClamBCDiagnostics.h" -#include "ClamBCModule.h" -#include "ClamBCCommon.h" -#include "ClamBCUtilities.h" -#include "llvm/ADT/FoldingSet.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/StringSet.h" -#include "llvm/Analysis/ConstantFolding.h" +#include +#include +#include +#include #include -#include "llvm/Analysis/ValueTracking.h" +#include #include #include #include #include -//#include -#include -#include +#include +#include +#include #include -#include "llvm/Support/Debug.h" +#include #include -#include "llvm/Support/FormattedStream.h" -#include "llvm/Support/raw_ostream.h" +#include +#include #include -#include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Utils/Local.h" -#include "llvm/Transforms/IPO.h" +#include +#include +#include #include -//#include #include #include + #define DEBUG_TYPE "lsigcompiler" using namespace llvm; -namespace +namespace ClamBCLogicalCompiler { -class ClamBCLogicalCompiler : public ModulePass +class ClamBCLogicalCompiler : public PassInfoMixin { public: - static char ID; - ClamBCLogicalCompiler() - : ModulePass(ID) {} + ClamBCLogicalCompiler() {} - virtual bool runOnModule(Module &M); + virtual PreservedAnalyses run(Module & m, ModuleAnalysisManager & MAM); virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); @@ -90,9 +87,9 @@ class ClamBCLogicalCompiler : public ModulePass bool compileVirusNames(Module &M, unsigned kind); }; -char ClamBCLogicalCompiler::ID = 0; -RegisterPass X("clambc-lcompiler", - "ClamAV Logical Compiler"); + + + enum LogicalKind { LOG_SUBSIGNATURE, LOG_AND, @@ -601,13 +598,22 @@ class LogicalCompiler { Value *V = LI.getOperand(0); ConstantExpr *CE = dyn_cast(V); - if (!CE || CE->getOpcode() 
!= Instruction::GetElementPtr || - CE->getOperand(0) != GV || CE->getNumOperands() != 3 || - !cast(CE->getOperand(1))->isZero()) { - printDiagnostic("Logical signature: unsupported read", &LI); - return false; + ConstantInt * CI = nullptr; + if (CE) { + if (CE->getOpcode() != Instruction::GetElementPtr || + CE->getOperand(0) != GV || CE->getNumOperands() != 3 || + !cast(CE->getOperand(1))->isZero()) { + printDiagnostic("Logical signature: unsupported read", &LI); + return false; + } + CI = cast(CE->getOperand(2)); + } else { + /* In this case, we are directly loading the global, + * instead of using a getelementptr. + * It is likely that this would have been changed by O3. + */ + CI = ConstantInt::get(LI.getParent()->getParent()->getParent()->getContext(), APInt(64, 0)); } - ConstantInt *CI = cast(CE->getOperand(2)); Map[&LI] = LogicalNode::getSubSig(allNodes, CI->getValue().getZExtValue()); return true; } @@ -931,6 +937,25 @@ class LogicalCompiler } Instruction *pInst = llvm::cast(I); +#if 0 + /*Look through all operands of the instruction and add the + * constants to the logical map, so that we won't fail to create + * the siganture if O3 changes a logical expression to a constant + * at compile time. 
+ */ + for (size_t i = 0; i < pInst->getNumOperands(); i++){ + if (ConstantInt * pci = llvm::dyn_cast(pInst->getOperand(i))){ + if (pci->isOne()){ + LogicalNode * ln = LogicalNode::getTrue(allNodes) ; + Map[pci] = ln; + } else if (pci->isZero()){ + LogicalNode * ln = LogicalNode::getTrue(allNodes) ; + Map[pci] = ln; + } + } + } +#endif + switch (I->getOpcode()) { case Instruction::Load: valid &= processLoad(*cast(I)); @@ -965,18 +990,132 @@ class LogicalCompiler LogicalMap::iterator CondNode = Map.find(SI->getCondition()); LogicalMap::iterator TrueNode = Map.find(SI->getTrueValue()); LogicalMap::iterator FalseNode = Map.find(SI->getFalseValue()); + + +#if 0 + if (Map.end() == TrueNode){ + Value * pv = SI->getTrueValue(); + if (ConstantInt * pci = llvm::dyn_cast(pv)){ + if (pci->isOne()){ + LogicalNode * ln = LogicalNode::getTrue(allNodes) ; + Map[SI->getTrueValue()] = ln; + TrueNode = Map.find(SI->getTrueValue()); + } + } + } + +#endif + + +#if 0 if (CondNode == Map.end() || TrueNode == Map.end() || FalseNode == Map.end()) { + assert (0 && "FTT"); printDiagnostic("Logical signature: select operands must be logical" " expressions", SI); return false; } +#else + /*O3 creates blocks that look like the following, which are legitimate blocks. + * This is essentially an AND of all the %cmp.i instructions. + * Since the cmp instructions all have false at the end, comparisons will be skipped + * after one is found to be false, without having a bunch of branch instructions. + * + * We are going to handle these cases by only adding an 'and' or an 'or' if there is + * an actual logical operation, not for constants. 
+ * + + entry: + %0 = load i32, ptr @__clambc_match_counts, align 16 + %cmp.i116.not = icmp eq i32 %0, 0 + %1 = load i32, ptr getelementptr inbounds ([64 x i32], ptr @__clambc_match_counts, i64 0, i64 1), align 4 + %cmp.i112.not = icmp eq i32 %1, 0 + %or.cond = select i1 %cmp.i116.not, i1 %cmp.i112.not, i1 false + %2 = load i32, ptr getelementptr inbounds ([64 x i32], ptr @__clambc_match_counts, i64 0, i64 2), align 8 + %cmp.i108.not = icmp eq i32 %2, 0 + %or.cond1 = select i1 %or.cond, i1 %cmp.i108.not, i1 false + %3 = load i32, ptr getelementptr inbounds ([64 x i32], ptr @__clambc_match_counts, i64 0, i64 3), align 4 + %cmp.i104.not = icmp eq i32 %3, 0 + + + .... + + br i1 %or.cond15, label %lor.rhs, label %lor.end + + lor.rhs: ; preds = %entry + %17 = load i32, ptr getelementptr inbounds ([64 x i32], ptr @__clambc_match_counts, i64 0, i64 17), align 4 + %cmp.i = icmp ne i32 %17, 0 + br label %lor.end + + lor.end: ; preds = %lor.rhs, %entry + %18 = phi i1 [ true, %entry ], [ %cmp.i, %lor.rhs ] + ret i1 %18 + + */ + if (CondNode == Map.end() || (TrueNode == Map.end() && FalseNode == Map.end())){ + printDiagnostic("Logical signature: select condition must be logical" + " expression", + SI); + return false; + } +#endif // select cond, trueval, falseval -> cond && trueval || !cond && falseval - LogicalNode *N = LogicalNode::getAnd(CondNode->second, - TrueNode->second); - LogicalNode *NotCond = LogicalNode::getNot(CondNode->second); - LogicalNode *N2 = LogicalNode::getAnd(NotCond, FalseNode->second); - Map[SI] = LogicalNode::getOr(N, N2); + LogicalNode *N = nullptr; + LogicalNode *NotCond = nullptr; + LogicalNode *N2 = nullptr; + + if (TrueNode != Map.end()){ + N = LogicalNode::getAnd(CondNode->second, + TrueNode->second); + } else if (ConstantInt * pci = llvm::cast(SI->getTrueValue())){ + if (pci->isOne()){ + N = LogicalNode::getNode(*(CondNode->second)); + } else if (not pci->isZero()) { + printDiagnostic("Logical signature: Select true value must either be" + " a 
logical expression or a constant true/false integer.", + SI); + return false; + } + } else { + printDiagnostic("Logical signature: Select true value must either be" + " a logical expression or a constant true/false integer.", + SI); + return false; + } + + NotCond = LogicalNode::getNot(CondNode->second); + if (FalseNode != Map.end()){ + N2 = LogicalNode::getAnd(NotCond, FalseNode->second); + } else if (ConstantInt * pci = llvm::cast(SI->getFalseValue())){ + if (pci->isOne()){ + N2 = NotCond; + } else if (not pci->isZero()){ + printDiagnostic("Logical signature: Select false value must either be" + " a logical expression or a constant true/false integer.", + SI); + return false; + } + } else { + printDiagnostic("Logical signature: Select false value must either be" + " a logical expression or a constant true/false integer.", + SI); + return false; + } + + LogicalNode * res = nullptr; + if (N && N2){ + res = LogicalNode::getOr(N, N2); + } else if (N){ + res = N; + } else if (N2){ + res = N2; + } else { + /*SHOULD be impossible, but will add a check just in case.*/ + printDiagnostic("Logical signature: Malformed select statement.", + SI); + return false; + } + Map[SI] = res; break; } case Instruction::Ret: { @@ -1631,6 +1770,7 @@ bool ClamBCLogicalCompiler::compileVirusNames(Module &M, unsigned kind) bool Valid = true; for (auto I : F->users()) { +#if 0 Value *pv = nullptr; pv = llvm::cast(I); CallSite CS(pv); @@ -1644,14 +1784,48 @@ bool ClamBCLogicalCompiler::compileVirusNames(Module &M, unsigned kind) continue; } assert(CS.arg_size() == 2 && "setvirusname has 2 args"); +#else + CallInst * pCallInst = llvm::cast(I); + if (nullptr == pCallInst){ + assert (0 && "NOT sure how this is possible"); + continue; + } + + if (F != pCallInst->getCalledFunction()){ + + llvm::errs() << "<" << __FUNCTION__ << "::" << __LINE__ << ">NOT SURE HOW THIS IS POSSIBLE\n"; + + /*Not sure how this is possible, either*/ + printDiagnostic("setvirusname can only be directly called", + 
pCallInst); + Valid = false; + continue; + } +#endif + + if (2 != pCallInst->arg_size()){ + printDiagnostic("setvirusname has 2 args", pCallInst); + Valid = false; + continue; + } + std::string param; llvm::StringRef sr; +#if 0 Value *V = CS.getArgument(0); +#else +#endif + Value * V = llvm::cast(pCallInst->arg_begin()); + if (nullptr == V){ + printDiagnostic("Invalid argument passed to setvirusname", pCallInst); + Valid = false; + continue; + } bool result = getConstantStringInfo(V, sr); param = sr.str(); if (!result) { printDiagnostic("Argument of foundVirus() must be a constant string", - CS.getInstruction()); + pCallInst); Valid = false; continue; } @@ -1662,23 +1836,32 @@ bool ClamBCLogicalCompiler::compileVirusNames(Module &M, unsigned kind) if (!p.empty() && !virusNamesSet.count(p)) { printDiagnostic(Twine("foundVirus called with an undeclared virusname: ", p), - CS.getInstruction()); + pCallInst); Valid = false; continue; } // Add prefix std::string fullname = p.empty() ? virusNamePrefix : virusNamePrefix + "." 
+ p.str(); - IRBuilder<> builder(CS.getInstruction()->getParent()); + IRBuilder<> builder(pCallInst->getParent()); Value *C = builder.CreateGlobalStringPtr(fullname.c_str()); IntegerType *I32Ty = Type::getInt32Ty(M.getContext()); +#if 0 CS.setArgument(0, C); CS.setArgument(1, ConstantInt::get(I32Ty, fullname.size())); +#else + pCallInst->setArgOperand(0, C); + pCallInst->setArgOperand(1, ConstantInt::get(I32Ty, fullname.size())); +#endif } return Valid; } +#if 0 bool ClamBCLogicalCompiler::runOnModule(Module &M) +#else + PreservedAnalyses ClamBCLogicalCompiler::run(Module & M, ModuleAnalysisManager & MAM) +#endif { bool Valid = true; LogicalSignature = ""; @@ -1705,14 +1888,21 @@ bool ClamBCLogicalCompiler::runOnModule(Module &M) GVKind->setConstant(true); } if (!compileVirusNames(M, kind)) { - if (!kind || kind == BC_STARTUP) - return true; + if (!kind || kind == BC_STARTUP) { + // return true; + return PreservedAnalyses::all(); + } Valid = false; } if (F) { +#if 0 LoopInfo &li = getAnalysis(*F).getLoopInfo(); - if (functionHasLoop(F, li)) { +#else + FunctionAnalysisManager &fam = MAM.getResult(M).getManager(); + LoopInfo * li = &fam.getResult(*F); +#endif + if (functionHasLoop(F, *li)) { printDiagnostic("Logical signature: loop/recursion not supported", F); Valid = false; } @@ -1842,13 +2032,45 @@ bool ClamBCLogicalCompiler::runOnModule(Module &M) // diagnostic already printed exit(42); } - return true; + return PreservedAnalyses::none(); } -} // namespace -const PassInfo *const ClamBCLogicalCompilerID = &X; - -llvm::ModulePass *createClamBCLogicalCompiler() +#if 0 +const PassInfo *const ClamBCLogicalCompilerID = &X; llvm::ModulePass *createClamBCLogicalCompiler() { return new ClamBCLogicalCompiler(); } +#endif + + +#if 0 +char ClamBCLogicalCompiler::ID = 0; +RegisterPass X("clambc-lcompiler", + "ClamAV Logical Compiler"); +#else + +// This part is the new way of registering your pass +extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK 
+llvmGetPassPluginInfo() { + return { + LLVM_PLUGIN_API_VERSION, "ClamBCLogicalCompiler", "v0.1", + [](PassBuilder &PB) { + PB.registerPipelineParsingCallback( + [](StringRef Name, ModulePassManager &FPM, + ArrayRef) { + if(Name == "clambc-lcompiler"){ + FPM.addPass(ClamBCLogicalCompiler()); + return true; + } + return false; + } + ); + } + }; +} +#endif + + +} // namespace + + diff --git a/libclambcc/ClamBCLogicalCompilerHelper/CMakeLists.txt b/libclambcc/ClamBCLogicalCompilerHelper/CMakeLists.txt new file mode 100644 index 0000000000..27fdcc6be0 --- /dev/null +++ b/libclambcc/ClamBCLogicalCompilerHelper/CMakeLists.txt @@ -0,0 +1,72 @@ +# Copyright (C) 2021 Cisco Systems, Inc. and/or its affiliates. All rights reserved. + +# +# The clambclogicalcompilerhelper object library +# +add_library(clambclogicalcompilerhelper_obj OBJECT) +target_sources(clambclogicalcompilerhelper_obj + PRIVATE + ClamBCLogicalCompilerHelper.cpp +) + +target_include_directories(clambclogicalcompilerhelper_obj + PRIVATE + ${CMAKE_BINARY_DIR} # For clambc-version.h (generated file) + . # For Common/clambc.h + .. # For clambc.h #TODO: change all passes to use "Common" and then delete this line. + ${LLVM_INCLUDE_DIRS} +) + +set_target_properties(clambclogicalcompilerhelper_obj PROPERTIES COMPILE_FLAGS "${WARNCXXFLAGS}") + +# +# For testing +# +#target_compile_definitions(clambclogicalcompilerhelper_obj -DLOG_BEFORE_AFTER=1) + +# +# The clambclogicalcompilerhelper shared library. +# +add_library( clambclogicalcompilerhelper SHARED ) +target_link_libraries( clambclogicalcompilerhelper + PUBLIC + clambclogicalcompilerhelper_obj ) +set_target_properties( clambclogicalcompilerhelper PROPERTIES + VERSION ${LIBCLAMBC_VERSION} + SOVERSION ${LIBCLAMBC_SOVERSION} ) + +target_link_directories(clambclogicalcompilerhelper PRIVATE ${LLVM_LIBRARY_DIRS}) +target_link_libraries(clambclogicalcompilerhelper PUBLIC ${LLVM_LIBS}) + +if(WIN32) + install(TARGETS clambclogicalcompilerhelper DESTINATION .) 
+ + # Also install shared library (DLL) dependencies + install(CODE [[ + file(GET_RUNTIME_DEPENDENCIES + LIBRARIES + $ + RESOLVED_DEPENDENCIES_VAR _r_deps + UNRESOLVED_DEPENDENCIES_VAR _u_deps + DIRECTORIES + ${LLVM_LIBRARY_DIRS} + ) + foreach(_file ${_r_deps}) + string(TOLOWER ${_file} _file_lower) + if(NOT ${_file_lower} MATCHES "c:[\\/]windows[\\/]system32.*") + file(INSTALL + DESTINATION "${CMAKE_INSTALL_PREFIX}" + TYPE SHARED_LIBRARY + FOLLOW_SYMLINK_CHAIN + FILES "${_file}" + ) + endif() + endforeach() + #message("UNRESOLVED_DEPENDENCIES_VAR: ${_u_deps}") + ]]) +else() + install(TARGETS clambclogicalcompilerhelper DESTINATION ${CMAKE_INSTALL_LIBDIR}) +endif() + + + diff --git a/libclambcc/ClamBCLogicalCompilerHelper/ClamBCLogicalCompilerHelper.cpp b/libclambcc/ClamBCLogicalCompilerHelper/ClamBCLogicalCompilerHelper.cpp new file mode 100644 index 0000000000..77184ed9ba --- /dev/null +++ b/libclambcc/ClamBCLogicalCompilerHelper/ClamBCLogicalCompilerHelper.cpp @@ -0,0 +1,224 @@ +/* + * Compile LLVM bytecode to logical signatures. + * + * Copyright (C) 2009-2010 Sourcefire, Inc. + * + * Authors: Török Edvin + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. 
+ */ + +#include "Common/clambc.h" +#include "Common/bytecode_api.h" +#include "Common/ClamBCDiagnostics.h" +#include "Common/ClamBCCommon.h" +#include "Common/ClamBCUtilities.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Since the logical compiler requires 'setvirusname' to only be called with a string constant, + * we are going to undo the PHI nodes added by O3 that would have to + * + * + * Consider the code + + return.sink.split: ; preds = %if.end39, %for.end + %.str.1.sink = phi ptr [ @.str, %for.end ], [ @.str.1, %if.end39 ] + %call.i70 = call i32 @setvirusname(ptr noundef nonnull %.str.1.sink, i32 noundef 0) #6 + br label %return + + We will just add the calls to setvirusname to the predecessor basic blocks. + * + * + */ + + +#define DEBUG_TYPE "lsigcompilerhelper" + +using namespace llvm; + +namespace ClamBCLogicalCompilerHelper +{ + + class ClamBCLogicalCompilerHelper : public PassInfoMixin + { + public: + ClamBCLogicalCompilerHelper() {} + + virtual PreservedAnalyses run(Module & m, ModuleAnalysisManager & MAM); + virtual void getAnalysisUsage(AnalysisUsage &AU) const + { + } + + protected: + llvm::Module *pMod = nullptr; + std::vector erase; + bool bChanged = false; + + virtual void populateArgs(const CallInst * pci, std::vector & args){ + for (auto i = pci->arg_begin(), e = pci->arg_end(); i != e; i++){ + args.push_back(llvm::dyn_cast(i)); + } + } + virtual void processPHI(PHINode * phi, Function * pCalledFunction, std::vector & args); + + virtual void fixupSetVirusNameCalls(); + + size_t getBranchIdx(llvm::BranchInst * pBranch, llvm::BasicBlock * pBB); + }; + + + size_t ClamBCLogicalCompilerHelper::getBranchIdx(llvm::BranchInst * pBranch, llvm::BasicBlock * pBB){ + for ( size_t ret = 0; ret < pBranch->getNumSuccessors(); 
ret++){ + if (pBranch->getSuccessor(ret) == pBB){ + return ret; + } + } + + ClamBCStop("Branch Instruction is not a predecessor to phi.", pBranch); + + return -1; + } + + /* + * Add calls to setvirusname for each constant string, rather allowing a phinode to + * choose the string. This is a requirement for ClamBCLogicalCompiler. + */ + void ClamBCLogicalCompilerHelper::processPHI(PHINode * phi, Function * pCalledFunction, std::vector & args){ + + for (size_t i = 0; i < phi->getNumIncomingValues(); i++){ + BasicBlock * pBB = phi->getIncomingBlock(i); + Value * pVal = phi->getIncomingValue(i); + + Instruction * pTerm = pBB->getTerminator(); + BranchInst * pBranch = llvm::cast(pTerm); /*I know this is a BranchInst, + and not a ReturnInst, because + it is a predecessor block to + my phi node, so no need for + a dyn_cast*/ + size_t branchIdx = getBranchIdx(pBranch, phi->getParent()); + + BasicBlock * pNew = BasicBlock::Create(pMod->getContext(), + "ClamBCLogicalCompilerHelper_call_SetVirusName_", phi->getParent()->getParent(), phi->getParent()); + pBranch->setSuccessor(branchIdx, pNew); + + args[0] = pVal; + + CallInst::Create(pCalledFunction->getFunctionType(), pCalledFunction, args, "ClamBCLogicalCompilerHelper_callInst", pNew); + BranchInst::Create(phi->getParent(), pNew); + } + } + + /* + * Find all calls to setvirusname, and make sure they aren't loading the + * first argument from a variable. 
+ */ + void ClamBCLogicalCompilerHelper::fixupSetVirusNameCalls(){ + + std::vector calls; + Function *svn = pMod->getFunction("setvirusname"); + if (nullptr == svn){ + return; + } + for (auto iter : svn->users()) { + if (CallInst * pci = llvm::dyn_cast(iter)){ + Value * operand = pci->getOperand(0); + + if (PHINode * phi = llvm::dyn_cast(operand)){ + calls.push_back(pci); + } + } + } + + for (size_t i = 0; i < calls.size(); i++){ + CallInst * pci = calls[i]; + PHINode * phi = llvm::dyn_cast(pci->getOperand(0)); + std::vector args; + populateArgs(pci, args); + processPHI(phi, svn, args); + + erase.push_back(pci); + erase.push_back(phi); + } + + for (size_t i = 0; i < erase.size(); i++){ + erase[i]->eraseFromParent(); + } + + } + + + PreservedAnalyses ClamBCLogicalCompilerHelper::run(Module & mod, ModuleAnalysisManager & mam) + { + pMod = &mod; + + fixupSetVirusNameCalls(); + + if (bChanged){ + return PreservedAnalyses::none(); + } + + return PreservedAnalyses::all(); + } + + // This part is the new way of registering your pass + extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK + llvmGetPassPluginInfo() { + return { + LLVM_PLUGIN_API_VERSION, "ClamBCLogicalCompilerHelper", "v0.1", + [](PassBuilder &PB) { + PB.registerPipelineParsingCallback( + [](StringRef Name, ModulePassManager &FPM, + ArrayRef) { + if(Name == "clambc-lcompiler-helper"){ + FPM.addPass(ClamBCLogicalCompilerHelper()); + return true; + } + return false; + } + ); + } + }; + } + +} // namespace + + diff --git a/libclambcc/ClamBCLowering/CMakeLists.txt b/libclambcc/ClamBCLowering/CMakeLists.txt new file mode 100644 index 0000000000..d3317a939d --- /dev/null +++ b/libclambcc/ClamBCLowering/CMakeLists.txt @@ -0,0 +1,150 @@ +# Copyright (C) 2021 Cisco Systems, Inc. and/or its affiliates. All rights reserved. 
+ +# +# The clambcloweringf object library +# +add_library(clambcloweringf_obj OBJECT) +target_sources(clambcloweringf_obj + PRIVATE + ClamBCLowering.cpp + ClamBCLoweringF.cpp +) + +target_include_directories(clambcloweringf_obj + PRIVATE + ${CMAKE_BINARY_DIR} # For clambc-version.h (generated file) + . # For Common/clambc.h + .. # For clambc.h #TODO: change all passes to use "Common" and then delete this line. + ${LLVM_INCLUDE_DIRS} +) + +set_target_properties(clambcloweringf_obj PROPERTIES COMPILE_FLAGS "${WARNCXXFLAGS}") + +# +# For testing +# +#target_compile_definitions(clambcloweringf_obj -DLOG_BEFORE_AFTER=1) + +# +# The clambcloweringf shared library. +# +add_library( clambcloweringf SHARED ) +target_link_libraries( clambcloweringf + PUBLIC + clambcloweringf_obj ) +set_target_properties( clambcloweringf PROPERTIES + VERSION ${LIBCLAMBC_VERSION} + SOVERSION ${LIBCLAMBC_SOVERSION} ) + +target_link_directories(clambcloweringf PRIVATE ${LLVM_LIBRARY_DIRS}) +target_link_libraries(clambcloweringf PUBLIC ${LLVM_LIBS}) + +if(WIN32) + install(TARGETS clambcloweringf DESTINATION .) 
+ + # Also install shared library (DLL) dependencies + install(CODE [[ + file(GET_RUNTIME_DEPENDENCIES + LIBRARIES + $ + RESOLVED_DEPENDENCIES_VAR _r_deps + UNRESOLVED_DEPENDENCIES_VAR _u_deps + DIRECTORIES + ${LLVM_LIBRARY_DIRS} + ) + foreach(_file ${_r_deps}) + string(TOLOWER ${_file} _file_lower) + if(NOT ${_file_lower} MATCHES "c:[\\/]windows[\\/]system32.*") + file(INSTALL + DESTINATION "${CMAKE_INSTALL_PREFIX}" + TYPE SHARED_LIBRARY + FOLLOW_SYMLINK_CHAIN + FILES "${_file}" + ) + endif() + endforeach() + #message("UNRESOLVED_DEPENDENCIES_VAR: ${_u_deps}") + ]]) +else() + install(TARGETS clambcloweringf DESTINATION ${CMAKE_INSTALL_LIBDIR}) +endif() + + + + + + + +# +# The clambcloweringnf object library +# +add_library(clambcloweringnf_obj OBJECT) +target_sources(clambcloweringnf_obj + PRIVATE + ClamBCLowering.cpp + ClamBCLoweringNF.cpp +) + +target_include_directories(clambcloweringnf_obj + PRIVATE + ${CMAKE_BINARY_DIR} # For clambc-version.h (generated file) + . # For Common/clambc.h + .. # For clambc.h #TODO: change all passes to use "Common" and then delete this line. + ${LLVM_INCLUDE_DIRS} +) + +set_target_properties(clambcloweringnf_obj PROPERTIES COMPILE_FLAGS "${WARNCXXFLAGS}") + +# +# For testing +# +#target_compile_definitions(clambcloweringnf_obj -DLOG_BEFORE_AFTER=1) + +# +# The clambcloweringnf shared library. +# +add_library( clambcloweringnf SHARED ) +target_link_libraries( clambcloweringnf + PUBLIC + clambcloweringnf_obj ) +set_target_properties( clambcloweringnf PROPERTIES + VERSION ${LIBCLAMBC_VERSION} + SOVERSION ${LIBCLAMBC_SOVERSION} ) + +target_link_directories(clambcloweringnf PRIVATE ${LLVM_LIBRARY_DIRS}) +target_link_libraries(clambcloweringnf PUBLIC ${LLVM_LIBS}) + +if(WIN32) + install(TARGETS clambcloweringnf DESTINATION .) 
+ + # Also install shared library (DLL) dependencies + install(CODE [[ + file(GET_RUNTIME_DEPENDENCIES + LIBRARIES + $ + RESOLVED_DEPENDENCIES_VAR _r_deps + UNRESOLVED_DEPENDENCIES_VAR _u_deps + DIRECTORIES + ${LLVM_LIBRARY_DIRS} + ) + foreach(_file ${_r_deps}) + string(TOLOWER ${_file} _file_lower) + if(NOT ${_file_lower} MATCHES "c:[\\/]windows[\\/]system32.*") + file(INSTALL + DESTINATION "${CMAKE_INSTALL_PREFIX}" + TYPE SHARED_LIBRARY + FOLLOW_SYMLINK_CHAIN + FILES "${_file}" + ) + endif() + endforeach() + #message("UNRESOLVED_DEPENDENCIES_VAR: ${_u_deps}") + ]]) +else() + install(TARGETS clambcloweringnf DESTINATION ${CMAKE_INSTALL_LIBDIR}) +endif() + + + + + diff --git a/libclambcc/ClamBCLowering/ClamBCLowering.cpp b/libclambcc/ClamBCLowering/ClamBCLowering.cpp index 95ce631502..c4b1c76e5b 100644 --- a/libclambcc/ClamBCLowering/ClamBCLowering.cpp +++ b/libclambcc/ClamBCLowering/ClamBCLowering.cpp @@ -19,101 +19,18 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, * MA 02110-1301, USA. 
*/ -#define DEBUG_TYPE "bclowering" -#include -#include "clambc.h" -#include "ClamBCModule.h" - -#include "llvm/ADT/STLExtras.h" -#include "llvm/Analysis/ConstantFolding.h" -#include -#include -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/Passes.h" -#include "llvm/Analysis/ValueTracking.h" -#include -#include -#include "llvm/CodeGen/IntrinsicLowering.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include "llvm/Support/CommandLine.h" -#include -#include -#include -#include "llvm/Support/FormattedStream.h" -#include "llvm/Support/raw_ostream.h" -#include -#include "llvm/Transforms/Scalar.h" -#include "llvm/CodeGen/IntrinsicLowering.h" - -using namespace llvm; - -namespace -{ -class ClamBCLowering : public ModulePass -{ - public: - static char ID; - ClamBCLowering() - : ModulePass(ID) {} - - virtual ~ClamBCLowering() {} +#include "ClamBCLowering.h" - virtual llvm::StringRef getPassName() const - { - return "ClamAV Bytecode Lowering"; - } - virtual bool runOnModule(Module &M); - virtual void getAnalysisUsage(AnalysisUsage &AU) const - { - } +#include "Common/clambc.h" - protected: - virtual bool isFinal() = 0; - - private: - void lowerIntrinsics(IntrinsicLowering *IL, Function &F); - void simplifyOperands(Function &F); - void downsizeIntrinsics(Function &F); - void splitGEPZArray(Function &F); - void fixupBitCasts(Function &F); - void fixupGEPs(Function &F); - void fixupPtrToInts(Function &F); -}; - -class ClamBCLoweringNF : public ClamBCLowering -{ - public: - ClamBCLoweringNF() {} - virtual ~ClamBCLoweringNF() {} +#include +#include - protected: - virtual bool isFinal() - { - return false; - } -}; -class ClamBCLoweringF : public ClamBCLowering -{ - public: - ClamBCLoweringF() {} - virtual ~ClamBCLoweringF() {} +using namespace llvm; - protected: - virtual bool isFinal() - { - return true; - } -}; +namespace ClamBCLowering { -char ClamBCLowering::ID = 0; void ClamBCLowering::lowerIntrinsics(IntrinsicLowering *IL, 
Function &F) { std::vector prototypesToGen; @@ -156,7 +73,7 @@ void ClamBCLowering::lowerIntrinsics(IntrinsicLowering *IL, Function &F) Builder.SetInsertPoint(BO); Value *V = Builder.CreatePointerCast(PII->getOperand(0), PointerType::getUnqual(Type::getInt8Ty(F.getContext()))); - V = Builder.CreateGEP(V, Idx); + V = Builder.CreateGEP(V->getType(), V, Idx); V = Builder.CreatePtrToInt(V, BO->getType()); BO->replaceAllUsesWith(V); } else if (GetElementPtrInst *GEPI = dyn_cast(II)) { @@ -284,7 +201,7 @@ void ClamBCLowering::simplifyOperands(Function &F) if (ConstantExpr *CE = dyn_cast(II->getOperand(i))) { if (CE->getOpcode() == Instruction::GetElementPtr) { // rip out GEP expr and load it - Ops.push_back(new LoadInst(CE, "gepex_load", SI)); + Ops.push_back(new LoadInst(CE->getType(), CE, "gepex_load", SI)); Changed = true; } } else { @@ -368,47 +285,47 @@ static inline void addIntrinsicFunctions(llvm::Module *pMod, Intrinsic::getDeclaration(pMod, Intrinsic::memmove, {i8Ptr, i8Ptr, i32, i1}))); } -static llvm::Value *getReplacementSizeOperand(llvm::CallSite &CS, llvm::Value *Len) +static llvm::Value *getReplacementSizeOperand(llvm::CallInst * pCallInst, llvm::Value *Len) { - llvm::LLVMContext &Context = CS.getParent()->getParent()->getParent()->getContext(); + LLVMContext & context = pCallInst->getParent()->getParent()->getParent()->getContext(); Value *NewLen = NULL; if (ConstantInt *C = dyn_cast(Len)) { - NewLen = ConstantInt::get(Type::getInt32Ty(Context), + NewLen = ConstantInt::get(Type::getInt32Ty(context), C->getValue().getLimitedValue((1ULL << 32) - 1)); } else { - NewLen = new TruncInst(Len, Type::getInt32Ty(Context), "lvl_dwn", CS.getInstruction()); + NewLen = new TruncInst(Len, Type::getInt32Ty(context), "lvl_dwn", pCallInst); } return NewLen; } -static void populateArgumentList(llvm::CallSite &CS, llvm::Value *newLen, size_t idx, std::vector &Ops) +static void populateArgumentList(llvm::CallInst * pCallInst, llvm::Value *newLen, size_t idx, std::vector 
&Ops) { - for (unsigned i = 0; i < CS.arg_size(); ++i) { + for (unsigned i = 0; i < pCallInst->arg_size(); ++i) { if (i == idx) { Ops.push_back(newLen); } else { - Ops.push_back(CS.getArgument(i)); + Ops.push_back(pCallInst->getArgOperand(i)); } } } -static bool replaceIntrinsicCalls(llvm::MemIntrinsic *MI, std::pair rep, size_t idx) +static bool replaceIntrinsicCalls(llvm::MemIntrinsic *pMemIntrinsic, std::pair rep, size_t idx) { - llvm::Function *pCalled = MI->getCalledFunction(); + llvm::Function *pCalled = pMemIntrinsic->getCalledFunction(); { if (rep.first == pCalled) { - llvm::CallSite CS(MI); - Value *Len = CS.getArgument(2); - llvm::Value *newLen = getReplacementSizeOperand(CS, Len); + //llvm::CallSite CS(MI); + Value *Len = pMemIntrinsic->getArgOperand(2); + llvm::Value *newLen = getReplacementSizeOperand(pMemIntrinsic, Len); std::vector args; - populateArgumentList(CS, newLen, idx, args); + populateArgumentList(pMemIntrinsic, newLen, idx, args); assert(args.size() == 4 && "malformed intrinsic call!"); - llvm::Instruction *i = CallInst::Create(rep.second, args, MI->getName(), MI); + llvm::Instruction *i = CallInst::Create(rep.second, args, pMemIntrinsic->getName(), pMemIntrinsic); assert(i && "Failed to create new CallInst"); return true; @@ -421,7 +338,6 @@ static bool replaceIntrinsicCalls(llvm::MemIntrinsic *MI, std::pair InstDel; std::vector> repPairs; @@ -458,7 +374,7 @@ static void gatherAllocasWithBitcasts(llvm::BasicBlock *bb, std::vector [#uses=2] @@ -496,19 +412,9 @@ void ClamBCLowering::fixupBitCasts(Function &F) continue; } - /*aragusa - * I am getting an assertion failure trying to cast a value that is not an ArrayType - * to an ArrayType. I don't fully understand the reason for doing what we are doing here. - * I am just going to check if AI->getAllocatedType is an array type. I may need to revisit this later. 
- */ if (not llvm::isa(AI->getAllocatedType())) { continue; } - /*Intentionally leaving this debug message in, because I don't think this code is executed very often, and - * I don't believe it is necessary. Once I get the bugs ironed out of the header files, I am going to - * see if this ever prints and does not have an assertion failure. The iterators were previously not working - * correctly and in fixing them, I believe I turned on code that wasn't previously working.*/ - const ArrayType *arTy = cast(AI->getAllocatedType()); Type *APTy = PointerType::getUnqual(arTy->getElementType()); @@ -517,7 +423,6 @@ void ClamBCLowering::fixupBitCasts(Function &F) AIC->setName("ClamBCLowering_fixupBitCasts"); BasicBlock::iterator IP = AI->getParent()->begin(); while (isa(IP)) ++IP; - //Value *Idx[] = {Zero, Zero}; llvm::ArrayRef Idxs = {Zero, Zero}; V = GetElementPtrInst::Create(nullptr, AIC, Idxs, "base_gepz", AI); @@ -545,7 +450,6 @@ void ClamBCLowering::fixupGEPs(Function &F) std::vector indexes; GetElementPtrInst::op_iterator J = GEPI->idx_begin(), JE = GEPI->idx_end(); for (; J != JE; ++J) { - //llvm::Value * v = llvm::cast(J); // push all constants if (Constant *C = dyn_cast(*J)) { indexes.push_back(C); @@ -557,9 +461,6 @@ void ClamBCLowering::fixupGEPs(Function &F) break; } Constant *C = cast(GEPI->getOperand(0)); - //Constant *GC = ConstantExpr::getInBoundsGetElementPtr(C, - // &indexes[0], - // indexes.size()); Constant *GC = ConstantExpr::getInBoundsGetElementPtr(nullptr, C, indexes); if (J != JE) { @@ -567,11 +468,10 @@ void ClamBCLowering::fixupGEPs(Function &F) for (; J != JE; ++J) { indexes.push_back(*J); } - //AllocaInst *AI = new AllocaInst(GC->getType(), "", Entry->begin()); AllocaInst *AI = new AllocaInst(GC->getType(), 0, "ClamBCLowering_fixupGEPs", llvm::cast(Entry->begin())); new StoreInst(GC, AI, GEPI); - Value *L = new LoadInst(AI, "ClamBCLowering_fixupGEPs", GEPI); - Value *V = GetElementPtrInst::CreateInBounds(L, indexes, "ClamBCLowering_fixupGEPs", 
GEPI); + Value *L = new LoadInst(AI->getType(), AI, "ClamBCLowering_fixupGEPs", GEPI); + Value *V = GetElementPtrInst::CreateInBounds(L->getType(), L, indexes, "ClamBCLowering_fixupGEPs", GEPI); GEPI->replaceAllUsesWith(V); GEPI->eraseFromParent(); } else { @@ -629,7 +529,7 @@ void ClamBCLowering::splitGEPZArray(Function &F) continue; } const PointerType *Ty = cast(GEPI->getPointerOperand()->getType()); - const ArrayType *ATy = dyn_cast(Ty->getElementType()); + const ArrayType *ATy = dyn_cast(Ty->getArrayElementType()); if (!ATy) { continue; } @@ -637,18 +537,21 @@ void ClamBCLowering::splitGEPZArray(Function &F) Constant *Zero = ConstantInt::get(Type::getInt32Ty(Ty->getContext()), 0); Value *VZ[] = {Zero, Zero}; // transform GEPZ: [4 x i16]* %p, 0, %i -> GEP1 i16* (bitcast)%p, %i - Value *C = GetElementPtrInst::CreateInBounds(GEPI->getPointerOperand(), VZ, "ClamBCLowering_splitGEPZArray", GEPI); - Value *NG = GetElementPtrInst::CreateInBounds(C, V, "ClamBCLowering_splitGEPZArray", GEPI); + Value *C = GetElementPtrInst::CreateInBounds(GEPI->getPointerOperand()->getType(), GEPI->getPointerOperand(), VZ, "ClamBCLowering_splitGEPZArray", GEPI); + Value *NG = GetElementPtrInst::CreateInBounds(C->getType(), C, V, "ClamBCLowering_splitGEPZArray", GEPI); GEPI->replaceAllUsesWith(NG); GEPI->eraseFromParent(); } } } -bool ClamBCLowering::runOnModule(Module &M) +PreservedAnalyses ClamBCLowering::run(Module & m, ModuleAnalysisManager & MAM) { - for (Module::iterator I = M.begin(), E = M.end(); + pMod = &m; + pContext = &(pMod->getContext()); + + for (Module::iterator I = pMod->begin(), E = pMod->end(); I != E; ++I) { if (I->isDeclaration()) continue; @@ -663,14 +566,11 @@ bool ClamBCLowering::runOnModule(Module &M) } } - return true; + return PreservedAnalyses::none(); } -} // namespace -static RegisterPass X("clambc-lowering-notfinal", "ClamBC Lowering Pass", - false /* Only looks at CFG */, - false /* Analysis Pass */); -static RegisterPass XX("clambc-lowering-final", 
"ClamBC Lowering Pass", - false /* Only looks at CFG */, - false /* Analysis Pass */); + +} //namespace + + diff --git a/libclambcc/ClamBCLowering/ClamBCLowering.h b/libclambcc/ClamBCLowering/ClamBCLowering.h new file mode 100644 index 0000000000..e6b3cbf3b2 --- /dev/null +++ b/libclambcc/ClamBCLowering/ClamBCLowering.h @@ -0,0 +1,61 @@ +/* + * Compile LLVM bytecode to ClamAV bytecode. + * + * Copyright (C) 2009-2010 Sourcefire, Inc. + * + * Authors: Török Edvin + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Softwaref + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. 
+ */ +//#define DEBUG_TYPE "bclowering" + +#include +#include + +namespace ClamBCLowering +{ + +class ClamBCLowering : public llvm::PassInfoMixin +{ + public: + ClamBCLowering() {} + + virtual ~ClamBCLowering() {} + + virtual llvm::StringRef getPassName() const + { + return "ClamAV Bytecode Lowering"; + } + virtual llvm::PreservedAnalyses run(llvm::Module & m, llvm::ModuleAnalysisManager & MAM) ; + virtual void getAnalysisUsage(llvm::AnalysisUsage &AU) const + { + } + + protected: + virtual bool isFinal() = 0; + llvm::LLVMContext * pContext = nullptr; + llvm::Module * pMod = nullptr; + + private: + void lowerIntrinsics(llvm::IntrinsicLowering *IL, llvm::Function &F); + void simplifyOperands(llvm::Function &F); + void downsizeIntrinsics(llvm::Function &F); + void splitGEPZArray(llvm::Function &F); + void fixupBitCasts(llvm::Function &F); + void fixupGEPs(llvm::Function &F); + void fixupPtrToInts(llvm::Function &F); +}; + +} diff --git a/libclambcc/ClamBCLowering/ClamBCLoweringF.cpp b/libclambcc/ClamBCLowering/ClamBCLoweringF.cpp new file mode 100644 index 0000000000..ecc52b5cdf --- /dev/null +++ b/libclambcc/ClamBCLowering/ClamBCLoweringF.cpp @@ -0,0 +1,70 @@ +/* + * Compile LLVM bytecode to ClamAV bytecode. + * + * Copyright (C) 2009-2010 Sourcefire, Inc. + * + * Authors: Török Edvin + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Softwaref + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. 
+ */ + +#include "ClamBCLowering.h" + +#include "Common/clambc.h" + +#include + + + +using namespace llvm; + +namespace ClamBCLowering { + +class ClamBCLoweringF : public ClamBCLowering +{ + public: + ClamBCLoweringF() {} + virtual ~ClamBCLoweringF() {} + + protected: + virtual bool isFinal() + { + return true; + } +}; + + +extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK +llvmGetPassPluginInfo() { + return { + LLVM_PLUGIN_API_VERSION, "ClamBCLowering", "v0.1", + [](PassBuilder &PB) { + PB.registerPipelineParsingCallback( + [](StringRef Name, ModulePassManager &FPM, + ArrayRef) { + if(Name == "clambc-lowering-final"){ + FPM.addPass(ClamBCLoweringF()); + return true; + } + return false; + } + ); + } + }; +} + + +} + diff --git a/libclambcc/ClamBCLowering/ClamBCLoweringNF.cpp b/libclambcc/ClamBCLowering/ClamBCLoweringNF.cpp new file mode 100644 index 0000000000..dca1b7d9c9 --- /dev/null +++ b/libclambcc/ClamBCLowering/ClamBCLoweringNF.cpp @@ -0,0 +1,69 @@ +/* + * Compile LLVM bytecode to ClamAV bytecode. + * + * Copyright (C) 2009-2010 Sourcefire, Inc. + * + * Authors: Török Edvin + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Softwaref + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. 
+ */ + +#include "ClamBCLowering.h" + +#include "Common/clambc.h" + +#include + + +using namespace llvm; + +namespace ClamBCLowering { + +class ClamBCLoweringNF : public ClamBCLowering +{ + public: + ClamBCLoweringNF() {} + virtual ~ClamBCLoweringNF() {} + + protected: + virtual bool isFinal() + { + return false; + } +}; + +// This part is the new way of registering your pass +extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK +llvmGetPassPluginInfo() { + return { + LLVM_PLUGIN_API_VERSION, "ClamBCLowering", "v0.1", + [](PassBuilder &PB) { + PB.registerPipelineParsingCallback( + [](StringRef Name, ModulePassManager &FPM, + ArrayRef) { + if(Name == "clambc-lowering-notfinal"){ + FPM.addPass(ClamBCLoweringNF()); + return true; + } + return false; + } + ); + } + }; +} + + +} //namespace + diff --git a/libclambcc/ClamBCModule/ClamBCModule.cpp b/libclambcc/ClamBCModule/ClamBCModule.cpp deleted file mode 100644 index 33de28eb59..0000000000 --- a/libclambcc/ClamBCModule/ClamBCModule.cpp +++ /dev/null @@ -1,30 +0,0 @@ - -#include -#include "llvm/IR/Function.h" -#include "llvm/Support/raw_ostream.h" - -#include "llvm/IR/LegacyPassManager.h" -#include "llvm/Transforms/IPO/PassManagerBuilder.h" - -#include "clambc.h" - -using namespace llvm; - -namespace -{ -struct ClamBCModule : public FunctionPass { - static char ID; - ClamBCModule() - : FunctionPass(ID) {} - - bool runOnFunction(Function &F) override - { - return false; - } -}; // end of struct ClamBCModule -} // end of anonymous namespace - -char ClamBCModule::ID = 0; -static RegisterPass X("clambc-module", "ClamBCModule Pass", - false /* Only looks at CFG */, - false /* Analysis Pass */); diff --git a/libclambcc/ClamBCOutlineEndiannessCalls/CMakeLists.txt b/libclambcc/ClamBCOutlineEndiannessCalls/CMakeLists.txt new file mode 100644 index 0000000000..8a858d903b --- /dev/null +++ b/libclambcc/ClamBCOutlineEndiannessCalls/CMakeLists.txt @@ -0,0 +1,72 @@ +# Copyright (C) 2021 Cisco Systems, Inc. 
and/or its affiliates. All rights reserved. + +# +# The clambcoutlineendiannesscalls object library +# +add_library(clambcoutlineendiannesscalls_obj OBJECT) +target_sources(clambcoutlineendiannesscalls_obj + PRIVATE + ClamBCOutlineEndiannessCalls.cpp +) + +target_include_directories(clambcoutlineendiannesscalls_obj + PRIVATE + ${CMAKE_BINARY_DIR} # For clambc-version.h (generated file) + . # For Common/clambc.h + .. # For clambc.h #TODO: change all passes to use "Common" and then delete this line. + ${LLVM_INCLUDE_DIRS} +) + +set_target_properties(clambcoutlineendiannesscalls_obj PROPERTIES COMPILE_FLAGS "${WARNCXXFLAGS}") + +# +# For testing +# +#target_compile_definitions(clambcoutlineendiannesscalls_obj -DLOG_BEFORE_AFTER=1) + +# +# The clambcoutlineendiannesscalls shared library. +# +add_library( clambcoutlineendiannesscalls SHARED ) +target_link_libraries( clambcoutlineendiannesscalls + PUBLIC + clambcoutlineendiannesscalls_obj ) +set_target_properties( clambcoutlineendiannesscalls PROPERTIES + VERSION ${LIBCLAMBC_VERSION} + SOVERSION ${LIBCLAMBC_SOVERSION} ) + +target_link_directories(clambcoutlineendiannesscalls PRIVATE ${LLVM_LIBRARY_DIRS}) +target_link_libraries(clambcoutlineendiannesscalls PUBLIC ${LLVM_LIBS}) + +if(WIN32) + install(TARGETS clambcoutlineendiannesscalls DESTINATION .) 
+ + # Also install shared library (DLL) dependencies + install(CODE [[ + file(GET_RUNTIME_DEPENDENCIES + LIBRARIES + $ + RESOLVED_DEPENDENCIES_VAR _r_deps + UNRESOLVED_DEPENDENCIES_VAR _u_deps + DIRECTORIES + ${LLVM_LIBRARY_DIRS} + ) + foreach(_file ${_r_deps}) + string(TOLOWER ${_file} _file_lower) + if(NOT ${_file_lower} MATCHES "c:[\\/]windows[\\/]system32.*") + file(INSTALL + DESTINATION "${CMAKE_INSTALL_PREFIX}" + TYPE SHARED_LIBRARY + FOLLOW_SYMLINK_CHAIN + FILES "${_file}" + ) + endif() + endforeach() + #message("UNRESOLVED_DEPENDENCIES_VAR: ${_u_deps}") + ]]) +else() + install(TARGETS clambcoutlineendiannesscalls DESTINATION ${CMAKE_INSTALL_LIBDIR}) +endif() + + + diff --git a/libclambcc/ClamBCOutlineEndiannessCalls/ClamBCOutlineEndiannessCalls.cpp b/libclambcc/ClamBCOutlineEndiannessCalls/ClamBCOutlineEndiannessCalls.cpp index 2048ecb7ce..f5c0380c1d 100644 --- a/libclambcc/ClamBCOutlineEndiannessCalls/ClamBCOutlineEndiannessCalls.cpp +++ b/libclambcc/ClamBCOutlineEndiannessCalls/ClamBCOutlineEndiannessCalls.cpp @@ -1,20 +1,23 @@ +#include "Common/clambc.h" + #include -#include "llvm/IR/Function.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/Instructions.h" -#include "llvm/Support/raw_ostream.h" +#include +#include +#include +#include -#include "llvm/IR/LegacyPassManager.h" -#include "llvm/Transforms/IPO/PassManagerBuilder.h" +#include + +#include +#include -#include "Common/clambc.h" using namespace llvm; namespace { -class OutlineEndniassCalls : public ModulePass + class ClamBCOutlineEndiannessCalls : public PassInfoMixin { protected: bool bChanged = false; @@ -25,7 +28,7 @@ class OutlineEndniassCalls : public ModulePass for (auto i = pBB->begin(), e = pBB->end(); i != e; i++) { CallInst* pCall = llvm::dyn_cast(i); if (pCall) { - if ("__is_bigendian" == pCall->getCalledValue()->getName()) { + if ("__is_bigendian" == pCall->getCalledFunction()->getName()) { calls.push_back(pCall); } } @@ -79,33 +82,56 @@ class OutlineEndniassCalls : public ModulePass 
public: static char ID; - OutlineEndniassCalls() - : ModulePass(ID) {} + ClamBCOutlineEndiannessCalls() {} - virtual bool runOnModule(Module& m) override + virtual PreservedAnalyses run(Module & m, ModuleAnalysisManager & MAM) { pMod = &m; std::vector calls = findCalls(); if (0 == calls.size()) { - return false; + return PreservedAnalyses::all(); } Function* pNew = getNewEndiannessFunction(calls[0]); for (size_t i = 0; i < calls.size(); i++) { - CallInst* pNewCall = CallInst::Create(pNew, "OutlineEndniassCalls_", calls[i]); + CallInst* pNewCall = CallInst::Create(pNew, "ClamBCOutlineEndiannessCalls_", calls[i]); calls[i]->replaceAllUsesWith(pNewCall); calls[i]->eraseFromParent(); } - return bChanged; + if (bChanged){ + return PreservedAnalyses::none(); + } + return PreservedAnalyses::all(); } -}; // end of struct OutlineEndniassCalls +}; // end of struct ClamBCOutlineEndiannessCalls } // end of anonymous namespace -char OutlineEndniassCalls::ID = 0; -static RegisterPass X("clambc-outline-endianness-calls", "OutlineEndniassCalls TEST Pass", - false /* Only looks at CFG */, - false /* Analysis Pass */); +// This part is the new way of registering your pass +extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK +llvmGetPassPluginInfo() { + return { + LLVM_PLUGIN_API_VERSION, "ClamBCOutlineEndiannessCalls", "v0.1", + [](PassBuilder &PB) { + PB.registerPipelineParsingCallback( + [](StringRef Name, ModulePassManager &FPM, + ArrayRef) { + if(Name == "clambc-outline-endianness-calls"){ + FPM.addPass(ClamBCOutlineEndiannessCalls()); + return true; + } + return false; + } + ); + } + }; +} + + + + + + diff --git a/libclambcc/ClamBCPrepareGEPsForWriter/CMakeLists.txt b/libclambcc/ClamBCPrepareGEPsForWriter/CMakeLists.txt new file mode 100644 index 0000000000..e4b09f4a1c --- /dev/null +++ b/libclambcc/ClamBCPrepareGEPsForWriter/CMakeLists.txt @@ -0,0 +1,72 @@ +# Copyright (C) 2021 Cisco Systems, Inc. and/or its affiliates. All rights reserved. 
+ +# +# The clambcpreparegepsforwriter object library +# +add_library(clambcpreparegepsforwriter_obj OBJECT) +target_sources(clambcpreparegepsforwriter_obj + PRIVATE + ClamBCPrepareGEPsForWriter.cpp +) + +target_include_directories(clambcpreparegepsforwriter_obj + PRIVATE + ${CMAKE_BINARY_DIR} # For clambc-version.h (generated file) + . # For Common/clambc.h + .. # For clambc.h #TODO: change all passes to use "Common" and then delete this line. + ${LLVM_INCLUDE_DIRS} +) + +set_target_properties(clambcpreparegepsforwriter_obj PROPERTIES COMPILE_FLAGS "${WARNCXXFLAGS}") + +# +# For testing +# +#target_compile_definitions(clambcpreparegepsforwriter_obj -DLOG_BEFORE_AFTER=1) + +# +# The clambcpreparegepsforwriter shared library. +# +add_library( clambcpreparegepsforwriter SHARED ) +target_link_libraries( clambcpreparegepsforwriter + PUBLIC + clambcpreparegepsforwriter_obj ) +set_target_properties( clambcpreparegepsforwriter PROPERTIES + VERSION ${LIBCLAMBC_VERSION} + SOVERSION ${LIBCLAMBC_SOVERSION} ) + +target_link_directories(clambcpreparegepsforwriter PRIVATE ${LLVM_LIBRARY_DIRS}) +target_link_libraries(clambcpreparegepsforwriter PUBLIC ${LLVM_LIBS}) + +if(WIN32) + install(TARGETS clambcpreparegepsforwriter DESTINATION .) 
+ + # Also install shared library (DLL) dependencies + install(CODE [[ + file(GET_RUNTIME_DEPENDENCIES + LIBRARIES + $ + RESOLVED_DEPENDENCIES_VAR _r_deps + UNRESOLVED_DEPENDENCIES_VAR _u_deps + DIRECTORIES + ${LLVM_LIBRARY_DIRS} + ) + foreach(_file ${_r_deps}) + string(TOLOWER ${_file} _file_lower) + if(NOT ${_file_lower} MATCHES "c:[\\/]windows[\\/]system32.*") + file(INSTALL + DESTINATION "${CMAKE_INSTALL_PREFIX}" + TYPE SHARED_LIBRARY + FOLLOW_SYMLINK_CHAIN + FILES "${_file}" + ) + endif() + endforeach() + #message("UNRESOLVED_DEPENDENCIES_VAR: ${_u_deps}") + ]]) +else() + install(TARGETS clambcpreparegepsforwriter DESTINATION ${CMAKE_INSTALL_LIBDIR}) +endif() + + + diff --git a/libclambcc/ClamBCPrepareGEPsForWriter/ClamBCPrepareGEPsForWriter.cpp b/libclambcc/ClamBCPrepareGEPsForWriter/ClamBCPrepareGEPsForWriter.cpp index 8033f1f50b..3604e8495a 100644 --- a/libclambcc/ClamBCPrepareGEPsForWriter/ClamBCPrepareGEPsForWriter.cpp +++ b/libclambcc/ClamBCPrepareGEPsForWriter/ClamBCPrepareGEPsForWriter.cpp @@ -19,14 +19,13 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, * MA 02110-1301, USA. 
*/ -#include "../Common/bytecode_api.h" -#include "clambc.h" -#include "ClamBCModule.h" +#include "Common/bytecode_api.h" +#include "Common/clambc.h" +#include "Common/ClamBCModule.h" #include "ClamBCAnalyzer/ClamBCAnalyzer.h" #include "Common/ClamBCUtilities.h" #include -//#include "ClamBCTargetMachine.h" #include #include #include @@ -37,7 +36,6 @@ #include #include #include -//#include "llvm/Config/config.h" #include #include #include @@ -47,12 +45,14 @@ #include #include #include +#include +#include #include using namespace llvm; -class ClamBCPrepareGEPsForWriter : public ModulePass +struct ClamBCPrepareGEPsForWriter : public PassInfoMixin { protected: llvm::Module *pMod = nullptr; @@ -60,8 +60,7 @@ class ClamBCPrepareGEPsForWriter : public ModulePass public: static char ID; - explicit ClamBCPrepareGEPsForWriter() - : ModulePass(ID) {} + explicit ClamBCPrepareGEPsForWriter() {} virtual ~ClamBCPrepareGEPsForWriter() {} @@ -232,10 +231,10 @@ class ClamBCPrepareGEPsForWriter : public ModulePass Value *gepiNew = underlyingObject; if (gepiNew->getType()->getPointerElementType()->isArrayTy()) { - gepiNew = GetElementPtrInst::Create(nullptr, gepiNew, Idxs, "processGEPI_2_", pgepi); + gepiNew = GetElementPtrInst::Create(gepiNew->getType()->getPointerElementType(), gepiNew, Idxs, "processGEPI_2_", pgepi); } - gepiNew = GetElementPtrInst::Create(nullptr, gepiNew, vCnt, "processGEPI_3_", pgepi); + gepiNew = GetElementPtrInst::Create(gepiNew->getType()->getPointerElementType(), gepiNew, vCnt, "processGEPI_3_", pgepi); CastInst *ciNew = CastInst::CreatePointerCast(gepiNew, pgepi->getType(), "processGEPI_", pgepi); @@ -305,10 +304,10 @@ class ClamBCPrepareGEPsForWriter : public ModulePass Value *gepiNew = underlyingObject; if (gepiNew->getType()->getPointerElementType()->isArrayTy()) { - gepiNew = GetElementPtrInst::Create(nullptr, gepiNew, Idxs, "processGEPI_0_", pgepi); + gepiNew = GetElementPtrInst::Create(gepiNew->getType()->getPointerElementType(), gepiNew, Idxs, 
"processGEPI_0_", pgepi); } - gepiNew = GetElementPtrInst::Create(nullptr, gepiNew, vCnt, "processGEPI_1_", pgepi); + gepiNew = GetElementPtrInst::Create(gepiNew->getType()->getPointerElementType(), gepiNew, vCnt, "processGEPI_1_", pgepi); CastInst *ciNew = CastInst::CreatePointerCast(gepiNew, pgepi->getType(), "processGEPI_", pgepi); @@ -372,7 +371,7 @@ class ClamBCPrepareGEPsForWriter : public ModulePass } } - virtual bool runOnModule(Module &m) + PreservedAnalyses run(Module & m, ModuleAnalysisManager & MAM) { pMod = &m; for (auto i = pMod->begin(), e = pMod->end(); i != e; i++) { @@ -387,7 +386,7 @@ class ClamBCPrepareGEPsForWriter : public ModulePass fixCasts(pFunc); } - return true; + return PreservedAnalyses::none(); } virtual void fixCasts(Function *pFunc) @@ -417,12 +416,30 @@ class ClamBCPrepareGEPsForWriter : public ModulePass } }; -char ClamBCPrepareGEPsForWriter::ID = 0; -static RegisterPass X("clambc-prepare-geps-for-writer", "ClamBCPrepareGEPsForWriter Pass", - false /* Only looks at CFG */, - false /* Analysis Pass */); - -llvm::ModulePass *createClamBCPrepareGEPsForWriter() -{ - return new ClamBCPrepareGEPsForWriter(); +// This part is the new way of registering your pass +extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK +llvmGetPassPluginInfo() { + return { + LLVM_PLUGIN_API_VERSION, "ClamBCPrepareGEPsForWriter", "v0.1", + [](PassBuilder &PB) { + PB.registerPipelineParsingCallback( + [](StringRef Name, ModulePassManager &FPM, + ArrayRef) { + if(Name == "clambc-prepare-geps-for-writer"){ + FPM.addPass(ClamBCPrepareGEPsForWriter()); + return true; + } + return false; + } + ); + } + }; } + + + + + + + + diff --git a/libclambcc/ClamBCPreserveABIs/CMakeLists.txt b/libclambcc/ClamBCPreserveABIs/CMakeLists.txt new file mode 100644 index 0000000000..a249e2e2c5 --- /dev/null +++ b/libclambcc/ClamBCPreserveABIs/CMakeLists.txt @@ -0,0 +1,72 @@ +# Copyright (C) 2021 Cisco Systems, Inc. and/or its affiliates. All rights reserved. 
+ +# +# The clambcpreserveabis object library +# +add_library(clambcpreserveabis_obj OBJECT) +target_sources(clambcpreserveabis_obj + PRIVATE + ClamBCPreserveABIs.cpp +) + +target_include_directories(clambcpreserveabis_obj + PRIVATE + ${CMAKE_BINARY_DIR} # For clambc-version.h (generated file) + . # For Common/clambc.h + .. # For clambc.h #TODO: change all passes to use "Common" and then delete this line. + ${LLVM_INCLUDE_DIRS} +) + +set_target_properties(clambcpreserveabis_obj PROPERTIES COMPILE_FLAGS "${WARNCXXFLAGS}") + +# +# For testing +# +#target_compile_definitions(clambcpreserveabis_obj -DLOG_BEFORE_AFTER=1) + +# +# The clambcpreserveabis shared library. +# +add_library( clambcpreserveabis SHARED ) +target_link_libraries( clambcpreserveabis + PUBLIC + clambcpreserveabis_obj ) +set_target_properties( clambcpreserveabis PROPERTIES + VERSION ${LIBCLAMBC_VERSION} + SOVERSION ${LIBCLAMBC_SOVERSION} ) + +target_link_directories(clambcpreserveabis PRIVATE ${LLVM_LIBRARY_DIRS}) +target_link_libraries(clambcpreserveabis PUBLIC ${LLVM_LIBS}) + +if(WIN32) + install(TARGETS clambcpreserveabis DESTINATION .) 
+ + # Also install shared library (DLL) dependencies + install(CODE [[ + file(GET_RUNTIME_DEPENDENCIES + LIBRARIES + $ + RESOLVED_DEPENDENCIES_VAR _r_deps + UNRESOLVED_DEPENDENCIES_VAR _u_deps + DIRECTORIES + ${LLVM_LIBRARY_DIRS} + ) + foreach(_file ${_r_deps}) + string(TOLOWER ${_file} _file_lower) + if(NOT ${_file_lower} MATCHES "c:[\\/]windows[\\/]system32.*") + file(INSTALL + DESTINATION "${CMAKE_INSTALL_PREFIX}" + TYPE SHARED_LIBRARY + FOLLOW_SYMLINK_CHAIN + FILES "${_file}" + ) + endif() + endforeach() + #message("UNRESOLVED_DEPENDENCIES_VAR: ${_u_deps}") + ]]) +else() + install(TARGETS clambcpreserveabis DESTINATION ${CMAKE_INSTALL_LIBDIR}) +endif() + + + diff --git a/libclambcc/ClamBCPreserveABIs/ClamBCPreserveABIs.cpp b/libclambcc/ClamBCPreserveABIs/ClamBCPreserveABIs.cpp index d735be0636..9e7fe3b8cc 100644 --- a/libclambcc/ClamBCPreserveABIs/ClamBCPreserveABIs.cpp +++ b/libclambcc/ClamBCPreserveABIs/ClamBCPreserveABIs.cpp @@ -7,8 +7,10 @@ #include -#include "llvm/IR/LegacyPassManager.h" -#include "llvm/Transforms/IPO/PassManagerBuilder.h" +//#include "llvm/IR/LegacyPassManager.h" +#include +#include +#include #include "Common/clambc.h" #include "Common/ClamBCUtilities.h" @@ -32,7 +34,7 @@ namespace * to fake functions. If it does find it (the second time), it removes those * calls. 
*/ -class ClamBCPreserveABIs : public ModulePass +class ClamBCPreserveABIs : public PassInfoMixin { protected: llvm::Module *pMod = nullptr; @@ -46,9 +48,9 @@ class ClamBCPreserveABIs : public ModulePass return; } FunctionType *pFunctionType = llvm::dyn_cast(pFunc->getType()); - std::string newname = pFunc->getName(); + std::string newname( pFunc->getName()); + pFunctionType = pFunc->getFunctionType(); newname += "_fake"; - pFunctionType = llvm::cast(llvm::cast(pFunc->getType())->getElementType()); Function *fakeFunction = Function::Create(pFunctionType, Function::ExternalLinkage, newname, pFunc->getParent()); fakeFunctions.push_back(fakeFunction); std::vector args; @@ -127,18 +129,16 @@ class ClamBCPreserveABIs : public ModulePass } public: - static char ID; - ClamBCPreserveABIs() - : ModulePass(ID) {} + ClamBCPreserveABIs() {} virtual ~ClamBCPreserveABIs() {} - bool runOnModule(Module &m) override + virtual PreservedAnalyses run(Module & m, ModuleAnalysisManager & MAM) { pMod = &m; if (removeFakeFunctions()) { - return bChanged; + return PreservedAnalyses::none(); } for (auto i = pMod->begin(), e = pMod->end(); i != e; i++) { @@ -157,13 +157,41 @@ class ClamBCPreserveABIs : public ModulePass writeMetadata(); - return bChanged; + if (bChanged){ + return PreservedAnalyses::none(); + } + return PreservedAnalyses::all(); } }; // end of struct ClamBCPreserveABIs } // end of anonymous namespace -char ClamBCPreserveABIs::ID = 0; -static RegisterPass X("clambc-preserve-abis", "Preserve ABIs", - false /* Only looks at CFG */, - false /* Analysis Pass */); + +// This part is the new way of registering your pass +extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK +llvmGetPassPluginInfo() { + return { + LLVM_PLUGIN_API_VERSION, "ClamBCPreserveABIs", "v0.1", + [](PassBuilder &PB) { + PB.registerPipelineParsingCallback( + [](StringRef Name, ModulePassManager &FPM, + ArrayRef) { + if(Name == "clambc-preserve-abis"){ + FPM.addPass(ClamBCPreserveABIs()); + return true; 
+ } + return false; + } + ); + } + }; +} + + + + + + + + + diff --git a/libclambcc/ClamBCRebuild/CMakeLists.txt b/libclambcc/ClamBCRebuild/CMakeLists.txt new file mode 100644 index 0000000000..f95952f586 --- /dev/null +++ b/libclambcc/ClamBCRebuild/CMakeLists.txt @@ -0,0 +1,72 @@ +# Copyright (C) 2021 Cisco Systems, Inc. and/or its affiliates. All rights reserved. + +# +# The clambcrebuild object library +# +add_library(clambcrebuild_obj OBJECT) +target_sources(clambcrebuild_obj + PRIVATE + ClamBCRebuild.cpp +) + +target_include_directories(clambcrebuild_obj + PRIVATE + ${CMAKE_BINARY_DIR} # For clambc-version.h (generated file) + . # For Common/clambc.h + .. # For clambc.h #TODO: change all passes to use "Common" and then delete this line. + ${LLVM_INCLUDE_DIRS} +) + +set_target_properties(clambcrebuild_obj PROPERTIES COMPILE_FLAGS "${WARNCXXFLAGS}") + +# +# For testing +# +#target_compile_definitions(clambcrebuild_obj -DLOG_BEFORE_AFTER=1) + +# +# The clambcrebuild shared library. +# +add_library( clambcrebuild SHARED ) +target_link_libraries( clambcrebuild + PUBLIC + clambcrebuild_obj ) +set_target_properties( clambcrebuild PROPERTIES + VERSION ${LIBCLAMBC_VERSION} + SOVERSION ${LIBCLAMBC_SOVERSION} ) + +target_link_directories(clambcrebuild PRIVATE ${LLVM_LIBRARY_DIRS}) +target_link_libraries(clambcrebuild PUBLIC ${LLVM_LIBS}) + +if(WIN32) + install(TARGETS clambcrebuild DESTINATION .) 
+ + # Also install shared library (DLL) dependencies + install(CODE [[ + file(GET_RUNTIME_DEPENDENCIES + LIBRARIES + $ + RESOLVED_DEPENDENCIES_VAR _r_deps + UNRESOLVED_DEPENDENCIES_VAR _u_deps + DIRECTORIES + ${LLVM_LIBRARY_DIRS} + ) + foreach(_file ${_r_deps}) + string(TOLOWER ${_file} _file_lower) + if(NOT ${_file_lower} MATCHES "c:[\\/]windows[\\/]system32.*") + file(INSTALL + DESTINATION "${CMAKE_INSTALL_PREFIX}" + TYPE SHARED_LIBRARY + FOLLOW_SYMLINK_CHAIN + FILES "${_file}" + ) + endif() + endforeach() + #message("UNRESOLVED_DEPENDENCIES_VAR: ${_u_deps}") + ]]) +else() + install(TARGETS clambcrebuild DESTINATION ${CMAKE_INSTALL_LIBDIR}) +endif() + + + diff --git a/libclambcc/ClamBCRebuild/ClamBCRebuild.cpp b/libclambcc/ClamBCRebuild/ClamBCRebuild.cpp index c253bbded3..b27d19588d 100644 --- a/libclambcc/ClamBCRebuild/ClamBCRebuild.cpp +++ b/libclambcc/ClamBCRebuild/ClamBCRebuild.cpp @@ -19,13 +19,17 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, * MA 02110-1301, USA. 
*/ + +#include "Common/ClamBCModule.h" +#include "Common/clambc.h" +#include "Common/ClamBCUtilities.h" + + #include -#include #include #include #include #include -#include #include #include #include @@ -33,23 +37,22 @@ #include #include #include +#include +#include #include #include #include #include #include -#include "Common/clambc.h" -#include "Common/ClamBCUtilities.h" using namespace llvm; -class ClamBCRebuild : public ModulePass, public InstVisitor +class ClamBCRebuild : public PassInfoMixin, public InstVisitor { public: static char ID; - explicit ClamBCRebuild() - : ModulePass(ID) {} + explicit ClamBCRebuild() {} virtual llvm::StringRef getPassName() const { return "ClamAV Bytecode Backend Rebuilder"; @@ -83,7 +86,6 @@ class ClamBCRebuild : public ModulePass, public InstVisitor Builder = new IRBuilder(*Context, TF); SE = nullptr; - Expander = nullptr; visitFunction(F, &NF); for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) { @@ -104,6 +106,13 @@ class ClamBCRebuild : public ModulePass, public InstVisitor for (unsigned i = 0; i < N->getNumIncomingValues(); i++) { Value *V = mapPHIValue(N->getIncomingValue(i)); BasicBlock *BB = mapBlock(N->getIncomingBlock(i)); + + if (V->getType() != N->getType()){ + if (V->getType()->isPointerTy() and N->getType()->isPointerTy()){ + V = CastInst::CreatePointerCast(V,N->getType(), + "ClamBCRebuild_fixCast_", BB->getTerminator()); + } + } PN->addIncoming(V, BB); } assert(PN->getNumIncomingValues() > 0); @@ -114,9 +123,6 @@ class ClamBCRebuild : public ModulePass, public InstVisitor fixupCalls(F, copy); F->setLinkage(GlobalValue::InternalLinkage); - if (Expander) { - delete Expander; - } delete Builder; return true; } @@ -146,7 +152,6 @@ class ClamBCRebuild : public ModulePass, public InstVisitor void fixupCallInst(CallInst *pCallInst, Function *pFunc) { assert(pCallInst->arg_size() == pFunc->arg_size() && "Incorrect number of arguments"); - assert(pCallInst->getCalledValue() == pFunc && "This CallInst doesn't 
call this function"); auto argIter = pFunc->arg_begin(), argEnd = pFunc->arg_end(); auto callIter = pCallInst->arg_begin(), callEnd = pCallInst->arg_end(); @@ -174,13 +179,12 @@ class ClamBCRebuild : public ModulePass, public InstVisitor } } - bool runOnModule(Module &M) + PreservedAnalyses run(Module & M, ModuleAnalysisManager & MAM) { pMod = &M; /* Taken from doInitialization. */ FMap.clear(); - //FMapRev.clear(); Context = &(pMod->getContext()); i8Ty = Type::getInt8Ty(*Context); @@ -189,6 +193,10 @@ class ClamBCRebuild : public ModulePass, public InstVisitor std::vector funcs; for (auto i = pMod->begin(), e = pMod->end(); i != e; i++) { Function *pFunc = llvm::cast(i); + const FunctionType *FTy = pFunc->getFunctionType(); + if (FTy->isVarArg()){ + return PreservedAnalyses::all(); + } funcs.push_back(pFunc); } for (size_t i = 0; i < funcs.size(); i++) { @@ -196,7 +204,7 @@ class ClamBCRebuild : public ModulePass, public InstVisitor runOnFunction(*pFunc); } - return true; + return PreservedAnalyses::none(); } private: @@ -217,11 +225,9 @@ class ClamBCRebuild : public ModulePass, public InstVisitor ScalarEvolution *SE = nullptr; Type *i8Ty = nullptr; Type *i8pTy = nullptr; - //FunctionPassManager *FPM = nullptr; LLVMContext *Context = nullptr; DenseSet visitedBB; IRBuilder *Builder = nullptr; - SCEVExpander *Expander = nullptr; void stop(const std::string &Msg, const llvm::Instruction *I) { @@ -229,19 +235,6 @@ class ClamBCRebuild : public ModulePass, public InstVisitor } friend class InstVisitor; - const Type *getInnerElementType(const CompositeType *CTy) - { - const Type *ETy = nullptr; - // get pointer to first element - do { - assert(CTy->indexValid(0u)); - ETy = CTy->getTypeAtIndex(0u); - CTy = dyn_cast(ETy); - } while (CTy); - assert(ETy->isIntegerTy()); - return ETy; - } - Type *rebuildType(Type *Ty, bool i8only = false) { assert(Ty); @@ -347,7 +340,8 @@ class ClamBCRebuild : public ModulePass, public InstVisitor Value *PV = mapValue(P); if (PV->getType() == 
Ty && !isa(PV)) { assert(!isa(PV) || - cast(Ty)->getElementType()->isIntegerTy()); + Ty->getPointerElementType()->isIntegerTy()); + return PV; } PV = PV->stripPointerCasts(); @@ -425,7 +419,7 @@ class ClamBCRebuild : public ModulePass, public InstVisitor void visitLoadInst(LoadInst &I) { Value *P = I.getPointerOperand(); - VMap[&I] = Builder->CreateLoad(mapPointer(P, P->getType()), + VMap[&I] = Builder->CreateLoad(I.getType(), mapPointer(P, P->getType()), I.getName()); } @@ -451,12 +445,18 @@ class ClamBCRebuild : public ModulePass, public InstVisitor I != E; ++I) { idxs.push_back(mapValue(*I)); } + + Type * pt = P->getType(); + if (llvm::isa(pt)){ + pt = pt->getPointerElementType(); + } + if (II.isInBounds()) { //P = Builder->CreateInBoundsGEP(P, idxs.begin(), idxs.end()); - P = Builder->CreateInBoundsGEP(P, idxs, "clambcRebuildInboundsGEP"); + P = Builder->CreateInBoundsGEP(pt, P, idxs, "clambcRebuildInboundsGEP"); } else { //P = Builder->CreateGEP(P, idxs.begin(), idxs.end()); - P = Builder->CreateGEP(P, idxs, "clambcRebuildGEP"); + P = Builder->CreateGEP(pt, P, idxs, "clambcRebuildGEP"); } VMap[&II] = makeCast(P, rebuildType(II.getType())); ; @@ -599,13 +599,32 @@ class ClamBCRebuild : public ModulePass, public InstVisitor return ret; } }; -char ClamBCRebuild::ID = 0; -static RegisterPass X("clambc-rebuild", "ClamBCRebuild Pass", - false /* Only looks at CFG */, - false /* Analysis Pass */); -llvm::ModulePass *createClamBCRebuild(void) -{ - return new ClamBCRebuild(); + +// This part is the new way of registering your pass +extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK +llvmGetPassPluginInfo() { + return { + LLVM_PLUGIN_API_VERSION, "ClamBCRebuild", "v0.1", + [](PassBuilder &PB) { + PB.registerPipelineParsingCallback( + [](StringRef Name, ModulePassManager &FPM, + ArrayRef) { + if(Name == "clambc-rebuild"){ + FPM.addPass(ClamBCRebuild()); + return true; + } + return false; + } + ); + } + }; } + + + + + + + diff --git 
a/libclambcc/ClamBCRegAlloc/CMakeLists.txt b/libclambcc/ClamBCRegAlloc/CMakeLists.txt new file mode 100644 index 0000000000..a78cbff901 --- /dev/null +++ b/libclambcc/ClamBCRegAlloc/CMakeLists.txt @@ -0,0 +1,72 @@ +# Copyright (C) 2021 Cisco Systems, Inc. and/or its affiliates. All rights reserved. + +# +# The clambcregalloc object library +# +add_library(clambcregalloc_obj OBJECT) +target_sources(clambcregalloc_obj + PRIVATE + ClamBCRegAlloc.cpp +) + +target_include_directories(clambcregalloc_obj + PRIVATE + ${CMAKE_BINARY_DIR} # For clambc-version.h (generated file) + . # For Common/clambc.h + .. # For clambc.h #TODO: change all passes to use "Common" and then delete this line. + ${LLVM_INCLUDE_DIRS} +) + +set_target_properties(clambcregalloc_obj PROPERTIES COMPILE_FLAGS "${WARNCXXFLAGS}") + +# +# For testing +# +#target_compile_definitions(clambcregalloc_obj -DLOG_BEFORE_AFTER=1) + +# +# The clambcregalloc shared library. +# +add_library( clambcregalloc SHARED ) +target_link_libraries( clambcregalloc + PUBLIC + clambcregalloc_obj ) +set_target_properties( clambcregalloc PROPERTIES + VERSION ${LIBCLAMBC_VERSION} + SOVERSION ${LIBCLAMBC_SOVERSION} ) + +target_link_directories(clambcregalloc PRIVATE ${LLVM_LIBRARY_DIRS}) +target_link_libraries(clambcregalloc PUBLIC ${LLVM_LIBS}) + +if(WIN32) + install(TARGETS clambcregalloc DESTINATION .) 
+ + # Also install shared library (DLL) dependencies + install(CODE [[ + file(GET_RUNTIME_DEPENDENCIES + LIBRARIES + $ + RESOLVED_DEPENDENCIES_VAR _r_deps + UNRESOLVED_DEPENDENCIES_VAR _u_deps + DIRECTORIES + ${LLVM_LIBRARY_DIRS} + ) + foreach(_file ${_r_deps}) + string(TOLOWER ${_file} _file_lower) + if(NOT ${_file_lower} MATCHES "c:[\\/]windows[\\/]system32.*") + file(INSTALL + DESTINATION "${CMAKE_INSTALL_PREFIX}" + TYPE SHARED_LIBRARY + FOLLOW_SYMLINK_CHAIN + FILES "${_file}" + ) + endif() + endforeach() + #message("UNRESOLVED_DEPENDENCIES_VAR: ${_u_deps}") + ]]) +else() + install(TARGETS clambcregalloc DESTINATION ${CMAKE_INSTALL_LIBDIR}) +endif() + + + diff --git a/libclambcc/ClamBCRegAlloc/ClamBCRegAlloc.cpp b/libclambcc/ClamBCRegAlloc/ClamBCRegAlloc.cpp new file mode 100644 index 0000000000..f7c651c31c --- /dev/null +++ b/libclambcc/ClamBCRegAlloc/ClamBCRegAlloc.cpp @@ -0,0 +1,277 @@ +/* + * Compile LLVM bytecode to ClamAV bytecode. + * + * Copyright (C) 2009-2010 Sourcefire, Inc. + * + * Authors: Török Edvin + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. 
+ */ +#include "ClamBCRegAlloc.h" +#include "Common/ClamBCUtilities.h" +#include "Common/clambc.h" + +#include +//#include "llvm/Analysis/LiveValues.h" +//#include "llvm/Config/config.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace llvm; +// We do have a virtually unlimited number of registers, but it is more cache +// efficient at runtime if we use a small number of them. +// Also it is easier for the interpreter if there are no phi nodes, +// so we transform phi nodes into a store/load pair into a temporary stack +// location. +// We don't use LLVM's register allocators, because they are for +// targets with fixed number of registers, and a much simpler allocator +// suffices for us. + + +llvm::AnalysisKey ClamBCRegAllocAnalyzer::Key; + +/*TODO: Should rework this so that we are not changing things with open iterators.*/ +void ClamBCRegAllocAnalysis::handlePHI(PHINode *PN) +{ + BasicBlock *BB = PN->getIncomingBlock(0); + for (unsigned i = 1; i < PN->getNumIncomingValues(); i++) { + BB = DT->findNearestCommonDominator(BB, PN->getIncomingBlock(i)); + } + Function *pFunc = BB->getParent(); + BasicBlock *pEntry = llvm::cast(pFunc->begin()); + Instruction *pFirst = llvm::cast(pEntry->begin()); + AllocaInst *AI = new AllocaInst(PN->getType(), pFunc->getAddressSpace(), ".phi", + pFirst); + llvm::IRBuilder<> builder(PN->getContext()); + unsigned MDDbgKind = PN->getContext().getMDKindID("dbg"); + if (MDDbgKind) { + if (MDNode *Dbg = PN->getMetadata(MDDbgKind)) { + DebugLoc dl(Dbg); + builder.SetCurrentDebugLocation(dl); + } + } + for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) { + BasicBlock *BB = PN->getIncomingBlock(i); + Value *V = PN->getIncomingValue(i); + builder.SetInsertPoint(BB->getTerminator()); + Instruction *I = builder.CreateStore(V, AI); + builder.SetInstDebugLocation(I); + } + BasicBlock::iterator It(PN); + do { + ++It; + } while (isa(It)); + builder.SetInsertPoint(&*It); + LoadInst 
*LI = builder.CreateLoad(AI->getAllocatedType(), AI, ".phiload"); + builder.SetInstDebugLocation(LI); + PN->replaceAllUsesWith(LI); + PN->eraseFromParent(); +} + +bool ClamBCRegAllocAnalysis::runOnFunction(Function &F) +{ + ValueMap.clear(); + RevValueMap.clear(); + bool Changed = false; + for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) { + BasicBlock &BB = *I; + BasicBlock::iterator J = BB.begin(); + while (J != BB.end()) { + PHINode *PN = dyn_cast(J); + if (!PN) + break; + ++J; + handlePHI(PN); + } + } + + unsigned id = 0; + for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); + I != E; ++I) { + Argument *A = llvm::cast(I); + ValueMap[A] = id; + if (RevValueMap.size() == id) { + RevValueMap.push_back(A); + } + ++id; + } + + for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) { + Instruction *II = llvm::cast(&*I); + if (ValueMap.count(II)) + continue; + if (II->getType()->getTypeID() == Type::VoidTyID) { + ValueMap[II] = ~0u; + continue; + } + if (II->use_empty() && !II->mayHaveSideEffects()) { + SkipMap.insert(II); + ValueMap[II] = ~0u; + continue; + } + + { + static int first = 1; + if (first){ + first = 0; + } + } + if (CastInst *BC = dyn_cast(II)) { + if (BitCastInst *BCI = dyn_cast(BC)) { + if (!BCI->isLosslessCast()) { + ClamBCStop("Non lossless bitcast is not supported", BCI); + } + + if (BCI->getSrcTy()->isPointerTy() and (not BCI->getDestTy()->isPointerTy())){ + ClamBCStop("Cast from pointer to non-pointer element", + BCI); + } + + if (AllocaInst *AI = dyn_cast(BCI->getOperand(0))) { + if (!AI->isArrayAllocation()) { + // we need to use a GEP 0,0 for bitcast here + ValueMap[II] = id; + if (RevValueMap.size() == id) { + RevValueMap.push_back(II); + } + ++id; + continue; + } + } + SkipMap.insert(II); + ValueMap[II] = getValueID(II->getOperand(0)); + continue; + } else if (llvm::isa(BC) or llvm::isa(BC)){ + ClamBCStop("Cast from pointer to non-pointer element", + BC); // BCI is null on this branch (the dyn_cast above failed); report the cast actually seen + } + } + if (II->hasOneUse()) { + // 
single-use store to alloca -> store directly to alloca + if (StoreInst *SI = dyn_cast(*II->use_begin())) { + if (AllocaInst *AI = dyn_cast(SI->getPointerOperand())) { + if (!ValueMap.count(AI)) { + ValueMap[AI] = id; + if (RevValueMap.size() == id) { + RevValueMap.push_back(II); + } else { + errs() << id << " " << __FILE__ << ":" << __LINE__ << "\n"; + } + ++id; + } + ValueMap[II] = getValueID(AI); + continue; + } + } + // single-use of load from alloca -> use directly value id of alloca + } + ValueMap[II] = id; + if (RevValueMap.size() == id) { + RevValueMap.push_back(II); + } else { + errs() << id << " " << __FILE__ << ":" << __LINE__ << "\n"; + } + ++id; + } + //TODO: reduce the number of virtual registers used, by using + // an algorithms that walks the dominatortree and does value liveness + // analysis. + return Changed; +} + +void ClamBCRegAllocAnalysis::dump() const +{ + for (ValueIDMap::const_iterator I = ValueMap.begin(), E = ValueMap.end(); + I != E; ++I) { + errs() << *I->first << " = " << I->second << "\n"; + } +} + +void ClamBCRegAllocAnalysis::revdump() const +{ + for (unsigned i = 0; i < RevValueMap.size(); ++i) { + errs() << i << ": "; + RevValueMap[i]->print(errs(), 0); + errs() << "\n"; + } +} + +unsigned ClamBCRegAllocAnalysis::buildReverseMap(std::vector &reverseMap) +{ + // Check using the older building code to determine changes due to building difference + // Note: this code can be removed if necessary + unsigned max = 0; + for (ValueIDMap::iterator I = ValueMap.begin(), E = ValueMap.end(); I != E; ++I) { + if (const Instruction *II = dyn_cast(I->first)) { + if (SkipMap.count(II)) + continue; + } + if (I->second == ~0u) + continue; + if (I->second > max) + max = I->second; + } + if ((max != 0) && (max + 1 != RevValueMap.size())) { + errs() << "mismatch in expected number of values in map at "; + errs() << __FILE__ << ":" << __LINE__ << "\n"; + errs() << "found " << max + 1 << ", expected " << RevValueMap.size() << "\n"; + revdump(); + 
assert(max + 1 == RevValueMap.size()); + return 0; + } + + // New building code, copies previously-built vector + reverseMap.resize(RevValueMap.size()); + for (unsigned i = 0; i < RevValueMap.size(); ++i) { + reverseMap[i] = RevValueMap[i]; + } + return RevValueMap.size(); +} + +void ClamBCRegAllocAnalysis::getAnalysisUsage(AnalysisUsage &AU) const +{ + AU.addRequired(); + + // Preserve the CFG, we only eliminate PHIs, and introduce some + // loads/stores. + AU.setPreservesCFG(); +} + + + +// This part is the new way of registering your pass +extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK +llvmGetPassPluginInfo() { + return { + LLVM_PLUGIN_API_VERSION, "ClamBCRegAlloc", "v0.1", + [](PassBuilder &PB) { + PB.registerAnalysisRegistrationCallback( + [](FunctionAnalysisManager &mam) { + mam.registerPass([] () { return ClamBCRegAllocAnalyzer(); } ); + } + ); + } + }; +} + + + diff --git a/libclambcc/ClamBCRegAlloc/ClamBCRegAlloc.h b/libclambcc/ClamBCRegAlloc/ClamBCRegAlloc.h new file mode 100644 index 0000000000..81c4a04445 --- /dev/null +++ b/libclambcc/ClamBCRegAlloc/ClamBCRegAlloc.h @@ -0,0 +1,114 @@ +/* + * Compile LLVM bytecode to ClamAV bytecode. + * + * Copyright (C) 2009-2010 Sourcefire, Inc. + * + * Authors: Török Edvin + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. 
+ */ +#ifndef CLAMBC_REGALLOC_H +#define CLAMBC_REGALLOC_H +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "Common/clambc.h" + +class ClamBCRegAllocAnalysis +{ + public: + static char ID; + explicit ClamBCRegAllocAnalysis() {} + + unsigned buildReverseMap(std::vector &); + bool skipInstruction(const llvm::Instruction *I) const + { + return SkipMap.count(I); + } + + unsigned getValueID(const llvm::Value *V) const + { + ValueIDMap::const_iterator I = ValueMap.find(V); + if (I == ValueMap.end()) { + DEBUGERR << "Error Value ID requested for unknown value (Printing below).\n"; + DEBUGERR << *V << "\n"; + assert(0 && "Value ID requested for unknown value"); + } + assert(I->second != ~0u && + "Value ID requested for unused/void instruction!"); + return I->second; + } + virtual bool runOnFunction(llvm::Function &F); + virtual void getAnalysisUsage(llvm::AnalysisUsage &AU) const; + void dump() const; + void revdump() const; + + virtual void setDominatorTree(llvm::DominatorTree* dt){ + DT = dt; + } + + private: + void handlePHI(llvm::PHINode *PN); + typedef llvm::DenseMap ValueIDMap; + ValueIDMap ValueMap; + std::vector RevValueMap; + llvm::DenseSet SkipMap; + llvm::DominatorTree *DT; + +}; + +class ClamBCRegAllocAnalyzer : public llvm::AnalysisInfoMixin { + + protected: + ClamBCRegAllocAnalysis clamBCRegAllocAnalysis; + + public: + + ClamBCRegAllocAnalyzer (){} + virtual ~ClamBCRegAllocAnalyzer (){} + + friend AnalysisInfoMixin ; + static llvm::AnalysisKey Key; + typedef ClamBCRegAllocAnalysis Result; + + ClamBCRegAllocAnalysis & run(llvm::Function & F, llvm::FunctionAnalysisManager & fam){ + + llvm::DominatorTree & dt = fam.getResult(F); + clamBCRegAllocAnalysis.setDominatorTree(&dt); + clamBCRegAllocAnalysis.runOnFunction(F); + clamBCRegAllocAnalysis.setDominatorTree(NULL); + + return clamBCRegAllocAnalysis; + } +}; + +#endif //CLAMBC_REGALLOC_H + + + + diff --git 
a/libclambcc/ClamBCRemoveFSHL/CMakeLists.txt b/libclambcc/ClamBCRemoveFSHL/CMakeLists.txt new file mode 100644 index 0000000000..baf9c814c4 --- /dev/null +++ b/libclambcc/ClamBCRemoveFSHL/CMakeLists.txt @@ -0,0 +1,72 @@ +# Copyright (C) 2021 Cisco Systems, Inc. and/or its affiliates. All rights reserved. + +# +# The clambcremovefshl object library +# +add_library(clambcremovefshl_obj OBJECT) +target_sources(clambcremovefshl_obj + PRIVATE + ClamBCRemoveFSHL.cpp +) + +target_include_directories(clambcremovefshl_obj + PRIVATE + ${CMAKE_BINARY_DIR} # For clambc-version.h (generated file) + . # For Common/clambc.h + .. # For clambc.h #TODO: change all passes to use "Common" and then delete this line. + ${LLVM_INCLUDE_DIRS} +) + +set_target_properties(clambcremovefshl_obj PROPERTIES COMPILE_FLAGS "${WARNCXXFLAGS}") + +# +# For testing +# +#target_compile_definitions(clambcremovefshl_obj -DLOG_BEFORE_AFTER=1) + +# +# The clambcremovefshl shared library. +# +add_library( clambcremovefshl SHARED ) +target_link_libraries( clambcremovefshl + PUBLIC + clambcremovefshl_obj ) +set_target_properties( clambcremovefshl PROPERTIES + VERSION ${LIBCLAMBC_VERSION} + SOVERSION ${LIBCLAMBC_SOVERSION} ) + +target_link_directories(clambcremovefshl PRIVATE ${LLVM_LIBRARY_DIRS}) +target_link_libraries(clambcremovefshl PUBLIC ${LLVM_LIBS}) + +if(WIN32) + install(TARGETS clambcremovefshl DESTINATION .) 
+ + # Also install shared library (DLL) dependencies + install(CODE [[ + file(GET_RUNTIME_DEPENDENCIES + LIBRARIES + $ + RESOLVED_DEPENDENCIES_VAR _r_deps + UNRESOLVED_DEPENDENCIES_VAR _u_deps + DIRECTORIES + ${LLVM_LIBRARY_DIRS} + ) + foreach(_file ${_r_deps}) + string(TOLOWER ${_file} _file_lower) + if(NOT ${_file_lower} MATCHES "c:[\\/]windows[\\/]system32.*") + file(INSTALL + DESTINATION "${CMAKE_INSTALL_PREFIX}" + TYPE SHARED_LIBRARY + FOLLOW_SYMLINK_CHAIN + FILES "${_file}" + ) + endif() + endforeach() + #message("UNRESOLVED_DEPENDENCIES_VAR: ${_u_deps}") + ]]) +else() + install(TARGETS clambcremovefshl DESTINATION ${CMAKE_INSTALL_LIBDIR}) +endif() + + + diff --git a/libclambcc/ClamBCRemoveFSHL/ClamBCRemoveFSHL.cpp b/libclambcc/ClamBCRemoveFSHL/ClamBCRemoveFSHL.cpp new file mode 100644 index 0000000000..80c20230ec --- /dev/null +++ b/libclambcc/ClamBCRemoveFSHL/ClamBCRemoveFSHL.cpp @@ -0,0 +1,183 @@ +/* + * Compile LLVM bytecode to ClamAV bytecode. + * + * Copyright (C) 2020-2023 Sourcefire, Inc. + * + * Authors: Andy Ragusa + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. + */ + +#include "Common/clambc.h" +#include "Common/ClamBCUtilities.h" + +#include +#include +#include + +#include +#include + +#include + +using namespace llvm; +using namespace std; + + +namespace +{ + /* + * Remove fshl intrinsic because it's not supported by our runtime. 
+ */ + struct ClamBCRemoveFSHL : public PassInfoMixin + { + protected: + Module *pMod = nullptr; + + FunctionType * fshlType = nullptr; + + virtual llvm::FunctionType * getFSHLFunctionType(Type * functionArgType){ + return FunctionType::get(functionArgType, {functionArgType, functionArgType, functionArgType}, false); + } + + virtual llvm::Function * addFunction64(IntegerType * functionArgType, const char * const functionName){ + /*Will determine if this is necessary during the rc phase.*/ +#if 0 + This is an example function, needs to be converted to IR +static uint8_t fshl8_noshifts(uint8_t left, uint8_t right, uint8_t shift){ + uint8_t ret = 0; + uint8_t bitwidth = 8; + uint8_t bitIdx = (2 * bitwidth) - (shift % bitwidth) - 1; + uint8_t bit; + + for (size_t i = 0; i < bitwidth; i++){ + if (bitIdx >= bitwidth) { + bit = (left & (1 << (bitIdx - bitwidth))) ? 1 : 0; + ret |= (bit << ((bitwidth - 1) - i)); + } else { + bit = (right & (1 << bitIdx)) ? 1 : 0; /* normalize to 0/1 before repositioning */ + ret |= (bit << ((bitwidth - 1) - i)); + } + bitIdx-- ; + } + + return ret; +} + +#endif + assert (0 && "Unimplemented"); return nullptr; /* 64-bit fshl is not implemented; keep every path returning a value */ + } + + /* + * addFunction was based on this. 
+ * static uint8_t fshl8_shifts(uint8_t left, uint8_t right, uint8_t shift){ + * uint16_t tmp = (left << 8) | right; + * tmp <<= (shift % 8); + * tmp = (tmp & 0xff00) >> 8; + * return (uint8_t) (tmp & 0xff); + * } + + */ + virtual llvm::Function * addFunction(IntegerType * functionArgType, const char * const functionName){ + + if (64 == functionArgType->getBitWidth()){ + return addFunction64(functionArgType, functionName); + } + + FunctionType * ft = getFSHLFunctionType(functionArgType); + IntegerType * i64 = IntegerType::get(pMod->getContext(), 64); + ConstantInt * pciBitWidth = ConstantInt::get(i64, functionArgType->getBitWidth()); + + llvm::Function * fshl = Function::Create(ft, GlobalValue::InternalLinkage, functionName, *pMod); + Value * pLeft = fshl->getArg(0); + Value * pRight = fshl->getArg(1); + Value * pShift = fshl->getArg(2); + BasicBlock * pEntry = BasicBlock::Create(pMod->getContext(), "entry", fshl); + + pLeft = CastInst::CreateZExtOrBitCast(pLeft, i64, "zext_", pEntry); + pLeft = BinaryOperator::Create(Instruction::Shl, pLeft, pciBitWidth, "shl_", pEntry); + pRight = CastInst::CreateZExtOrBitCast(pRight, i64, "zext_", pEntry); + pLeft = BinaryOperator::Create(Instruction::Or, pLeft, pRight, "or", pEntry); + pShift = CastInst::CreateZExtOrBitCast(pShift, i64, "zext_", pEntry); + + pShift = BinaryOperator::Create(Instruction::URem, pShift, pciBitWidth, "urem_", pEntry); + pLeft = BinaryOperator::Create(Instruction::Shl, pLeft, pShift, "shl_", pEntry); + + pLeft = BinaryOperator::Create(Instruction::LShr, pLeft, pciBitWidth, "shr_", pEntry); + pLeft = CastInst::CreateTruncOrBitCast(pLeft, functionArgType, "trunc_", pEntry); + ReturnInst::Create(pMod->getContext(), pLeft, pEntry); + + return fshl; + } + + virtual bool replaceCalls(const char * const intrinsicName, const char * functionName, IntegerType * functionArgType){ + std::vector calls; + gatherCallsToIntrinsic(pMod, intrinsicName, calls); + if (calls.size()){ + Function * fshl = 
addFunction(functionArgType, functionName); + replaceAllCalls(getFSHLFunctionType(functionArgType), fshl, calls, "ClamBCRemoveFSHL_"); + + return true; + } + return false; + } + + public: + + virtual ~ClamBCRemoveFSHL() {} + + /*TODO: + * Add this to validator.*/ + PreservedAnalyses run(Module & m, ModuleAnalysisManager & MAM) + { + pMod = &m; + + bool bRet = replaceCalls("llvm.fshl.i32", ".fshl.i32", Type::getInt32Ty(pMod->getContext())); + bRet |= replaceCalls("llvm.fshl.i16", ".fshl.i16", Type::getInt16Ty(pMod->getContext())); + bRet |= replaceCalls("llvm.fshl.i8", ".fshl.i8", Type::getInt8Ty(pMod->getContext())); // i8 intrinsic needs an i8 signature and a modulo-8 shift + + if (bRet){ + return PreservedAnalyses::none(); + } + + return PreservedAnalyses::all(); + } + + }; // end of struct ClamBCRemoveFSHL + +} // end of anonymous namespace + +// This part is the new way of registering your pass +extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK +llvmGetPassPluginInfo() { + return { + LLVM_PLUGIN_API_VERSION, "ClamBCRemoveFSHL", "v0.1", + [](PassBuilder &PB) { + PB.registerPipelineParsingCallback( + [](StringRef Name, ModulePassManager &FPM, + ArrayRef) { + if(Name == "clambc-remove-fshl"){ + FPM.addPass(ClamBCRemoveFSHL()); + return true; + } + return false; + } + ); + } + }; +} + + + diff --git a/libclambcc/ClamBCRemoveFSHL/test.c.txt b/libclambcc/ClamBCRemoveFSHL/test.c.txt new file mode 100644 index 0000000000..e1477c35a4 --- /dev/null +++ b/libclambcc/ClamBCRemoveFSHL/test.c.txt @@ -0,0 +1,88 @@ +#include +#include +#include +#include + +#define LIDX 1 +#define RIDX 2 +#define SIDX 3 +#define BWIDX 4 + +static uint8_t fshl8_noshifts(uint8_t left, uint8_t right, uint8_t shift){ + uint8_t ret = 0; + uint8_t bitwidth = 8; + uint8_t bitIdx = (2 * bitwidth) - (shift % bitwidth) - 1; + uint8_t bit; + + for (size_t i = 0; i < bitwidth; i++){ + if (bitIdx >= bitwidth) { + bit = (left & (1 << (bitIdx - bitwidth))) ? 
1 : 0; + ret |= (bit << ((bitwidth - 1) - i)); + } else { + bit = (right & (1 << bitIdx)) ? 1 : 0; /* normalize to 0/1 before repositioning, as the left-hand branch does */ + ret |= (bit << ((bitwidth - 1) - i)); + } + bitIdx-- ; + } + + return ret; +} + +static uint8_t fshl8_shifts(uint8_t left, uint8_t right, uint8_t shift){ + + uint16_t tmp = (left << 8) | right; + tmp <<= (shift % 8); +#if 0 + tmp = (tmp & 0xff00) >> 8; +#else + tmp = tmp >> 8; +#endif + + return (uint8_t) (tmp & 0xff); + +} + +static void fshl8ascii(char ** argv){ + uint8_t left = atoi(argv[LIDX]); + uint8_t right = atoi(argv[RIDX]); + uint8_t shift = atoi(argv[SIDX]); + + uint8_t shiftsRet, noshiftsRet; + + shiftsRet = fshl8_shifts(left, right, shift); + + noshiftsRet = fshl8_noshifts(left, right, shift); + + if (shiftsRet != noshiftsRet){ + fprintf(stderr, "Incorrect value = '0x%x'\n", noshiftsRet); + fprintf(stderr, "Correct value = '0x%x'\n", shiftsRet); + exit(11); + } + + printf ("left = 0x%x, right = 0x%x, shift = 0x%x, result = 0x%x\n", left, right, shift, shiftsRet); + +} + +int main(int argc, char ** argv){ + int ret = -1; + uint8_t bitwidth = 0; + if (5 > argc){ + fprintf(stderr, "usage: %s \n", argv[0]); + goto done; + } + + bitwidth = atoi(argv[BWIDX]); + printf("bitwidth = '%d'\n", bitwidth); + switch (bitwidth){ + case 8: + fshl8ascii(argv); + break; + default: + fprintf(stderr, "Unsupported bitwidth of '%d'\n", bitwidth); + goto done; + } + + ret = 0; +done: + return ret; +} diff --git a/libclambcc/ClamBCRemoveFreezeInsts/CMakeLists.txt b/libclambcc/ClamBCRemoveFreezeInsts/CMakeLists.txt new file mode 100644 index 0000000000..2fa8b0ba2c --- /dev/null +++ b/libclambcc/ClamBCRemoveFreezeInsts/CMakeLists.txt @@ -0,0 +1,72 @@ +# Copyright (C) 2021 Cisco Systems, Inc. and/or its affiliates. All rights reserved. 
+ +# +# The clambcremovefreezeinsts object library +# +add_library(clambcremovefreezeinsts_obj OBJECT) +target_sources(clambcremovefreezeinsts_obj + PRIVATE + ClamBCRemoveFreezeInsts.cpp +) + +target_include_directories(clambcremovefreezeinsts_obj + PRIVATE + ${CMAKE_BINARY_DIR} # For clambc-version.h (generated file) + . # For Common/clambc.h + .. # For clambc.h #TODO: change all passes to use "Common" and then delete this line. + ${LLVM_INCLUDE_DIRS} +) + +set_target_properties(clambcremovefreezeinsts_obj PROPERTIES COMPILE_FLAGS "${WARNCXXFLAGS}") + +# +# For testing +# +#target_compile_definitions(clambcremovefreezeinsts_obj -DLOG_BEFORE_AFTER=1) + +# +# The clambcremovefreezeinsts shared library. +# +add_library( clambcremovefreezeinsts SHARED ) +target_link_libraries( clambcremovefreezeinsts + PUBLIC + clambcremovefreezeinsts_obj ) +set_target_properties( clambcremovefreezeinsts PROPERTIES + VERSION ${LIBCLAMBC_VERSION} + SOVERSION ${LIBCLAMBC_SOVERSION} ) + +target_link_directories(clambcremovefreezeinsts PRIVATE ${LLVM_LIBRARY_DIRS}) +target_link_libraries(clambcremovefreezeinsts PUBLIC ${LLVM_LIBS}) + +if(WIN32) + install(TARGETS clambcremovefreezeinsts DESTINATION .) 
+ # Also install shared library (DLL) dependencies. The bracket-quoted install(CODE) below is not variable-expanded at configure time, so forward LLVM_LIBRARY_DIRS into the install script first. + install(CODE "set(LLVM_LIBRARY_DIRS \"${LLVM_LIBRARY_DIRS}\")") + install(CODE [[ + file(GET_RUNTIME_DEPENDENCIES + LIBRARIES + $<TARGET_FILE:clambcremovefreezeinsts> + RESOLVED_DEPENDENCIES_VAR _r_deps + UNRESOLVED_DEPENDENCIES_VAR _u_deps + DIRECTORIES + ${LLVM_LIBRARY_DIRS} + ) + foreach(_file ${_r_deps}) + string(TOLOWER ${_file} _file_lower) + if(NOT ${_file_lower} MATCHES "c:[\\/]windows[\\/]system32.*") + file(INSTALL + DESTINATION "${CMAKE_INSTALL_PREFIX}" + TYPE SHARED_LIBRARY + FOLLOW_SYMLINK_CHAIN + FILES "${_file}" + ) + endif() + endforeach() + #message("UNRESOLVED_DEPENDENCIES_VAR: ${_u_deps}") + ]]) +else() + install(TARGETS clambcremovefreezeinsts DESTINATION ${CMAKE_INSTALL_LIBDIR}) +endif() + + + diff --git a/libclambcc/ClamBCRemoveFreezeInsts/ClamBCRemoveFreezeInsts.cpp b/libclambcc/ClamBCRemoveFreezeInsts/ClamBCRemoveFreezeInsts.cpp new file mode 100644 index 0000000000..84679e9dea --- /dev/null +++ b/libclambcc/ClamBCRemoveFreezeInsts/ClamBCRemoveFreezeInsts.cpp @@ -0,0 +1,124 @@ +/* + * Compile LLVM bytecode to ClamAV bytecode. + * + * Copyright (C) 2020-2023 Sourcefire, Inc. + * + * Authors: Andy Ragusa + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. 
+ */ + +#include "Common/clambc.h" +#include "Common/ClamBCUtilities.h" + +#include +#include +#include + +#include +#include + +#include + +using namespace llvm; +using namespace std; + +namespace +{ + /* + * Freeze Instructions are to guarantee sane behaviour in the case of undefs or poison values. The interpreter + * has no notion of freeze instructions, so we are removing them. The verifier will fail if there are undef or + * poison values in the IR, so this is safe to do. + */ + struct ClamBCRemoveFreezeInsts : public PassInfoMixin + { + protected: + Module *pMod = nullptr; + bool bChanged = false; + + virtual void gatherFreezeInsts(Function *pFunc, std::vector & freezeInsts) { + for (auto fi = pFunc->begin(), fe = pFunc->end(); fi != fe; fi++){ + BasicBlock * pBB = llvm::cast(fi); + for (auto bi = pBB->begin(), be = pBB->end(); bi != be; bi++){ + if (FreezeInst * pfi = llvm::dyn_cast(bi)){ + freezeInsts.push_back(pfi); + } + } + } + + } + + virtual void processFunction(Function *pFunc) { + vector freezeInsts; + gatherFreezeInsts(pFunc, freezeInsts); + + for (size_t i = 0; i < freezeInsts.size(); i++){ + bChanged = true; + + FreezeInst * pfi = freezeInsts[i]; + pfi->replaceAllUsesWith(pfi->getOperand(0)); + pfi->eraseFromParent(); + } + } + + public: + + virtual ~ClamBCRemoveFreezeInsts() {} + + PreservedAnalyses run(Module & m, ModuleAnalysisManager & MAM) + { + pMod = &m; + + for (auto i = pMod->begin(), e = pMod->end(); i != e; i++) { + Function *pFunc = llvm::cast(i); + if (pFunc->isDeclaration()) { + continue; + } + + processFunction(pFunc); + } + + if (bChanged){ + return PreservedAnalyses::none(); + } else{ + return PreservedAnalyses::all(); + } + } + }; // end of struct ClamBCRemoveFreezeInsts + +} // end of anonymous namespace + +// This part is the new way of registering your pass +extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK +llvmGetPassPluginInfo() { + return { + LLVM_PLUGIN_API_VERSION, "ClamBCRemoveFreezeInsts", "v0.1", + 
[](PassBuilder &PB) { + PB.registerPipelineParsingCallback( + [](StringRef Name, ModulePassManager &FPM, + ArrayRef) { + if(Name == "clambc-remove-freeze-insts"){ + FPM.addPass(ClamBCRemoveFreezeInsts()); + return true; + } + return false; + } + ); + } + }; +} + + + diff --git a/libclambcc/ClamBCRemoveICMPSLE/CMakeLists.txt b/libclambcc/ClamBCRemoveICMPSLE/CMakeLists.txt new file mode 100644 index 0000000000..8b51e7046c --- /dev/null +++ b/libclambcc/ClamBCRemoveICMPSLE/CMakeLists.txt @@ -0,0 +1,72 @@ +# Copyright (C) 2021 Cisco Systems, Inc. and/or its affiliates. All rights reserved. + +# +# The clambcremoveicmpsle object library +# +add_library(clambcremoveicmpsle_obj OBJECT) +target_sources(clambcremoveicmpsle_obj + PRIVATE + ClamBCRemoveICMPSLE.cpp +) + +target_include_directories(clambcremoveicmpsle_obj + PRIVATE + ${CMAKE_BINARY_DIR} # For clambc-version.h (generated file) + . # For Common/clambc.h + .. # For clambc.h #TODO: change all passes to use "Common" and then delete this line. + ${LLVM_INCLUDE_DIRS} +) + +set_target_properties(clambcremoveicmpsle_obj PROPERTIES COMPILE_FLAGS "${WARNCXXFLAGS}") + +# +# For testing +# +#target_compile_definitions(clambcremoveicmpsle_obj -DLOG_BEFORE_AFTER=1) + +# +# The clambcremoveicmpsle shared library. +# +add_library( clambcremoveicmpsle SHARED ) +target_link_libraries( clambcremoveicmpsle + PUBLIC + clambcremoveicmpsle_obj ) +set_target_properties( clambcremoveicmpsle PROPERTIES + VERSION ${LIBCLAMBC_VERSION} + SOVERSION ${LIBCLAMBC_SOVERSION} ) + +target_link_directories(clambcremoveicmpsle PRIVATE ${LLVM_LIBRARY_DIRS}) +target_link_libraries(clambcremoveicmpsle PUBLIC ${LLVM_LIBS}) + +if(WIN32) + install(TARGETS clambcremoveicmpsle DESTINATION .) 
+ # Also install shared library (DLL) dependencies. The bracket-quoted install(CODE) below is not variable-expanded at configure time, so forward LLVM_LIBRARY_DIRS into the install script first. + install(CODE "set(LLVM_LIBRARY_DIRS \"${LLVM_LIBRARY_DIRS}\")") + install(CODE [[ + file(GET_RUNTIME_DEPENDENCIES + LIBRARIES + $<TARGET_FILE:clambcremoveicmpsle> + RESOLVED_DEPENDENCIES_VAR _r_deps + UNRESOLVED_DEPENDENCIES_VAR _u_deps + DIRECTORIES + ${LLVM_LIBRARY_DIRS} + ) + foreach(_file ${_r_deps}) + string(TOLOWER ${_file} _file_lower) + if(NOT ${_file_lower} MATCHES "c:[\\/]windows[\\/]system32.*") + file(INSTALL + DESTINATION "${CMAKE_INSTALL_PREFIX}" + TYPE SHARED_LIBRARY + FOLLOW_SYMLINK_CHAIN + FILES "${_file}" + ) + endif() + endforeach() + #message("UNRESOLVED_DEPENDENCIES_VAR: ${_u_deps}") + ]]) +else() + install(TARGETS clambcremoveicmpsle DESTINATION ${CMAKE_INSTALL_LIBDIR}) +endif() + + + diff --git a/libclambcc/ClamBCRemoveICMPSLE/ClamBCRemoveICMPSLE.cpp b/libclambcc/ClamBCRemoveICMPSLE/ClamBCRemoveICMPSLE.cpp new file mode 100644 index 0000000000..631f349a9f --- /dev/null +++ b/libclambcc/ClamBCRemoveICMPSLE/ClamBCRemoveICMPSLE.cpp @@ -0,0 +1,127 @@ +/* + * Compile LLVM bytecode to ClamAV bytecode. + * + * Copyright (C) 2020-2023 Sourcefire, Inc. + * + * Authors: Andy Ragusa + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. 
+ */ + +#include "Common/clambc.h" +//#include "Common/ClamBCUtilities.h" + +#include +#include +#include + +#include +#include +#include + +#include + +using namespace llvm; +using namespace std; + +/* Modeled after CallGraphAnalysis */ +/* Rewrites every signed "icmp sle" by swapping its operands; ICmpInst::swapOperands() adjusts the predicate to match (sle becomes sge). */ +namespace +{ + struct ClamBCRemoveICMPSLE : public PassInfoMixin + { + protected: + Module *pMod = nullptr; + bool bChanged = false; + + virtual void gatherInstructions(Function * pFunc, std::vector & insts){ + for (auto i = pFunc->begin(), e = pFunc->end(); i != e; i++){ + BasicBlock * pBB = llvm::cast(i); + for (auto bbi = pBB->begin(), bbe = pBB->end(); bbi != bbe; bbi++){ + ICmpInst * inst = llvm::dyn_cast(bbi); + if (inst){ + if ( CmpInst::ICMP_SLE == inst->getPredicate()){ + insts.push_back(inst); + } + } + } + } + } + + virtual void processFunction(Function * pFunc){ + std::vector insts; + gatherInstructions(pFunc, insts); + + for (size_t i = 0; i < insts.size(); i++){ + insts[i]->swapOperands(); + bChanged = true; /* the IR was modified, so run() must not report PreservedAnalyses::all() */ + } + } + public: + + virtual ~ClamBCRemoveICMPSLE() {} + + PreservedAnalyses run(Module & m, ModuleAnalysisManager & MAM) + { + pMod = &m; + for (auto i = pMod->begin(), e = pMod->end(); i != e; i++){ + Function * pFunc = llvm::dyn_cast(i); + if (pFunc){ + if (pFunc->isDeclaration()){ + continue; + } + + processFunction(pFunc); + } + } + + + + if (bChanged){ + return PreservedAnalyses::none(); + } + return PreservedAnalyses::all(); + } + }; // end of struct ClamBCRemoveICMPSLE + + + + + +} // end of anonymous namespace + +// This part is the new way of registering your pass +extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK +llvmGetPassPluginInfo() { + return { + LLVM_PLUGIN_API_VERSION, "ClamBCRemoveICMPSLE", "v0.1", + [](PassBuilder &PB) { + PB.registerPipelineParsingCallback( + [](StringRef Name, ModulePassManager &FPM, + ArrayRef) { + if(Name == "clambc-remove-icmp-sle"){ + FPM.addPass(ClamBCRemoveICMPSLE()); + return true; + } + return false; + } + ); + +} +}; +} + + + diff --git 
a/libclambcc/ClamBCRemovePointerPHIs/CMakeLists.txt b/libclambcc/ClamBCRemovePointerPHIs/CMakeLists.txt new file mode 100644 index 0000000000..60bbdc45e5 --- /dev/null +++ b/libclambcc/ClamBCRemovePointerPHIs/CMakeLists.txt @@ -0,0 +1,72 @@ +# Copyright (C) 2021 Cisco Systems, Inc. and/or its affiliates. All rights reserved. + +# +# The clambcremovepointerphis object library +# +add_library(clambcremovepointerphis_obj OBJECT) +target_sources(clambcremovepointerphis_obj + PRIVATE + ClamBCRemovePointerPHIs.cpp +) + +target_include_directories(clambcremovepointerphis_obj + PRIVATE + ${CMAKE_BINARY_DIR} # For clambc-version.h (generated file) + . # For Common/clambc.h + .. # For clambc.h #TODO: change all passes to use "Common" and then delete this line. + ${LLVM_INCLUDE_DIRS} +) + +set_target_properties(clambcremovepointerphis_obj PROPERTIES COMPILE_FLAGS "${WARNCXXFLAGS}") + +# +# For testing +# +#target_compile_definitions(clambcremovepointerphis_obj -DLOG_BEFORE_AFTER=1) + +# +# The clambcremovepointerphis shared library. +# +add_library( clambcremovepointerphis SHARED ) +target_link_libraries( clambcremovepointerphis + PUBLIC + clambcremovepointerphis_obj ) +set_target_properties( clambcremovepointerphis PROPERTIES + VERSION ${LIBCLAMBC_VERSION} + SOVERSION ${LIBCLAMBC_SOVERSION} ) + +target_link_directories(clambcremovepointerphis PRIVATE ${LLVM_LIBRARY_DIRS}) +target_link_libraries(clambcremovepointerphis PUBLIC ${LLVM_LIBS}) + +if(WIN32) + install(TARGETS clambcremovepointerphis DESTINATION .) 
+ # Also install shared library (DLL) dependencies. The bracket-quoted install(CODE) below is not variable-expanded at configure time, so forward LLVM_LIBRARY_DIRS into the install script first. + install(CODE "set(LLVM_LIBRARY_DIRS \"${LLVM_LIBRARY_DIRS}\")") + install(CODE [[ + file(GET_RUNTIME_DEPENDENCIES + LIBRARIES + $<TARGET_FILE:clambcremovepointerphis> + RESOLVED_DEPENDENCIES_VAR _r_deps + UNRESOLVED_DEPENDENCIES_VAR _u_deps + DIRECTORIES + ${LLVM_LIBRARY_DIRS} + ) + foreach(_file ${_r_deps}) + string(TOLOWER ${_file} _file_lower) + if(NOT ${_file_lower} MATCHES "c:[\\/]windows[\\/]system32.*") + file(INSTALL + DESTINATION "${CMAKE_INSTALL_PREFIX}" + TYPE SHARED_LIBRARY + FOLLOW_SYMLINK_CHAIN + FILES "${_file}" + ) + endif() + endforeach() + #message("UNRESOLVED_DEPENDENCIES_VAR: ${_u_deps}") + ]]) +else() + install(TARGETS clambcremovepointerphis DESTINATION ${CMAKE_INSTALL_LIBDIR}) +endif() + + + diff --git a/libclambcc/ClamBCRemovePointerPHIs/ClamBCRemovePointerPHIs.cpp b/libclambcc/ClamBCRemovePointerPHIs/ClamBCRemovePointerPHIs.cpp index 4c8409641c..bef2c7ac82 100644 --- a/libclambcc/ClamBCRemovePointerPHIs/ClamBCRemovePointerPHIs.cpp +++ b/libclambcc/ClamBCRemovePointerPHIs/ClamBCRemovePointerPHIs.cpp @@ -1,4 +1,4 @@ - +// #include #include #include @@ -8,8 +8,10 @@ #include -#include +//#include #include +#include +#include #include "Common/clambc.h" #include "Common/ClamBCUtilities.h" @@ -20,12 +22,12 @@ using namespace llvm; namespace { -class ClambcRemovePointerPHIs : public FunctionPass +class ClamBCRemovePointerPHIs : public PassInfoMixin { protected: - Function *pFunc = nullptr; + llvm::Module * pMod = nullptr; - std::vector gatherPHIs() + std::vector gatherPHIs(llvm::Function * pFunc) { std::vector ret; @@ -178,14 +180,13 @@ class ClambcRemovePointerPHIs : public FunctionPass if (not pn->getType()->isPointerTy()) { return false; } - //std::vector delLst; Value *pBasePtr = findBasePointer(pn); if (nullptr == pBasePtr) { /*No unique base pointer.*/ return false; } - IntegerType *pType = Type::getInt64Ty(pFunc->getParent()->getContext()); + IntegerType *pType = Type::getInt64Ty(pMod->getContext()); Constant *zero = ConstantInt::get(pType, 0); Value *initValue = zero; PHINode *idxNode = 
PHINode::Create(pType, pn->getNumIncomingValues(), "ClamBCRemovePointerPHIs_idx_", pn); @@ -226,7 +227,13 @@ class ClambcRemovePointerPHIs : public FunctionPass std::vector newInsts; Instruction *insPt = findFirstNonPHI(pn->getParent()); - Instruction *gepiNew = GetElementPtrInst::Create(nullptr, pBasePtr, idxNode, "ClamBCRemovePointerPHIs_gepi_", insPt); + PointerType * pt = llvm::dyn_cast(pBasePtr->getType()); + if (nullptr == pt){ + assert (0 && "This pass is only for pointer phis, how did we get here???"); + } + Type * elementType = pt->getPointerElementType(); + + Instruction *gepiNew = GetElementPtrInst::Create(elementType, pBasePtr, idxNode, "ClamBCRemovePointerPHIs_gepi_", insPt); if (pn->getType() != gepiNew->getType()) { gepiNew = CastInst::CreatePointerCast(gepiNew, pn->getType(), "ClamBCRemovePointerPHIs_cast_", insPt); } @@ -283,17 +290,23 @@ class ClambcRemovePointerPHIs : public FunctionPass } public: - static char ID; - ClambcRemovePointerPHIs() - : FunctionPass(ID) {} + ClamBCRemovePointerPHIs() {} - bool runOnFunction(Function &F) override + virtual PreservedAnalyses run(Module & m, ModuleAnalysisManager & mam) { - pFunc = &F; + /*Currently unused. 
Will remove after the RC phase.*/ + DEBUGERR << "TODO: EVALUATE WHETHER OR NOT I NEED THIS" << "\n"; return PreservedAnalyses::all(); + + pMod = &m; bool ret = false; - std::vector phis = gatherPHIs(); + for (auto i = pMod->begin(), e = pMod->end(); i != e; i++){ + llvm::Function * pFunc = llvm::dyn_cast(i); + if (nullptr == pFunc){ + continue; + } + std::vector phis = gatherPHIs(pFunc); for (size_t i = 0; i < phis.size(); i++) { PHINode *pn = phis[i]; @@ -301,15 +314,39 @@ class ClambcRemovePointerPHIs : public FunctionPass ret = true; } } + } - return ret; + if (ret){ + return PreservedAnalyses::none(); + } + return PreservedAnalyses::all(); } }; // end of class ClambcRemovePointerPHIs } // end of anonymous namespace -char ClambcRemovePointerPHIs::ID = 0; -static RegisterPass X("clambc-remove-pointer-phis", "Remove PHI Nodes with pointers", - false /* Only looks at CFG */, - false /* Analysis Pass */); +// This part is the new way of registering your pass +extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK +llvmGetPassPluginInfo() { + return { + LLVM_PLUGIN_API_VERSION, "ClamBCRemovePointerPHIs", "v0.1", + [](PassBuilder &PB) { + PB.registerPipelineParsingCallback( + [](StringRef Name, ModulePassManager &FPM, + ArrayRef) { + if(Name == "clambc-remove-pointer-phis"){ + FPM.addPass(ClamBCRemovePointerPHIs()); + return true; + } + return false; + } + ); + } + }; +} + + + + + diff --git a/libclambcc/ClamBCRemoveSelectInsts/ClamBCRemoveSelectInsts.cpp b/libclambcc/ClamBCRemoveSelectInsts/ClamBCRemoveSelectInsts.cpp deleted file mode 100644 index 012c91773b..0000000000 --- a/libclambcc/ClamBCRemoveSelectInsts/ClamBCRemoveSelectInsts.cpp +++ /dev/null @@ -1,116 +0,0 @@ - -#include -#include "llvm/IR/Function.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/Instructions.h" -#include "llvm/Support/raw_ostream.h" - -#include "llvm/IR/LegacyPassManager.h" -#include "llvm/Transforms/IPO/PassManagerBuilder.h" - -#include "Common/clambc.h" - -using namespace llvm; 
- -namespace -{ -class RemoveSelectInsts : public ModulePass -{ - protected: - bool bChanged = false; - Module* pMod = nullptr; - - void processBasicBlock(BasicBlock* pBB, std::vector& selects) - { - for (auto i = pBB->begin(), e = pBB->end(); i != e; i++) { - SelectInst* pSelect = llvm::dyn_cast(i); - if (pSelect) { - selects.push_back(pSelect); - } - } - } - - void processFunction(Function* pFunc, std::vector& selects) - { - for (auto i = pFunc->begin(), e = pFunc->end(); i != e; i++) { - BasicBlock* pBB = llvm::cast(i); - processBasicBlock(pBB, selects); - } - } - - std::vector gatherSelects() - { - std::vector selects; - for (auto i = pMod->begin(), e = pMod->end(); i != e; i++) { - Function* pFunc = llvm::cast(i); - - processFunction(pFunc, selects); - } - - return selects; - } - - Instruction* getAllocaInsertPoint(SelectInst* pSelect) - { - BasicBlock* entryBlock = llvm::cast(pSelect->getParent()->getParent()->begin()); - for (auto i = entryBlock->begin(), e = entryBlock->end(); i != e; i++) { - Instruction* pInst = llvm::cast(i); - if (not llvm::isa(pInst)) { - return pInst; - } - } - - assert(0 && "MALFORMED BASIC BLOCK"); - return nullptr; - } - - void replaceSelectInst(SelectInst* pSelect) - { - - Instruction* insertBefore = getAllocaInsertPoint(pSelect); - AllocaInst* pAlloca = new AllocaInst(pSelect->getType(), - pMod->getDataLayout().getProgramAddressSpace(), - "ClamBCRemoveSelectInst", insertBefore); - - BasicBlock* pBB = llvm::cast(pSelect->getParent()); - - BasicBlock* pSplit = pBB->splitBasicBlock(pSelect, "ClamBCRemoveSelectInst"); - new StoreInst(pSelect->getFalseValue(), pAlloca, pBB->getTerminator()); - - new StoreInst(pSelect->getTrueValue(), pAlloca, pSelect); - - BasicBlock* pSplit2 = pSplit->splitBasicBlock(pSelect, "ClamBCRemoveSelectInst"); - BranchInst::Create(pSplit, pSplit2, pSelect->getCondition(), pBB->getTerminator()); - - LoadInst* pLoad = new LoadInst(pAlloca->getType()->getPointerElementType(), pAlloca, "ClamBCRemoveSelectInst", 
pSelect); - pSelect->replaceAllUsesWith(pLoad); - - pBB->getTerminator()->eraseFromParent(); - pSelect->eraseFromParent(); - } - - public: - static char ID; - RemoveSelectInsts() - : ModulePass(ID) {} - - virtual bool runOnModule(Module& m) override - { - pMod = &m; - - std::vector selects = gatherSelects(); - for (size_t i = 0; i < selects.size(); i++) { - SelectInst* pSelect = selects[i]; - - replaceSelectInst(pSelect); - } - - return bChanged; - } -}; // end of struct RemoveSelectInsts -} // end of anonymous namespace - -char RemoveSelectInsts::ID = 0; -static RegisterPass X("remove-selects", "RemoveSelectInsts Pass", - false /* Only looks at CFG */, - false /* Analysis Pass */); diff --git a/libclambcc/ClamBCRemoveUSUB/CMakeLists.txt b/libclambcc/ClamBCRemoveUSUB/CMakeLists.txt new file mode 100644 index 0000000000..63fa07cfb6 --- /dev/null +++ b/libclambcc/ClamBCRemoveUSUB/CMakeLists.txt @@ -0,0 +1,72 @@ +# Copyright (C) 2021 Cisco Systems, Inc. and/or its affiliates. All rights reserved. + +# +# The clambcremoveusub object library +# +add_library(clambcremoveusub_obj OBJECT) +target_sources(clambcremoveusub_obj + PRIVATE + ClamBCRemoveUSUB.cpp +) + +target_include_directories(clambcremoveusub_obj + PRIVATE + ${CMAKE_BINARY_DIR} # For clambc-version.h (generated file) + . # For Common/clambc.h + .. # For clambc.h #TODO: change all passes to use "Common" and then delete this line. + ${LLVM_INCLUDE_DIRS} +) + +set_target_properties(clambcremoveusub_obj PROPERTIES COMPILE_FLAGS "${WARNCXXFLAGS}") + +# +# For testing +# +#target_compile_definitions(clambcremoveusub_obj -DLOG_BEFORE_AFTER=1) + +# +# The clambcremoveusub shared library. 
+# +add_library( clambcremoveusub SHARED ) +target_link_libraries( clambcremoveusub + PUBLIC + clambcremoveusub_obj ) +set_target_properties( clambcremoveusub PROPERTIES + VERSION ${LIBCLAMBC_VERSION} + SOVERSION ${LIBCLAMBC_SOVERSION} ) + +target_link_directories(clambcremoveusub PRIVATE ${LLVM_LIBRARY_DIRS}) +target_link_libraries(clambcremoveusub PUBLIC ${LLVM_LIBS}) + +if(WIN32) + install(TARGETS clambcremoveusub DESTINATION .) + # Also install shared library (DLL) dependencies. The bracket-quoted install(CODE) below is not variable-expanded at configure time, so forward LLVM_LIBRARY_DIRS into the install script first. + install(CODE "set(LLVM_LIBRARY_DIRS \"${LLVM_LIBRARY_DIRS}\")") + install(CODE [[ + file(GET_RUNTIME_DEPENDENCIES + LIBRARIES + $<TARGET_FILE:clambcremoveusub> + RESOLVED_DEPENDENCIES_VAR _r_deps + UNRESOLVED_DEPENDENCIES_VAR _u_deps + DIRECTORIES + ${LLVM_LIBRARY_DIRS} + ) + foreach(_file ${_r_deps}) + string(TOLOWER ${_file} _file_lower) + if(NOT ${_file_lower} MATCHES "c:[\\/]windows[\\/]system32.*") + file(INSTALL + DESTINATION "${CMAKE_INSTALL_PREFIX}" + TYPE SHARED_LIBRARY + FOLLOW_SYMLINK_CHAIN + FILES "${_file}" + ) + endif() + endforeach() + #message("UNRESOLVED_DEPENDENCIES_VAR: ${_u_deps}") + ]]) +else() + install(TARGETS clambcremoveusub DESTINATION ${CMAKE_INSTALL_LIBDIR}) +endif() + + + diff --git a/libclambcc/ClamBCRemoveUSUB/ClamBCRemoveUSUB.cpp b/libclambcc/ClamBCRemoveUSUB/ClamBCRemoveUSUB.cpp new file mode 100644 index 0000000000..e83b9378a0 --- /dev/null +++ b/libclambcc/ClamBCRemoveUSUB/ClamBCRemoveUSUB.cpp @@ -0,0 +1,144 @@ +/* + * Compile LLVM bytecode to ClamAV bytecode. + * + * Copyright (C) 2020-2023 Sourcefire, Inc. + * + * Authors: Andy Ragusa + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. + */ + +#include "Common/clambc.h" +#include "Common/ClamBCUtilities.h" + +#include +#include +#include + +#include +#include + +#include + +using namespace llvm; +using namespace std; + +namespace +{ + /* + * Remove usub intrinsic because it's not supported by our runtime. + */ + struct ClamBCRemoveUSUB : public PassInfoMixin + { + protected: + Module *pMod = nullptr; + const char * const USUB_NAME = ".usub"; + + FunctionType * usubType = nullptr; + + virtual llvm::FunctionType * getUSUBFunctionType(Type * functionArgType){ + return FunctionType::get(functionArgType, {functionArgType, functionArgType}, false); + } + + virtual llvm::Function * addUSUB(Type * functionArgType){ + uint32_t addressSpace = pMod->getDataLayout().getProgramAddressSpace(); + + FunctionType * ft = getUSUBFunctionType(functionArgType); + + llvm::Function * usub = Function::Create(ft, GlobalValue::InternalLinkage, USUB_NAME, *pMod); + Value * pLeft = usub->getArg(0); + Value * pRight = usub->getArg(1); + BasicBlock * pEntry = BasicBlock::Create(pMod->getContext(), "entry", usub); + BasicBlock * pLHS = BasicBlock::Create(pMod->getContext(), "left", usub); + BasicBlock * pRHS = BasicBlock::Create(pMod->getContext(), "right", usub); + BasicBlock * pRetBlock = BasicBlock::Create(pMod->getContext(), "ret", usub); + + //entry block + AllocaInst * retVar = new AllocaInst(functionArgType, addressSpace , "ret", pEntry); + ICmpInst * cmp = new ICmpInst(*pEntry, CmpInst::ICMP_UGT, pLeft, pRight, "icmp"); + BranchInst::Create(pLHS, pRHS, cmp, pEntry); + + //left > right + new StoreInst (BinaryOperator::Create(Instruction::Sub, pLeft, pRight, "ClamBCRemoveUSUB_", pLHS), retVar, pLHS); + BranchInst::Create(pRetBlock, pLHS); + + //right >= left + new StoreInst 
(ConstantInt::get(functionArgType, 0), retVar, pRHS); + BranchInst::Create(pRetBlock, pRHS); + + LoadInst * pli = new LoadInst(functionArgType, retVar, "load", pRetBlock); + ReturnInst::Create(pMod->getContext(), pli, pRetBlock); + return usub; + } + + virtual bool replaceCalls(const char * const intrinsicName, Type * functionArgType){ + std::vector calls; + gatherCallsToIntrinsic(pMod, intrinsicName, calls); + if (calls.size()){ + Function * usub = addUSUB(functionArgType); + replaceAllCalls(getUSUBFunctionType(functionArgType), usub, calls, "ClamBCRemoveUSUB_"); + + return true; + } + return false; + } + + public: + + virtual ~ClamBCRemoveUSUB() {} + + PreservedAnalyses run(Module & m, ModuleAnalysisManager & MAM) + { + pMod = &m; + + bool bRet = replaceCalls("llvm.usub.sat.i32", Type::getInt32Ty(pMod->getContext())); +// bRet |= replaceCalls("llvm.usub.i16", Type::getInt16Ty(pMod->getContext())); + + + DEBUGERR << "TODO: ADD usub detection to the validator" << "\n"; + + if (bRet){ + return PreservedAnalyses::none(); + } + + return PreservedAnalyses::all(); + } + + }; // end of struct ClamBCRemoveUSUB + +} // end of anonymous namespace + +// This part is the new way of registering your pass +extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK +llvmGetPassPluginInfo() { + return { + LLVM_PLUGIN_API_VERSION, "ClamBCRemoveUSUB", "v0.1", + [](PassBuilder &PB) { + PB.registerPipelineParsingCallback( + [](StringRef Name, ModulePassManager &FPM, + ArrayRef) { + if(Name == "clambc-remove-usub"){ + FPM.addPass(ClamBCRemoveUSUB()); + return true; + } + return false; + } + ); + } + }; +} + + + diff --git a/libclambcc/ClamBCRemoveUndefs/CMakeLists.txt b/libclambcc/ClamBCRemoveUndefs/CMakeLists.txt new file mode 100644 index 0000000000..a8403ede67 --- /dev/null +++ b/libclambcc/ClamBCRemoveUndefs/CMakeLists.txt @@ -0,0 +1,72 @@ +# Copyright (C) 2021 Cisco Systems, Inc. and/or its affiliates. All rights reserved. 
+ +# +# The clambcremoveundefs object library +# +add_library(clambcremoveundefs_obj OBJECT) +target_sources(clambcremoveundefs_obj + PRIVATE + ClamBCRemoveUndefs.cpp +) + +target_include_directories(clambcremoveundefs_obj + PRIVATE + ${CMAKE_BINARY_DIR} # For clambc-version.h (generated file) + . # For Common/clambc.h + .. # For clambc.h #TODO: change all passes to use "Common" and then delete this line. + ${LLVM_INCLUDE_DIRS} +) + +set_target_properties(clambcremoveundefs_obj PROPERTIES COMPILE_FLAGS "${WARNCXXFLAGS}") + +# +# For testing +# +#target_compile_definitions(clambcremoveundefs_obj -DLOG_BEFORE_AFTER=1) + +# +# The clambcremoveundefs shared library. +# +add_library( clambcremoveundefs SHARED ) +target_link_libraries( clambcremoveundefs + PUBLIC + clambcremoveundefs_obj ) +set_target_properties( clambcremoveundefs PROPERTIES + VERSION ${LIBCLAMBC_VERSION} + SOVERSION ${LIBCLAMBC_SOVERSION} ) + +target_link_directories(clambcremoveundefs PRIVATE ${LLVM_LIBRARY_DIRS}) +target_link_libraries(clambcremoveundefs PUBLIC ${LLVM_LIBS}) + +if(WIN32) + install(TARGETS clambcremoveundefs DESTINATION .) 
+ # Also install shared library (DLL) dependencies. The bracket-quoted install(CODE) below is not variable-expanded at configure time, so forward LLVM_LIBRARY_DIRS into the install script first. + install(CODE "set(LLVM_LIBRARY_DIRS \"${LLVM_LIBRARY_DIRS}\")") + install(CODE [[ + file(GET_RUNTIME_DEPENDENCIES + LIBRARIES + $<TARGET_FILE:clambcremoveundefs> + RESOLVED_DEPENDENCIES_VAR _r_deps + UNRESOLVED_DEPENDENCIES_VAR _u_deps + DIRECTORIES + ${LLVM_LIBRARY_DIRS} + ) + foreach(_file ${_r_deps}) + string(TOLOWER ${_file} _file_lower) + if(NOT ${_file_lower} MATCHES "c:[\\/]windows[\\/]system32.*") + file(INSTALL + DESTINATION "${CMAKE_INSTALL_PREFIX}" + TYPE SHARED_LIBRARY + FOLLOW_SYMLINK_CHAIN + FILES "${_file}" + ) + endif() + endforeach() + #message("UNRESOLVED_DEPENDENCIES_VAR: ${_u_deps}") + ]]) +else() + install(TARGETS clambcremoveundefs DESTINATION ${CMAKE_INSTALL_LIBDIR}) +endif() + + + diff --git a/libclambcc/ClamBCRemoveUndefs/ClamBCRemoveUndefs.cpp b/libclambcc/ClamBCRemoveUndefs/ClamBCRemoveUndefs.cpp index 2151b1142f..4d6a717268 100644 --- a/libclambcc/ClamBCRemoveUndefs/ClamBCRemoveUndefs.cpp +++ b/libclambcc/ClamBCRemoveUndefs/ClamBCRemoveUndefs.cpp @@ -1,19 +1,38 @@ #include -#include "llvm/IR/Module.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/Instructions.h" -#include "llvm/Support/raw_ostream.h" +#include +#include +#include +#include +#include +#include #include -#include "llvm/IR/LegacyPassManager.h" -#include "llvm/Transforms/IPO/PassManagerBuilder.h" +//#include +#include #include "Common/clambc.h" #include "Common/ClamBCUtilities.h" using namespace llvm; + + +#if 0 + + +THIS APPEARS TO NO LONGER BE NEEDED. LEAVING IN PLACE DURING THE RC PHASE, JUST IN CASE. 
+ + + +#endif + + + + + + + namespace { /* @@ -32,7 +51,7 @@ namespace store %struct._state* %state, %struct._state** %state.addr, align 8 store i32 %sizeof_state, i32* %sizeof_state.addr, align 4 */ -class ClamBCRemoveUndefs : public ModulePass +struct ClamBCRemoveUndefs : public PassInfoMixin { protected: llvm::Module *pMod = nullptr; @@ -56,10 +75,17 @@ class ClamBCRemoveUndefs : public ModulePass FunctionType *rterrTy = FunctionType::get( Type::getInt32Ty(BB->getContext()), {Type::getInt32Ty(BB->getContext())}, false); +#if 0 Constant *func_abort = BB->getParent()->getParent()->getOrInsertFunction("abort", abrtTy); Constant *func_rterr = BB->getParent()->getParent()->getOrInsertFunction("bytecode_rt_error", rterrTy); +#else + //DEBUGERR << "DON'T KNOW IF THIS WILL WORK, REMOVE IFDEF LATER" << "\n"; + FunctionCallee func_abort = BB->getParent()->getParent()->getOrInsertFunction("abort", abrtTy); + FunctionCallee func_rterr = + BB->getParent()->getParent()->getOrInsertFunction("bytecode_rt_error", rterrTy); +#endif BasicBlock *abort = BasicBlock::Create(BB->getContext(), "rterr.trig", BB->getParent()); Constant *PN = ConstantInt::get(Type::getInt32Ty(BB->getContext()), 99); if (MDDbgKind) { @@ -217,14 +243,21 @@ class ClamBCRemoveUndefs : public ModulePass } public: - static char ID; + //static char ID; ClamBCRemoveUndefs() - : ModulePass(ID) {} + /* : ModulePass(ID) */ {} virtual ~ClamBCRemoveUndefs() {} +#if 0 bool runOnModule(Module &m) override +#else + PreservedAnalyses run(Module & m, ModuleAnalysisManager & MAM) +#endif { + /*This no longer appears to be needed. 
Will keep it during the -rc phase and then remove.*/ + return PreservedAnalyses::all(); + pMod = &m; for (auto i = pMod->begin(), e = pMod->end(); i != e; i++) { @@ -240,13 +273,44 @@ class ClamBCRemoveUndefs : public ModulePass delLst[i]->eraseFromParent(); } +#if 0 return bChanged; +#else + if (bChanged){ + return PreservedAnalyses::none(); + } + return PreservedAnalyses::all(); +#endif } }; // end of struct ClamBCRemoveUndefs } // end of anonymous namespace +#if 0 char ClamBCRemoveUndefs::ID = 0; static RegisterPass X("clambc-remove-undefs", "Remove Undefs", false /* Only looks at CFG */, false /* Analysis Pass */); +#else + +// This part is the new way of registering your pass +extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK +llvmGetPassPluginInfo() { + return { + LLVM_PLUGIN_API_VERSION, "ClamBCRemoveUndefs", "v0.1", + [](PassBuilder &PB) { + PB.registerPipelineParsingCallback( + [](StringRef Name, ModulePassManager &FPM, + ArrayRef) { + if(Name == "clambc-remove-undefs"){ + FPM.addPass(ClamBCRemoveUndefs()); + return true; + } + return false; + } + ); + } + }; +} + +#endif diff --git a/libclambcc/ClamBCRemoveUnsupportedICMPIntrinsics/CMakeLists.txt b/libclambcc/ClamBCRemoveUnsupportedICMPIntrinsics/CMakeLists.txt new file mode 100644 index 0000000000..ef75882a11 --- /dev/null +++ b/libclambcc/ClamBCRemoveUnsupportedICMPIntrinsics/CMakeLists.txt @@ -0,0 +1,72 @@ +# Copyright (C) 2021 Cisco Systems, Inc. and/or its affiliates. All rights reserved. + +# +# The clambcremoveunsupportedicmpintrinsics object library +# +add_library(clambcremoveunsupportedicmpintrinsics_obj OBJECT) +target_sources(clambcremoveunsupportedicmpintrinsics_obj + PRIVATE + ClamBCRemoveUnsupportedICMPIntrinsics.cpp +) + +target_include_directories(clambcremoveunsupportedicmpintrinsics_obj + PRIVATE + ${CMAKE_BINARY_DIR} # For clambc-version.h (generated file) + . # For Common/clambc.h + .. # For clambc.h #TODO: change all passes to use "Common" and then delete this line. 
+ ${LLVM_INCLUDE_DIRS} +) + +set_target_properties(clambcremoveunsupportedicmpintrinsics_obj PROPERTIES COMPILE_FLAGS "${WARNCXXFLAGS}") + +# +# For testing +# +#target_compile_definitions(clambcremoveunsupportedicmpintrinsics_obj -DLOG_BEFORE_AFTER=1) + +# +# The clambcremoveunsupportedicmpintrinsics shared library. +# +add_library( clambcremoveunsupportedicmpintrinsics SHARED ) +target_link_libraries( clambcremoveunsupportedicmpintrinsics + PUBLIC + clambcremoveunsupportedicmpintrinsics_obj ) +set_target_properties( clambcremoveunsupportedicmpintrinsics PROPERTIES + VERSION ${LIBCLAMBC_VERSION} + SOVERSION ${LIBCLAMBC_SOVERSION} ) + +target_link_directories(clambcremoveunsupportedicmpintrinsics PRIVATE ${LLVM_LIBRARY_DIRS}) +target_link_libraries(clambcremoveunsupportedicmpintrinsics PUBLIC ${LLVM_LIBS}) + +if(WIN32) + install(TARGETS clambcremoveunsupportedicmpintrinsics DESTINATION .) + # Also install shared library (DLL) dependencies. The bracket-quoted install(CODE) below is not variable-expanded at configure time, so forward LLVM_LIBRARY_DIRS into the install script first. + install(CODE "set(LLVM_LIBRARY_DIRS \"${LLVM_LIBRARY_DIRS}\")") + install(CODE [[ + file(GET_RUNTIME_DEPENDENCIES + LIBRARIES + $<TARGET_FILE:clambcremoveunsupportedicmpintrinsics> + RESOLVED_DEPENDENCIES_VAR _r_deps + UNRESOLVED_DEPENDENCIES_VAR _u_deps + DIRECTORIES + ${LLVM_LIBRARY_DIRS} + ) + foreach(_file ${_r_deps}) + string(TOLOWER ${_file} _file_lower) + if(NOT ${_file_lower} MATCHES "c:[\\/]windows[\\/]system32.*") + file(INSTALL + DESTINATION "${CMAKE_INSTALL_PREFIX}" + TYPE SHARED_LIBRARY + FOLLOW_SYMLINK_CHAIN + FILES "${_file}" + ) + endif() + endforeach() + #message("UNRESOLVED_DEPENDENCIES_VAR: ${_u_deps}") + ]]) +else() + install(TARGETS clambcremoveunsupportedicmpintrinsics DESTINATION ${CMAKE_INSTALL_LIBDIR}) +endif() + + + diff --git a/libclambcc/ClamBCRemoveUnsupportedICMPIntrinsics/ClamBCRemoveUnsupportedICMPIntrinsics.cpp b/libclambcc/ClamBCRemoveUnsupportedICMPIntrinsics/ClamBCRemoveUnsupportedICMPIntrinsics.cpp new file mode 100644 index 0000000000..58c8c5da04 --- /dev/null +++ b/libclambcc/ClamBCRemoveUnsupportedICMPIntrinsics/ClamBCRemoveUnsupportedICMPIntrinsics.cpp @@ -0,0 +1,156 @@ +/* + * Compile LLVM bytecode 
to ClamAV bytecode. + * + * Copyright (C) 2020-2023 Sourcefire, Inc. + * + * Authors: Andy Ragusa + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. + */ + +#include "Common/clambc.h" +#include "Common/ClamBCUtilities.h" + +#include +#include +#include + +#include +#include + +#include + +using namespace llvm; +using namespace std; + +namespace +{ + /* + * Remove smin intrinsic because it's not supported by our runtime. 
+ */ + struct ClamBCRemoveUnsupportedICMPIntrinsics : public PassInfoMixin + { + protected: + Module *pMod = nullptr; + //const char * const UnsupportedICMPIntrinsics_NAME = ".smin"; + + FunctionType * sminType = nullptr; + + virtual llvm::FunctionType * getUnsupportedICMPIntrinsicsFunctionType(Type * functionArgType){ + return FunctionType::get(functionArgType, {functionArgType, functionArgType}, false); + } + /* Emit an internal function newName(left, right) that returns left when 'icmp predicate left, right' holds and right otherwise. */ + virtual llvm::Function * addFunction(Type * functionArgType, + const char * const newName, + llvm::CmpInst::Predicate predicate){ + /* The result slot is a stack object, so it belongs in the alloca address space, not the program (code) one. */ + uint32_t addressSpace = pMod->getDataLayout().getAllocaAddrSpace(); + + FunctionType * ft = getUnsupportedICMPIntrinsicsFunctionType(functionArgType); + + llvm::Function * smin = Function::Create(ft, GlobalValue::InternalLinkage, newName, *pMod); + Value * pLeft = smin->getArg(0); + Value * pRight = smin->getArg(1); + BasicBlock * pEntry = BasicBlock::Create(pMod->getContext(), "entry", smin); + BasicBlock * pLHS = BasicBlock::Create(pMod->getContext(), "left", smin); + BasicBlock * pRHS = BasicBlock::Create(pMod->getContext(), "right", smin); + BasicBlock * pRetBlock = BasicBlock::Create(pMod->getContext(), "ret", smin); + + //entry block + AllocaInst * retVar = new AllocaInst(functionArgType, addressSpace , "ret", pEntry); + ICmpInst * cmp = new ICmpInst(*pEntry, predicate, pLeft, pRight, "icmp"); + BranchInst::Create(pLHS, pRHS, cmp, pEntry); + + //predicate holds: the left operand is the result + new StoreInst (pLeft, retVar, pLHS); + BranchInst::Create(pRetBlock, pLHS); + + //predicate does not hold: the right operand is the result + new StoreInst (pRight, retVar, pRHS); + BranchInst::Create(pRetBlock, pRHS); + + LoadInst * pli = new LoadInst(functionArgType, retVar, "load", pRetBlock); + ReturnInst::Create(pMod->getContext(), pli, pRetBlock); + return smin; + } + + virtual bool replaceCalls(const char * const intrinsicName, + const char * newName, + llvm::CmpInst::Predicate predicate, + Type * functionArgType){ + std::vector calls; + gatherCallsToIntrinsic(pMod, intrinsicName, calls); + if
(calls.size()){ + Function * smin = addFunction(functionArgType, newName, predicate); + replaceAllCalls(getUnsupportedICMPIntrinsicsFunctionType(functionArgType), smin, calls, "ClamBCRemoveUnsupportedICMPIntrinsics_"); + + return true; + } + return false; + } + + public: + + virtual ~ClamBCRemoveUnsupportedICMPIntrinsics() {} + + PreservedAnalyses run(Module & m, ModuleAnalysisManager & MAM) + { + pMod = &m; + + bool bRet = replaceCalls("llvm.smin.i32", ".smin.32", CmpInst::ICMP_SLT, Type::getInt32Ty(pMod->getContext())); + bRet |= replaceCalls("llvm.smin.i16", ".smin.16", CmpInst::ICMP_SLT, Type::getInt16Ty(pMod->getContext())); + bRet |= replaceCalls("llvm.umin.i16", ".umin.16", CmpInst::ICMP_ULT, Type::getInt16Ty(pMod->getContext())); + bRet |= replaceCalls("llvm.umin.i32", ".umin.32", CmpInst::ICMP_ULT, Type::getInt32Ty(pMod->getContext())); + bRet |= replaceCalls("llvm.umax.i32", ".umax.32", CmpInst::ICMP_UGT, Type::getInt32Ty(pMod->getContext())); + bRet |= replaceCalls("llvm.umax.i16", ".umax.16", CmpInst::ICMP_UGT, Type::getInt16Ty(pMod->getContext())); + bRet |= replaceCalls("llvm.smax.i32", ".smax.32", CmpInst::ICMP_SGT, Type::getInt32Ty(pMod->getContext())); + bRet |= replaceCalls("llvm.smax.i16", ".smax.16", CmpInst::ICMP_SGT, Type::getInt16Ty(pMod->getContext())); + + + DEBUGERR << "TODO: ADD smin detection to the validator" << "\n"; + + if (bRet){ + return PreservedAnalyses::none(); + } + + return PreservedAnalyses::all(); + } + + }; // end of struct ClamBCRemoveUnsupportedICMPIntrinsics + +} // end of anonymous namespace + +// This part is the new way of registering your pass +extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK +llvmGetPassPluginInfo() { + return { + LLVM_PLUGIN_API_VERSION, "ClamBCRemoveUnsupportedICMPIntrinsics", "v0.1", + [](PassBuilder &PB) { + PB.registerPipelineParsingCallback( + [](StringRef Name, ModulePassManager &FPM, + ArrayRef) { + if(Name == "clambc-remove-unsupported-icmp-intrinsics"){ + 
FPM.addPass(ClamBCRemoveUnsupportedICMPIntrinsics()); + return true; + } + return false; + } + ); + } + }; +} + + + diff --git a/libclambcc/ClamBCTrace/CMakeLists.txt b/libclambcc/ClamBCTrace/CMakeLists.txt new file mode 100644 index 0000000000..a2c73e645a --- /dev/null +++ b/libclambcc/ClamBCTrace/CMakeLists.txt @@ -0,0 +1,72 @@ +# Copyright (C) 2021 Cisco Systems, Inc. and/or its affiliates. All rights reserved. + +# +# The clambctrace object library +# +add_library(clambctrace_obj OBJECT) +target_sources(clambctrace_obj + PRIVATE + ClamBCTrace.cpp +) + +target_include_directories(clambctrace_obj + PRIVATE + ${CMAKE_BINARY_DIR} # For clambc-version.h (generated file) + . # For Common/clambc.h + .. # For clambc.h #TODO: change all passes to use "Common" and then delete this line. + ${LLVM_INCLUDE_DIRS} +) + +set_target_properties(clambctrace_obj PROPERTIES COMPILE_FLAGS "${WARNCXXFLAGS}") + +# +# For testing +# +#target_compile_definitions(clambctrace_obj -DLOG_BEFORE_AFTER=1) + +# +# The clambctrace shared library. +# +add_library( clambctrace SHARED ) +target_link_libraries( clambctrace + PUBLIC + clambctrace_obj ) +set_target_properties( clambctrace PROPERTIES + VERSION ${LIBCLAMBC_VERSION} + SOVERSION ${LIBCLAMBC_SOVERSION} ) + +target_link_directories(clambctrace PRIVATE ${LLVM_LIBRARY_DIRS}) +target_link_libraries(clambctrace PUBLIC ${LLVM_LIBS}) + +if(WIN32) + install(TARGETS clambctrace DESTINATION .) 
+ + # Also install shared library (DLL) dependencies + install(CODE [[ + file(GET_RUNTIME_DEPENDENCIES + LIBRARIES + $ + RESOLVED_DEPENDENCIES_VAR _r_deps + UNRESOLVED_DEPENDENCIES_VAR _u_deps + DIRECTORIES + ${LLVM_LIBRARY_DIRS} + ) + foreach(_file ${_r_deps}) + string(TOLOWER ${_file} _file_lower) + if(NOT ${_file_lower} MATCHES "c:[\\/]windows[\\/]system32.*") + file(INSTALL + DESTINATION "${CMAKE_INSTALL_PREFIX}" + TYPE SHARED_LIBRARY + FOLLOW_SYMLINK_CHAIN + FILES "${_file}" + ) + endif() + endforeach() + #message("UNRESOLVED_DEPENDENCIES_VAR: ${_u_deps}") + ]]) +else() + install(TARGETS clambctrace DESTINATION ${CMAKE_INSTALL_LIBDIR}) +endif() + + + diff --git a/libclambcc/ClamBCTrace/ClamBCTrace.cpp b/libclambcc/ClamBCTrace/ClamBCTrace.cpp index 2943147ad2..2eb04d0b23 100644 --- a/libclambcc/ClamBCTrace/ClamBCTrace.cpp +++ b/libclambcc/ClamBCTrace/ClamBCTrace.cpp @@ -19,10 +19,9 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, * MA 02110-1301, USA. */ -#include "clambc.h" -#include "ClamBCModule.h" -#include "ClamBCCommon.h" -#include "ClamBCUtilities.h" +#include "Common/clambc.h" +#include "Common/ClamBCCommon.h" +#include "Common/ClamBCUtilities.h" #include #include @@ -34,6 +33,8 @@ #include #include #include +#include +#include #include #include #include @@ -55,22 +56,18 @@ static cl::opt InsertTracing("clambc-trace", cl::Hidden, cl::init(false), cl::desc("Enable tracing of bytecode execution")); -namespace -{ -class ClamBCTrace : public ModulePass +namespace ClamBCTrace { + +class ClamBCTrace : public PassInfoMixin { public: - static char ID; - ClamBCTrace() - : ModulePass(ID) {} + ClamBCTrace() {} virtual llvm::StringRef getPassName() const { return "ClamAV Bytecode Execution Tracing"; } - virtual bool runOnModule(Module &M); + PreservedAnalyses run(Module & m, ModuleAnalysisManager & MAM); }; -char ClamBCTrace::ID; -} // namespace /* declare i32 @trace_directory(i8*, i32) @@ -87,10 +84,11 @@ declare i32 @trace_ptr(i8*, i32) */ -bool 
ClamBCTrace::runOnModule(Module &M) +PreservedAnalyses ClamBCTrace::run(Module & M, ModuleAnalysisManager & MAM) { - if (!InsertTracing) - return false; + if (!InsertTracing) { + return PreservedAnalyses::all(); + } unsigned MDDbgKind = M.getContext().getMDKindID("dbg"); DenseMap scopeIDs; unsigned scopeid = 0; @@ -102,16 +100,16 @@ bool ClamBCTrace::runOnModule(Module &M) args.push_back(I32Ty); FunctionType *FTy = FunctionType::get(I32Ty, args, false); /* llvm 10 replaces this with FunctionCallee. */ - Constant *trace_directory = M.getOrInsertFunction("trace_directory", FTy); - Constant *trace_scope = M.getOrInsertFunction("trace_scope", FTy); - Constant *trace_source = M.getOrInsertFunction("trace_source", FTy); - Constant *trace_op = M.getOrInsertFunction("trace_op", FTy); - Constant *trace_value = M.getOrInsertFunction("trace_value", FTy); - Constant *trace_ptr = M.getOrInsertFunction("trace_ptr", FTy); + FunctionCallee trace_directory = M.getOrInsertFunction("trace_directory", FTy); + FunctionCallee trace_scope = M.getOrInsertFunction("trace_scope", FTy); + FunctionCallee trace_source = M.getOrInsertFunction("trace_source", FTy); + FunctionCallee trace_op = M.getOrInsertFunction("trace_op", FTy); + FunctionCallee trace_value = M.getOrInsertFunction("trace_value", FTy); + FunctionCallee trace_ptr = M.getOrInsertFunction("trace_ptr", FTy); assert(trace_scope && trace_source && trace_op && trace_value && trace_directory && trace_ptr); - if (!trace_directory->use_empty() || !trace_scope->use_empty() || !trace_source->use_empty() || !trace_op->use_empty() || - !trace_value->use_empty() || !trace_ptr->use_empty()) { + if (!trace_directory.getCallee()->use_empty() || !trace_scope.getCallee()->use_empty() || !trace_source.getCallee()->use_empty() || !trace_op.getCallee()->use_empty() || + !trace_value.getCallee()->use_empty() || !trace_ptr.getCallee()->use_empty()) { ClamBCStop("Tracing API can only be used by compiler!\n", &M); } @@ -156,7 +154,6 @@ bool 
ClamBCTrace::runOnModule(Module &M) while (llvm::isa(scope)) { DILexicalBlock *lex = llvm::cast(scope); //scope = lex->getContext(); - /*aragusa: I have no idea if this is the right thing to do here.*/ scope = lex->getScope(); } @@ -197,11 +194,6 @@ bool ClamBCTrace::runOnModule(Module &M) for (Function::arg_iterator AI = I->arg_begin(), AE = I->arg_end(); AI != AE; ++AI) { if (isa(AI->getType())) { -#if 0 - Value *V = builder.CreateIntCast(AI, Type::getInt32Ty(M.getContext()), false); - Value *ValueName = builder.CreateGlobalStringPtr(AI->getName().data()); - builder.CreateCall2(trace_value, ValueName, V); -#endif } else if (isa(AI->getType())) { Value *V = builder.CreatePointerCast(AI, PointerType::getUnqual(Type::getInt8Ty(M.getContext()))); @@ -218,13 +210,7 @@ bool ClamBCTrace::runOnModule(Module &M) std::vector args = { Op, ConstantInt::get(Type::getInt32Ty(M.getContext()), Loc->getColumn())}; builder.CreateCall(trace_op, args, "ClamBCTrace_trace_op"); - //Value *ValueName = builder.CreateGlobalStringPtr(II->getName().data()); if (isa(II->getType())) { -#if 0 - builder.SetInsertPoint(&*J, BBIt); - Value *V = builder.CreateIntCast(II, Type::getInt32Ty(M.getContext()), false); - builder.CreateCall2(trace_value, ValueName, V); -#endif } else if (isa(II->getType())) { builder.SetInsertPoint(&*J, BBIt); Value *V = builder.CreatePointerCast(II, @@ -237,10 +223,32 @@ bool ClamBCTrace::runOnModule(Module &M) } } } - return true; + return PreservedAnalyses::none(); } -llvm::ModulePass *createClamBCTrace() -{ - return new ClamBCTrace(); + +} // namespace + + +// This part is the new way of registering your pass +extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK +llvmGetPassPluginInfo() { + return { + LLVM_PLUGIN_API_VERSION, "ClamBCTrace", "v0.1", + [](PassBuilder &PB) { + PB.registerPipelineParsingCallback( + [](StringRef Name, ModulePassManager &FPM, + ArrayRef) { + if(Name == "clambc-trace"){ + FPM.addPass(ClamBCTrace::ClamBCTrace()); + return true; + } + 
return false; + } + ); + } + }; } + + + diff --git a/libclambcc/ClamBCVerifier/CMakeLists.txt b/libclambcc/ClamBCVerifier/CMakeLists.txt new file mode 100644 index 0000000000..36e7463821 --- /dev/null +++ b/libclambcc/ClamBCVerifier/CMakeLists.txt @@ -0,0 +1,72 @@ +# Copyright (C) 2021 Cisco Systems, Inc. and/or its affiliates. All rights reserved. + +# +# The clambcverifier object library +# +add_library(clambcverifier_obj OBJECT) +target_sources(clambcverifier_obj + PRIVATE + ClamBCVerifier.cpp +) + +target_include_directories(clambcverifier_obj + PRIVATE + ${CMAKE_BINARY_DIR} # For clambc-version.h (generated file) + . # For Common/clambc.h + .. # For clambc.h #TODO: change all passes to use "Common" and then delete this line. + ${LLVM_INCLUDE_DIRS} +) + +set_target_properties(clambcverifier_obj PROPERTIES COMPILE_FLAGS "${WARNCXXFLAGS}") + +# +# For testing +# +#target_compile_definitions(clambcverifier_obj -DLOG_BEFORE_AFTER=1) + +# +# The clambcverifier shared library. +# +add_library( clambcverifier SHARED ) +target_link_libraries( clambcverifier + PUBLIC + clambcverifier_obj ) +set_target_properties( clambcverifier PROPERTIES + VERSION ${LIBCLAMBC_VERSION} + SOVERSION ${LIBCLAMBC_SOVERSION} ) + +target_link_directories(clambcverifier PRIVATE ${LLVM_LIBRARY_DIRS}) +target_link_libraries(clambcverifier PUBLIC ${LLVM_LIBS}) + +if(WIN32) + install(TARGETS clambcverifier DESTINATION .) 
+ + # Also install shared library (DLL) dependencies + install(CODE [[ + file(GET_RUNTIME_DEPENDENCIES + LIBRARIES + $ + RESOLVED_DEPENDENCIES_VAR _r_deps + UNRESOLVED_DEPENDENCIES_VAR _u_deps + DIRECTORIES + ${LLVM_LIBRARY_DIRS} + ) + foreach(_file ${_r_deps}) + string(TOLOWER ${_file} _file_lower) + if(NOT ${_file_lower} MATCHES "c:[\\/]windows[\\/]system32.*") + file(INSTALL + DESTINATION "${CMAKE_INSTALL_PREFIX}" + TYPE SHARED_LIBRARY + FOLLOW_SYMLINK_CHAIN + FILES "${_file}" + ) + endif() + endforeach() + #message("UNRESOLVED_DEPENDENCIES_VAR: ${_u_deps}") + ]]) +else() + install(TARGETS clambcverifier DESTINATION ${CMAKE_INSTALL_LIBDIR}) +endif() + + + diff --git a/libclambcc/ClamBCVerifier/ClamBCVerifier.cpp b/libclambcc/ClamBCVerifier/ClamBCVerifier.cpp index c6f1f290fc..e72da29142 100644 --- a/libclambcc/ClamBCVerifier/ClamBCVerifier.cpp +++ b/libclambcc/ClamBCVerifier/ClamBCVerifier.cpp @@ -41,23 +41,28 @@ * } */ + +#include "Common/ClamBCDiagnostics.h" +#include "Common/clambc.h" +#include "Common/ClamBCUtilities.h" + + + + #include -#include "llvm/IR/Function.h" -#include "llvm/Support/raw_ostream.h" +#include +#include +#include -#include "llvm/IR/LegacyPassManager.h" -#include "llvm/Transforms/IPO/PassManagerBuilder.h" +#include -using namespace llvm; -#include "ClamBCDiagnostics.h" -#include "ClamBCModule.h" #include #include -#include "llvm/Analysis/ConstantFolding.h" -#include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Analysis/ScalarEvolutionExpressions.h" -#include "llvm/Analysis/ScalarEvolutionExpander.h" +#include +#include +#include +#include #include #include #include @@ -68,28 +73,36 @@ using namespace llvm; #include #include #include -#include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include +#include +#include #include -#include "llvm/ADT/SmallSet.h" +#include -#include "Common/clambc.h" -#include "Common/ClamBCUtilities.h" -static cl::opt - 
StopOnFirstError("clambc-stopfirst", cl::init(false), - cl::desc("Stop on first error in the verifier")); -namespace +#include +#include +#include +#include + +#include + +#include + + + + + +using namespace llvm; + + +namespace ClamBCVerifier { -class ClamBCVerifier : public FunctionPass, +class ClamBCVerifier : public PassInfoMixin, public InstVisitor { - ScalarEvolution *SE; - DominatorTree *DT; - BasicBlock *AbrtBB; bool Final; llvm::Module *pMod = nullptr; @@ -110,6 +123,7 @@ class ClamBCVerifier : public FunctionPass, } bool visitSelectInst(SelectInst &I) { + llvm::errs() << "<" << __FUNCTION__<< "::" << __LINE__ << ">" << "Selects need tobe removed, so this should be a false\n"; return true; } bool visitBranchInst(BranchInst &BI) @@ -134,8 +148,23 @@ class ClamBCVerifier : public FunctionPass, return true; } + /* + * FreezeInst's are used to guarantee a value being set to something fixed + * if it is undef or a poison value. They are a noop otherwise, so we will allow + * them in the verifier, and remove them in a pass to be run after the verifier. + * (a 'verifier' shouldn't be changing the IR). 
+ */ + bool visitFreezeInst(FreezeInst &I){ + return true; + } + bool visitInstruction(Instruction &I) { + + DEBUG_VALUE(&I); +#define DEBUG_NODEREF(val) llvm::errs() << "<" << __FUNCTION__ << "::" << __LINE__ << ">" << val << "\n"; + DEBUG_NODEREF(llvm::isa(&I)); + printDiagnostic("Unhandled instruction in verifier", &I); return false; } @@ -144,7 +173,12 @@ class ClamBCVerifier : public FunctionPass, { Function *ret = pci->getCalledFunction(); if (nullptr == ret) { - Value *v = pci->getCalledValue(); + Value * v = pci->getCalledOperand(); /*The callee is the last operand of a call; operand 0 is its first argument.*/ + if (nullptr == v){ + llvm::errs() << "<" << __LINE__ << ">" << *pci << "\n"; + llvm::errs() << "<" << __LINE__ << ">" << "call has no called operand" << "\n"; /*v is null here, so it must not be dereferenced*/ + assert (0 && "How do I handle function pointers?"); + } if (BitCastOperator *bco = llvm::dyn_cast(v)) { ret = llvm::dyn_cast(bco->getOperand(0)); } @@ -152,34 +186,43 @@ class ClamBCVerifier : public FunctionPass, return ret; } - bool visitCallInst(CallInst &CI) - { - Function *F = getCalledFunctionFromCallInst(&CI); - if (!F) { - printDiagnostic("Indirect call checking not implemented yet!", &CI); - return false; - } - if (F->getCallingConv() != CI.getCallingConv()) { - printDiagnostic("For call to " + F->getName() + ", calling conventions don't match!", &CI); - return false; - } - if (F->isVarArg()) { - if (!F->getFunctionType()->getNumParams()) { + bool validateFunction(const llvm::Function * pFunc){ + + if (pFunc->isVarArg()) { + if (!pFunc->getFunctionType()->getNumParams()) { printDiagnostic(("Calling implicitly declared function '" + - F->getName() + "' is not supported (did you forget to" + pFunc->getName() + "' is not supported (did you forget to" "implement it, or typoed the function name?)") .str(), - &CI); + pFunc); } else { printDiagnostic("Checking calls to vararg functions/functions without" "a prototype is not supported!", - &CI); + pFunc); } return false; } return true; + + } + + bool visitCallInst(CallInst &CI) + { + Function *F =
getCalledFunctionFromCallInst(&CI); + if (!F) { + /*Determine if we want to allow indirect calls*/ + printDiagnostic("Indirect call checking not implemented!", &CI); + return false; + } + + if (F->getCallingConv() != CI.getCallingConv()) { + printDiagnostic("For call to " + F->getName() + ", calling conventions don't match!", &CI); + return false; + } + + return validateFunction(F); } bool visitPHINode(PHINode &PN) @@ -209,41 +252,132 @@ class ClamBCVerifier : public FunctionPass, return true; } + virtual bool isHandled(Instruction *pInst){ + bool bRet = llvm::isa(pInst) + || llvm::isa(pInst) + || llvm::isa(pInst) + || llvm::isa(pInst) + || llvm::isa(pInst) + || llvm::isa(pInst) + || llvm::isa(pInst) + || llvm::isa(pInst) + || llvm::isa(pInst) + || llvm::isa(pInst) + || llvm::isa(pInst) + || llvm::isa(pInst) + ; + + return bRet; + + } + + virtual bool isUndefOrPoisonValue(Value * pv){ + return llvm::isa(pv); + } + + virtual bool hasUndefsOrPoisonValues(ConstantExpr *pce, std::set & visited){ + if (visited.end() != std::find(visited.begin(), visited.end(), pce)) { + return false; + } + visited.insert(pce); + + for (size_t i = 0; i < pce->getNumOperands(); i++){ + Value * pv = pce->getOperand(i); + if (isUndefOrPoisonValue(pv)){ + return true; + } + if (ConstantExpr * ce = llvm::dyn_cast(pv)){ + if (hasUndefsOrPoisonValues(ce, visited)){ + return true; + } + } + } + + return false; + } + + virtual bool hasUndefsOrPoisonValues(ConstantExpr *pce){ + std::set visited; + return hasUndefsOrPoisonValues(pce, visited); + } + + /*PoisonValue is derived from UndefValue, so we only have to check for that one.*/ + virtual bool hasUndefsOrPoisonValues(Instruction *pInst){ + for (size_t i = 0; i < pInst->getNumOperands(); i++){ + Value * pVal = pInst->getOperand(i); + if (llvm::isa(pVal)){ + continue; + } + + if (isUndefOrPoisonValue(pVal)){ + return true; + } + + if (ConstantExpr * pce = llvm::dyn_cast(pVal)){ + if (hasUndefsOrPoisonValues(pce)){ + return true; + } + } + + } + 
+ return false; + } + /* Check every instruction of pFunc; returns false as soon as one instruction is rejected, true if all are accepted. */ + virtual bool walk (Function * pFunc){ + bool bRet = true; + for (auto fi = pFunc->begin(), fe = pFunc->end(); fi != fe; fi++){ + BasicBlock * pBB = llvm::cast(fi); + for (auto bi = pBB->begin(), be = pBB->end(); bi != be; bi++){ + Instruction * pInst = llvm::cast(bi); + if (hasUndefsOrPoisonValues(pInst)){ + printDiagnostic("Poison value or Undef value found in instruction.", pInst); + return false; + } + + if (PHINode * pn = llvm::dyn_cast(pInst)){ + bRet = visitPHINode(*pn); + } else if (CallInst * pci = llvm::dyn_cast(pInst)){ + bRet = visitCallInst(*pci); + } else if (SwitchInst * psi = llvm::dyn_cast(pInst)){ + bRet = visitSwitchInst(*psi); + } else { + bRet = isHandled(pInst); + } + + if (!bRet){ + return false; /* a plain break only leaves the inner loop, and the next basic block would overwrite the failure */ + } + } + } + + return bRet; + } + public: - static char ID; - explicit ClamBCVerifier() - : FunctionPass(ID), Final(false) {} + explicit ClamBCVerifier(): + Final(false) {} virtual llvm::StringRef getPassName() const { return "ClamAV Bytecode Verifier"; } - virtual bool runOnFunction(Function &F) + + + PreservedAnalyses run(Function & F, FunctionAnalysisManager & fam) { pMod = F.getParent(); - AbrtBB = 0; - SE = &getAnalysis().getSE(); - ; - DT = &getAnalysis().getDomTree(); - - bool OK = true; - std::vector insns; - // verifying can insert runtime checks, so be safe and create an initial - // list of instructions to process so we are not affected by transforms.
- for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ++I) { - insns.push_back(&*I); - } - for (std::vector::iterator I = insns.begin(), E = insns.end(); - I != E; ++I) { - OK &= visit(*I); - if (!OK && StopOnFirstError) - break; + bool OK = validateFunction(&F); + if (OK) { + OK = walk(&F); } - if (!OK) + + if (!OK) { ClamBCStop("Verifier rejected bytecode function due to errors", &F); - return false; + } + + return PreservedAnalyses::all(); } virtual void getAnalysisUsage(AnalysisUsage &AU) const { @@ -252,10 +386,37 @@ class ClamBCVerifier : public FunctionPass, AU.setPreservesAll(); } }; -char ClamBCVerifier::ID = 0; +//char ClamBCVerifier::ID = 0; } // namespace -static RegisterPass X("clambc-verifier", "ClamBCVerifier Pass", - false /* Only looks at CFG */, - false /* Analysis Pass */); + +// This part is the new way of registering your pass +extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK +llvmGetPassPluginInfo() { + return { + LLVM_PLUGIN_API_VERSION, "ClamBCVerifier", "v0.1", + [](PassBuilder &PB) { + PB.registerPipelineParsingCallback( + [](StringRef Name, FunctionPassManager &FPM, + ArrayRef) { + if(Name == "clambc-verifier"){ + FPM.addPass(ClamBCVerifier::ClamBCVerifier()); + return true; + } + return false; + } + ); + } + }; +} + + + + + + + + + + diff --git a/libclambcc/ClamBCWriter/CMakeLists.txt b/libclambcc/ClamBCWriter/CMakeLists.txt new file mode 100644 index 0000000000..3527d59397 --- /dev/null +++ b/libclambcc/ClamBCWriter/CMakeLists.txt @@ -0,0 +1,72 @@ +# Copyright (C) 2021 Cisco Systems, Inc. and/or its affiliates. All rights reserved. + +# +# The clambcwriter object library +# +add_library(clambcwriter_obj OBJECT) +target_sources(clambcwriter_obj + PRIVATE + ClamBCWriter.cpp +) + +target_include_directories(clambcwriter_obj + PRIVATE + ${CMAKE_BINARY_DIR} # For clambc-version.h (generated file) + . # For Common/clambc.h + .. # For clambc.h #TODO: change all passes to use "Common" and then delete this line. 
+ ${LLVM_INCLUDE_DIRS} +) + +set_target_properties(clambcwriter_obj PROPERTIES COMPILE_FLAGS "${WARNCXXFLAGS}") + +# +# For testing +# +#target_compile_definitions(clambcwriter_obj -DLOG_BEFORE_AFTER=1) + +# +# The clambcwriter shared library. +# +add_library( clambcwriter SHARED ) +target_link_libraries( clambcwriter + PUBLIC + clambcwriter_obj ) +set_target_properties( clambcwriter PROPERTIES + VERSION ${LIBCLAMBC_VERSION} + SOVERSION ${LIBCLAMBC_SOVERSION} ) + +target_link_directories(clambcwriter PRIVATE ${LLVM_LIBRARY_DIRS}) +target_link_libraries(clambcwriter PUBLIC ${LLVM_LIBS}) + +if(WIN32) + install(TARGETS clambcwriter DESTINATION .) + + # Also install shared library (DLL) dependencies + install(CODE [[ + file(GET_RUNTIME_DEPENDENCIES + LIBRARIES + $ + RESOLVED_DEPENDENCIES_VAR _r_deps + UNRESOLVED_DEPENDENCIES_VAR _u_deps + DIRECTORIES + ${LLVM_LIBRARY_DIRS} + ) + foreach(_file ${_r_deps}) + string(TOLOWER ${_file} _file_lower) + if(NOT ${_file_lower} MATCHES "c:[\\/]windows[\\/]system32.*") + file(INSTALL + DESTINATION "${CMAKE_INSTALL_PREFIX}" + TYPE SHARED_LIBRARY + FOLLOW_SYMLINK_CHAIN + FILES "${_file}" + ) + endif() + endforeach() + #message("UNRESOLVED_DEPENDENCIES_VAR: ${_u_deps}") + ]]) +else() + install(TARGETS clambcwriter DESTINATION ${CMAKE_INSTALL_LIBDIR}) +endif() + + + diff --git a/libclambcc/ClamBCWriter/ClamBCWriter.cpp b/libclambcc/ClamBCWriter/ClamBCWriter.cpp index e1f60a4fba..59ac8ac1b0 100644 --- a/libclambcc/ClamBCWriter/ClamBCWriter.cpp +++ b/libclambcc/ClamBCWriter/ClamBCWriter.cpp @@ -19,10 +19,11 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, * MA 02110-1301, USA. 
*/ -#include "../Common/bytecode_api.h" -#include "clambc.h" -#include "ClamBCModule.h" +#include "Common/bytecode_api.h" +#include "Common/clambc.h" +#include "Common/ClamBCModule.h" #include "ClamBCAnalyzer/ClamBCAnalyzer.h" +#include "ClamBCRegAlloc/ClamBCRegAlloc.h" #include "Common/ClamBCUtilities.h" #include @@ -45,6 +46,8 @@ #include #include #include +#include +#include #include #include #include @@ -105,7 +108,7 @@ class ClamBCOutputWriter public: static ClamBCOutputWriter *createClamBCOutputWriter(llvm::StringRef srFileName, llvm::Module *pMod, - ClamBCAnalyzer *pAnalyzer) + ClamBCAnalysis *pAnalyzer) { std::error_code ec; raw_fd_ostream *rfo = new raw_fd_ostream(srFileName, ec); @@ -123,7 +126,7 @@ class ClamBCOutputWriter return ret; } - ClamBCOutputWriter(llvm::formatted_raw_ostream &outStream, llvm::Module *pMod, ClamBCAnalyzer *pAnalyzer) + ClamBCOutputWriter(llvm::formatted_raw_ostream &outStream, llvm::Module *pMod, ClamBCAnalysis *pAnalyzer) : Out(lineBuffer), OutReal(outStream), maxLineLength(0), lastLinePos(0), pMod(pMod), pAnalyzer(pAnalyzer) { printGlobals(pMod, pAnalyzer); @@ -162,7 +165,7 @@ class ClamBCOutputWriter printFixedNumber(Out, n, fixed); } - void printModuleHeader(Module &M, ClamBCAnalyzer *pAnalyzer, unsigned maxLine) + void printModuleHeader(Module &M, ClamBCAnalysis *pAnalyzer, unsigned maxLine) { NamedMDNode *MinFunc = M.getNamedMetadata("clambc.funcmin"); NamedMDNode *MaxFunc = M.getNamedMetadata("clambc.funcmax"); @@ -251,7 +254,7 @@ class ClamBCOutputWriter assert((OutReal.tell() < 8192) && "OutReal too big"); } - void describeType(llvm::raw_ostream &Out, const Type *Ty, Module *M, ClamBCAnalyzer *pAnalyzer) + void describeType(llvm::raw_ostream &Out, const Type *Ty, Module *M, ClamBCAnalysis *pAnalyzer) { if (const FunctionType *FTy = dyn_cast(Ty)) { printFixedNumber(Out, 1, 1); @@ -310,7 +313,7 @@ class ClamBCOutputWriter if (const PointerType *PTy = dyn_cast(Ty)) { printFixedNumber(Out, 5, 1); - const Type *ETy = 
PTy->getElementType(); + const Type *ETy = PTy->getPointerElementType(); // pointers to opaque types are treated as i8* int id = -1; if (llvm::isa(ETy)) { @@ -402,7 +405,7 @@ class ClamBCOutputWriter ClamBCStop("Unsupported constant type", &M); } - void printGlobals(llvm::Module *pMod, ClamBCAnalyzer *pAnalyzer) + void printGlobals(llvm::Module *pMod, ClamBCAnalysis *pAnalyzer) { const std::string &ls = pAnalyzer->getLogicalSignature(); if (ls.empty()) { @@ -441,7 +444,7 @@ class ClamBCOutputWriter // function prototype printNumber(Out, pAnalyzer->getTypeID(F->getFunctionType()), false); // function name - std::string Name = F->getName(); + std::string Name (F->getName()); printConstData(Out, (const unsigned char *)Name.c_str(), Name.size() + 1); } @@ -533,7 +536,7 @@ class ClamBCOutputWriter } } - void finished(llvm::Module *pMod, ClamBCAnalyzer *pAnalyzer) + void finished(llvm::Module *pMod, ClamBCAnalysis *pAnalyzer) { //maxline+1, 1 more for \0 @@ -617,7 +620,7 @@ class ClamBCOutputWriter int maxLineLength = 0; int lastLinePos = 0; llvm::Module *pMod = nullptr; - ClamBCAnalyzer *pAnalyzer = nullptr; + ClamBCAnalysis *pAnalyzer = nullptr; void printFixedNumber(raw_ostream &Out, unsigned n, unsigned fixed) { @@ -684,7 +687,7 @@ class ClamBCOutputWriter } }; -class ClamBCWriter : public ModulePass, public InstVisitor +class ClamBCWriter : public PassInfoMixin, public InstVisitor { typedef DenseMap BBIDMap; BBIDMap BBMap; @@ -693,7 +696,7 @@ class ClamBCWriter : public ModulePass, public InstVisitor unsigned opcodecvt[Instruction::OtherOpsEnd]; raw_ostream *MapOut = nullptr; FunctionPass *Dumper = nullptr; - ClamBCRegAlloc *RA = nullptr; + ClamBCRegAllocAnalysis *RA = nullptr; unsigned fid, minflvl; MetadataContext *TheMetadata = nullptr; unsigned MDDbgKind; @@ -702,13 +705,13 @@ class ClamBCWriter : public ModulePass, public InstVisitor llvm::Module *pMod = nullptr; ClamBCOutputWriter *pOutputWriter = nullptr; - ClamBCAnalyzer *pAnalyzer = nullptr; + 
ClamBCAnalysis *pAnalyzer = nullptr; + ModuleAnalysisManager * pModuleAnalysisManager = nullptr; public: static char ID; explicit ClamBCWriter() - : ModulePass(ID), - TheModule(0), MapOut(0), Dumper(0) + : TheModule(0), MapOut(0), Dumper(0) { if (!MapFile.empty()) { std::error_code ec; @@ -735,17 +738,19 @@ class ClamBCWriter : public ModulePass, public InstVisitor void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); - AU.addRequired(); AU.setPreservesAll(); } virtual bool doInitialization(Module &M); - bool runOnModule(Module &m) + PreservedAnalyses run(Module & m, ModuleAnalysisManager & mam) { - + doInitialization(m); pMod = &m; - pAnalyzer = &getAnalysis(); + pModuleAnalysisManager = &mam; + + ClamBCAnalysis & analysis = mam.getResult(m); + pAnalyzer = &analysis; pOutputWriter = ClamBCOutputWriter::createClamBCOutputWriter(outFile, pMod, pAnalyzer); for (auto i = pMod->begin(), e = pMod->end(); i != e; i++) { @@ -757,7 +762,8 @@ class ClamBCWriter : public ModulePass, public InstVisitor } } - return false; + doFinalization(m); + return PreservedAnalyses::all(); } void gatherGEPs(BasicBlock *pBB, std::vector &geps) @@ -784,6 +790,16 @@ class ClamBCWriter : public ModulePass, public InstVisitor } } + void updateAnalyzer(Value * pV){ + DEBUGERR << "HOPEFULLY WE CAN REMOVE THIS AND getHighestTID" << "\n"; + return; + + uint32_t tid = pAnalyzer->getHighestTID(); + DEBUG_NONPOINTER(tid); + + + } + void fixGEPs(Function *pFunc) { std::vector geps; @@ -799,6 +815,7 @@ class ClamBCWriter : public ModulePass, public InstVisitor PointerType *pDestType = Type::getInt8PtrTy(pMod->getContext()); CastInst *ci = CastInst::CreatePointerCast(operand, pDestType, "ClamBCWriter_fixGEPs", pGep); + updateAnalyzer(ci); Value *index = pGep->getOperand(1); @@ -815,13 +832,23 @@ class ClamBCWriter : public ModulePass, public InstVisitor assert(multiplier && "HOW DID THIS END UP ZERO"); Constant *cMultiplier = ConstantInt::get(index->getType(), multiplier); + 
updateAnalyzer(cMultiplier); Value *newIndex = BinaryOperator::Create(Instruction::Mul, cMultiplier, index, "ClamBCWriter_fixGEPs", pGep); + updateAnalyzer(newIndex); GetElementPtrInst *pNew = nullptr; if (pGep->isInBounds()) { - pNew = GetElementPtrInst::Create(nullptr, ci, newIndex, "ClamBCWriter_fixGEPs", pGep); + DEBUG_VALUE(ci); + Type * pt = ci->getType(); + if (llvm::isa(pt)){ + pt = pt->getPointerElementType(); + } + DEBUG_VALUE(pt); + DEBUG_NONPOINTER(llvm::isa(pt)); + pNew = GetElementPtrInst::Create(pt, ci, newIndex, "ClamBCWriter_fixGEPs", pGep); + updateAnalyzer(pNew); } else { assert(0 && "DON'T THINK THIS CAN HAPPEN"); } @@ -829,6 +856,7 @@ class ClamBCWriter : public ModulePass, public InstVisitor assert(pNew && "HOW DID HTIS HAPPEN"); ci = CastInst::CreatePointerCast(pNew, pGep->getType(), "ClamBCWriter_fixGEPs", pGep); + updateAnalyzer(ci); pGep->replaceAllUsesWith(ci); pGep->eraseFromParent(); @@ -837,12 +865,8 @@ class ClamBCWriter : public ModulePass, public InstVisitor bool runOnFunction(Function &F) { - //TODO: Move this to another pass once the Analyzer no longer - //makes changes to the code. fixGEPs(&F); - //Don't think I need this anymore. - //If anything, move it to a verifier. if ("" == F.getName()) { assert(0 && "Function created by ClamBCRebuild is not being deleted"); } @@ -857,10 +881,13 @@ class ClamBCWriter : public ModulePass, public InstVisitor return false; } fid++; + //Removed, see note about getFunctionID at the top of the file. 
- assert(pAnalyzer->getFunctionID(&F) == fid); + assert(pAnalyzer->getFunctionID(&F) == fid && "Function IDs don't match"); + + FunctionAnalysisManager &fam = pModuleAnalysisManager->getResult(*pMod).getManager(); - RA = &getAnalysis(F); + RA = &fam.getResult(F); printFunction(F); if (Dumper) { Dumper->runOnFunction(F); @@ -976,7 +1003,7 @@ class ClamBCWriter : public ModulePass, public InstVisitor if (ConstantInt *CI = dyn_cast(GEP.getOperand(1))) { if (!CI->isZero()) { const PointerType *Ty = cast(GEP.getPointerOperand()->getType()); - const ArrayType *ATy = dyn_cast(Ty->getElementType()); + const ArrayType *ATy = dyn_cast(Ty->getPointerElementType()); if (ATy) { ClamBCStop("ATy", &GEP); } @@ -1387,10 +1414,12 @@ class ClamBCWriter : public ModulePass, public InstVisitor stop("ClamAV bytecode backend does not know about ", &I); } }; +#if 0 char ClamBCWriter::ID = 0; static RegisterPass X("clambc-writer", "ClamBCWriter Pass", false /* Only looks at CFG */, false /* Analysis Pass */); +#endif bool ClamBCWriter::doInitialization(Module &M) { @@ -1424,7 +1453,6 @@ bool ClamBCWriter::doInitialization(Module &M) //Dumper = createDbgInfoPrinterPass(); } fid = 0; - //OModule->writeGlobalMap(MapOut); MDDbgKind = M.getContext().getMDKindID("dbg"); return false; @@ -1608,7 +1636,26 @@ void ClamBCWriter::printBasicBlock(BasicBlock *BB) } } -llvm::ModulePass *createClamBCWriter() -{ - return new ClamBCWriter(); +// This part is the new way of registering your pass +extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK +llvmGetPassPluginInfo() { + return { + LLVM_PLUGIN_API_VERSION, "ClamBCWriter", "v0.1", + [](PassBuilder &PB) { + PB.registerPipelineParsingCallback( + [](StringRef Name, ModulePassManager &FPM, + ArrayRef) { + if(Name == "clambc-writer"){ + FPM.addPass(ClamBCWriter()); + return true; + } + return false; + } + ); + } + }; } + + + + diff --git a/libclambcc/Common/CMakeLists.txt b/libclambcc/Common/CMakeLists.txt new file mode 100644 index 
0000000000..ffb95dbdec --- /dev/null +++ b/libclambcc/Common/CMakeLists.txt @@ -0,0 +1,74 @@ +# Copyright (C) 2021 Cisco Systems, Inc. and/or its affiliates. All rights reserved. + +# +# The clambccommon object library +# +add_library(clambccommon_obj OBJECT) +target_sources(clambccommon_obj + PRIVATE + ClamBCDiagnostics.cpp + ClamBCUtilities.cpp + version.c +) + +target_include_directories(clambccommon_obj + PRIVATE + ${CMAKE_BINARY_DIR} # For clambc-version.h (generated file) + . # For Common/clambc.h + .. # For clambc.h #TODO: change all passes to use "Common" and then delete this line. + ${LLVM_INCLUDE_DIRS} +) + +set_target_properties(clambccommon_obj PROPERTIES COMPILE_FLAGS "${WARNCXXFLAGS}") + +# +# For testing +# +#target_compile_definitions(clambccommon_obj -DLOG_BEFORE_AFTER=1) + +# +# The clambccommon shared library. +# +add_library( clambccommon SHARED ) +target_link_libraries( clambccommon + PUBLIC + clambccommon_obj ) +set_target_properties( clambccommon PROPERTIES + VERSION ${LIBCLAMBC_VERSION} + SOVERSION ${LIBCLAMBC_SOVERSION} ) + +target_link_directories(clambccommon PRIVATE ${LLVM_LIBRARY_DIRS}) +target_link_libraries(clambccommon PUBLIC ${LLVM_LIBS}) + +if(WIN32) + install(TARGETS clambccommon DESTINATION .) 
+ + # Also install shared library (DLL) dependencies + install(CODE [[ + file(GET_RUNTIME_DEPENDENCIES + LIBRARIES + $ + RESOLVED_DEPENDENCIES_VAR _r_deps + UNRESOLVED_DEPENDENCIES_VAR _u_deps + DIRECTORIES + ${LLVM_LIBRARY_DIRS} + ) + foreach(_file ${_r_deps}) + string(TOLOWER ${_file} _file_lower) + if(NOT ${_file_lower} MATCHES "c:[\\/]windows[\\/]system32.*") + file(INSTALL + DESTINATION "${CMAKE_INSTALL_PREFIX}" + TYPE SHARED_LIBRARY + FOLLOW_SYMLINK_CHAIN + FILES "${_file}" + ) + endif() + endforeach() + #message("UNRESOLVED_DEPENDENCIES_VAR: ${_u_deps}") + ]]) +else() + install(TARGETS clambccommon DESTINATION ${CMAKE_INSTALL_LIBDIR}) +endif() + + + diff --git a/libclambcc/Common/ClamBCDiagnostics.cpp b/libclambcc/Common/ClamBCDiagnostics.cpp index 79c634b61f..5499578ec9 100644 --- a/libclambcc/Common/ClamBCDiagnostics.cpp +++ b/libclambcc/Common/ClamBCDiagnostics.cpp @@ -32,40 +32,14 @@ #include "clambc.h" using namespace llvm; -#if 0 -static inline void printSep(bool hasColors) -{ - if (hasColors) { - errs().resetColor(); - } - errs() << ":"; - if (hasColors) { - errs().changeColor(raw_ostream::SAVEDCOLOR, true); - } -} -#endif - // Print the main compile unit's source filename, // falls back to printing the module identifier. 
static void printLocation(const llvm::Module *M) { NamedMDNode *ND = M->getNamedMetadata("llvm.dbg.gv"); if (ND) { -#if 0 - unsigned N = ND->getNumOperands(); - // Try to find main compile unit - for (unsigned i = 0; i < N; i++) { - DIGlobalVariable G(ND->getOperand(i)); - DICompileUnit CU(G.getCompileUnit()); - if (!CU.isMain()) - continue; - errs() << /*CU.getDirectory() << "/" <<*/ CU.getFilename() << ": "; - return; - } -#else DEBUGERR << "FIGURE OUT WHAT TO DO IF I ACTUALLY GET HERE\n"; assert(0 && "FIGURE OUT WHAT TO DO IF I ACTUALLY GET HERE"); -#endif } errs() << M->getModuleIdentifier() << ": "; } @@ -81,27 +55,10 @@ static void printLocation(const llvm::Function *F) I != E; ++I) { if (const Instruction *T = I->getTerminator()) { if (MDNode *N = T->getMetadata(MDDebugKind)) { -#if 0 - DILocation Loc(N); - DIScope Scope = Loc.getScope(); - while (Scope.isLexicalBlock()) { - DILexicalBlock LB(Scope.getNode()); - Scope = LB.getContext(); - } - if (Scope.isSubprogram()) { - DISubprogram SP(Scope.getNode()); - errs() << /*Loc.getDirectory() << "/" << */ Loc.getFilename() - << ": in function '" - << SP.getDisplayName() - << "': "; - return; - } -#else DEBUGERR << N << "\n"; DEBUGERR << *N << "\n"; DEBUGERR << "FIGURE OUT WHAT TO DO IF I ACTUALLY GET HERE\n"; assert(0 && "FIGURE OUT WHAT TO DO IF I ACTUALLY GET HERE"); -#endif } } } @@ -126,31 +83,9 @@ void printLocation(const llvm::Instruction *I, bool fallback) BasicBlock::const_iterator ItB = BB->begin(); while (It != ItB) { if (MDNode *N = It->getMetadata("dbg")) { -#if 0 - DILocation Loc(N); - errs() << /*Loc.getDirectory() << "/" <<*/ Loc.getFilename() - << ":" << Loc.getLineNumber(); - if (unsigned Col = Loc.getColumnNumber()) { - errs() << ":" << Col; - } - if (approx) - errs() << "(?)"; - errs() << ": "; - DIScope Scope = Loc.getScope(); - while (Scope.isLexicalBlock()) { - DILexicalBlock LB(Scope.getNode()); - Scope = LB.getContext(); - } - if (Scope.isSubprogram()) { - DISubprogram SP(Scope.getNode()); 
- errs() << "in function '" << SP.getDisplayName() << "': "; - } - return; -#else DEBUGERR << *N << "\n"; DEBUGERR << approx << "\n"; assert(0 && "FIGURE OUT WHAT TO DO IF I ACTUALLY GET HERE"); -#endif } approx = true; --It; @@ -175,18 +110,8 @@ void printValue(const llvm::Value *V, bool printLocation, bool fallback) unsigned Line = 0; std::string File; std::string Dir; -#if 0 - if (!getLocationInfo(V, DisplayName, Type, Line, File, Dir)) { - if (fallback) - errs() << *V << "\n: "; - else - errs() << V->getName() << ": "; - return; - } -#else DEBUGERR << "FIXME: FIGURE OUT WHAT 'getLocationInfo' has been replaced with" << "\n"; -#endif errs() << "'" << DisplayName << "' "; if (printLocation) errs() << " (" << File << ":" << Line << ")"; @@ -201,15 +126,8 @@ void printLocation(const llvm::Module *M, const llvm::Value *V) unsigned Line = 0; std::string File; std::string Dir; -#if 0 - if (!getLocationInfo(V, DisplayName, Type, Line, File, Dir)) { - printLocation(M); - return; - } -#else DEBUGERR << "FIXME: FIGURE OUT WHAT 'getLocationInfo' has been replaced with" << "\n"; -#endif errs() << /*Dir << "/" <<*/ File << ":" << Line << ": "; } diff --git a/libclambcc/Common/ClamBCModule.h b/libclambcc/Common/ClamBCModule.h index a4a80c236d..1ed0675719 100644 --- a/libclambcc/Common/ClamBCModule.h +++ b/libclambcc/Common/ClamBCModule.h @@ -154,12 +154,6 @@ class ClamBCModule : public llvm::ModulePass virtual bool runOnModule(llvm::Module &M); virtual void getAnalysisUsage(llvm::AnalysisUsage &AU) const; -#if 0 - static void stop(const llvm::Twine &Msg, const llvm::Module *M); - static void stop(const llvm::Twine &Msg, const llvm::Function *F); - static void stop(const llvm::Twine &Msg, const llvm::Instruction *I); -#endif - void printNumber(uint64_t n, bool constant = false) { printNumber(Out, n, constant); diff --git a/libclambcc/Common/ClamBCUtilities.cpp b/libclambcc/Common/ClamBCUtilities.cpp index 04bfaadfcc..5ac4b0e9a7 100644 --- 
a/libclambcc/Common/ClamBCUtilities.cpp +++ b/libclambcc/Common/ClamBCUtilities.cpp @@ -40,7 +40,10 @@ bool functionRecurses(Function *pFunc, Function *orig, std::vector & for (auto blockIter = bb->begin(), blockEnd = bb->end(); blockIter != blockEnd; blockIter++) { Instruction *inst = llvm::cast(blockIter); if (CallInst *ci = llvm::dyn_cast(inst)) { - Value *calledValue = ci->getCalledValue(); + Value *calledValue = ci->getCalledFunction(); + if (nullptr == calledValue){ + ClamBCStop("Calls to function pointers not allowed", ci); + } if (calledValue == orig) { return true; } else if (Function *callee = dyn_cast(calledValue)) { @@ -61,8 +64,8 @@ bool functionRecurses(Function *pFunc) } void getDependentValues(llvm::Value *pv, std::set &insts, - std::set &globs, std::set &ces, - std::set &visited) + std::set &globs, std::set &ces, + std::set &visited) { if (visited.end() != std::find(visited.begin(), visited.end(), pv)) { return; @@ -110,7 +113,7 @@ void getDependentValues(llvm::Value *pv, std::set &insts, } void getDependentValues(llvm::Value *pv, std::set &insts, - std::set &globs) + std::set &globs) { std::set ces; std::set visited; @@ -118,7 +121,7 @@ void getDependentValues(llvm::Value *pv, std::set &insts, } void getDependentValues(llvm::Value *pv, std::set &insts, - std::set &globs, std::set &ces) + std::set &globs, std::set &ces) { std::set visited; getDependentValues(pv, insts, globs, ces, visited); @@ -139,3 +142,186 @@ llvm::BasicBlock *getEntryBlock(llvm::BasicBlock *pBlock) { return llvm::cast(pBlock->getParent()->begin()); } + +int64_t getTypeSize(llvm::Module *pMod, llvm::Type *pt) +{ + + int64_t size = pt->getScalarSizeInBits(); + if (size) { + return size; + } + + ArrayType *pat = llvm::dyn_cast(pt); + if (nullptr != pat) { + size = pat->getNumElements() * (getTypeSize(pMod, pat->getElementType())); + if (size) { + return size; + } + } + + StructType *pst = llvm::dyn_cast(pt); + if (nullptr != pst) { + const StructLayout *psl = 
pMod->getDataLayout().getStructLayout(pst); + return psl->getSizeInBits(); + } + + assert(0 && "Size has not been computed"); + return -1; +} + +int64_t getTypeSizeInBytes(llvm::Module *pMod, Type *pt) +{ + return getTypeSize(pMod, pt) / 8; +} + +int64_t computeOffsetInBytes(llvm::Module *pMod, Type *pt, uint64_t idx) +{ + + int64_t cnt = 0; + + assert((llvm::isa(pt) || llvm::isa(pt)) && "pt must be a complex type"); + + StructType *pst = llvm::dyn_cast(pt); + if (nullptr != pst) { + assert((idx <= pst->getNumElements()) && "Idx too high"); + + const StructLayout *psl = pMod->getDataLayout().getStructLayout(pst); + assert(psl && "Could not get layout"); + + cnt = psl->getElementOffsetInBits(idx) / 8; + + } else { + ArrayType *pat = llvm::dyn_cast(pt); + if (nullptr != pat) { + assert((idx <= pat->getNumElements()) && "Idx too high"); + cnt = idx * getTypeSizeInBytes(pMod, pat->getElementType()); + } + } + + return cnt; +} + +int64_t computeOffsetInBytes(llvm::Module *pMod, Type *pst, ConstantInt *pIdx) +{ + int64_t idx = pIdx->getLimitedValue(); + return computeOffsetInBytes(pMod, pst, idx); +} + +int64_t computeOffsetInBytes(llvm::Module *pMod, Type *pst) +{ + if (llvm::isa(pst)) { + return computeOffsetInBytes(pMod, pst, pst->getStructNumElements()); + } else if (llvm::isa(pst)) { + return computeOffsetInBytes(pMod, pst, pst->getArrayNumElements()); + } else { + assert(0 && "pt must be a complex type"); + } + + return 0; +} + +Type *findTypeAtIndex(Type *pst, ConstantInt *ciIdx) +{ + Type *ret = nullptr; + StructType *st = llvm::dyn_cast(pst); + if (nullptr != st) { + uint64_t idx = ciIdx->getLimitedValue(); + + assert(idx < st->getNumElements() && "Something went wrong"); + return st->getTypeAtIndex(idx); + } + + ArrayType *at = llvm::dyn_cast(pst); + if (nullptr != at) { + return at->getArrayElementType(); + } + return ret; +} + + +/*Only pass in either ConstantExpr or Instruction */ +Type * getResultType(Value * pVal){ + + Type *type = nullptr; + + if 
(llvm::isa(pVal)){ + ConstantExpr * pce = llvm::cast(pVal); + type = pce->getOperand(0)->getType(); + } else if ( llvm::isa(pVal)){ + Instruction * pInst = llvm::cast(pVal); + type = pInst->getOperand(0)->getType(); + } else { + assert (0 && "This function must be called with either Instruction or a ConstantExpr"); + return nullptr; + } + + if (llvm::isa(type)) { + if (llvm::isa(pVal)){ + GEPOperator * pgep = llvm::cast(pVal); + type = pgep->getSourceElementType(); + + } else if (llvm::isa(pVal)){ + GetElementPtrInst * pInst = llvm::cast(pVal); + type = pInst->getSourceElementType(); + } else if (llvm::isa(pVal)){ + BitCastOperator * pbco = llvm::cast(pVal); + type = pbco->getDestTy(); + } else if (llvm::isa(pVal)){ + BitCastInst * pInst = llvm::cast(pVal); + type = pInst->getDestTy(); + } else { + llvm::errs() << "<" << __LINE__ << ">" << "https://llvm.org/docs/OpaquePointers.html" << "\n"; + llvm::errs() << "<" << __LINE__ << ">" << *pVal << "\n"; + assert (0 && "FIGURE OUT WHAT TO DO HERE"); + } + } + + return type; +} + +void gatherCallsToIntrinsic(Function *pFunc, const char * const functionName , std::vector & calls) { + for (auto fi = pFunc->begin(), fe = pFunc->end(); fi != fe; fi++){ + BasicBlock * pBB = llvm::cast(fi); + for (auto bi = pBB->begin(), be = pBB->end(); bi != be; bi++){ + if (CallInst * pci = llvm::dyn_cast(bi)){ + Function * pCalled = pci->getCalledFunction(); + if (pCalled->isIntrinsic()){ + if (functionName == pCalled->getName()) { + calls.push_back(pci); + } + } + } + } + } +} + +void gatherCallsToIntrinsic(Module *pMod, const char * const functionName , std::vector & calls) { + for (auto i = pMod->begin(), e = pMod->end(); i != e; i++) { + Function *pFunc = llvm::cast(i); + if (pFunc->isDeclaration()) { + continue; + } + + gatherCallsToIntrinsic(pFunc, functionName, calls); + } +} + +void replaceAllCalls(FunctionType * pFuncType, Function * pFunc, + const std::vector & calls, const char * const namePrefix){ + + for (size_t i = 0; i < 
calls.size(); i++){ + CallInst * pci = calls[i]; + + std::vector args; + for (size_t i = 0; i < pci->arg_size(); i++){ + args.push_back(pci->getArgOperand(i)); + } + CallInst * pNew = CallInst::Create(pFuncType, pFunc, args, + namePrefix, pci); + pci->replaceAllUsesWith(pNew); + pci->eraseFromParent(); + + } +} + + diff --git a/libclambcc/Common/ClamBCUtilities.h b/libclambcc/Common/ClamBCUtilities.h index a010840aa0..8876cc5284 100644 --- a/libclambcc/Common/ClamBCUtilities.h +++ b/libclambcc/Common/ClamBCUtilities.h @@ -1,11 +1,18 @@ #ifndef CLAMBC_UTILITIES_H_ #define CLAMBC_UTILITIES_H_ + +#include +#include +#include + #include #include +#include #include #include +#include #include "ClamBCDiagnostics.h" @@ -26,4 +33,27 @@ bool functionHasLoop(llvm::Function *pFunc, llvm::LoopInfo &loopInfo); llvm::BasicBlock *getEntryBlock(llvm::BasicBlock *pBlock); +int64_t getTypeSize(llvm::Module * pMod, llvm::Type *pt); + +int64_t getTypeSizeInBytes(llvm::Module * pMod, llvm::Type *pt); + +int64_t computeOffsetInBytes(llvm::Module * pMod, llvm::Type *pt, uint64_t idx); + +int64_t computeOffsetInBytes(llvm::Module * pMod, llvm::Type *pst, llvm::ConstantInt *pIdx); + +int64_t computeOffsetInBytes(llvm::Module * pMod, llvm::Type *pst); + +llvm::Type *findTypeAtIndex(llvm::Type *pst, llvm::ConstantInt *ciIdx); + +llvm::Type * getResultType(llvm::Value * pVal); + +void gatherCallsToIntrinsic(llvm::Function *pFunc, const char * const functionName , + std::vector & calls); + +void gatherCallsToIntrinsic(llvm::Module *pMod, const char * const functionName , + std::vector & calls) ; + +void replaceAllCalls(llvm::FunctionType * pFuncType, llvm::Function * pFunc, + const std::vector & calls, const char * const namePrefix); + #endif // CLAMBC_UTILITIES_H_ diff --git a/libclambcc/Common/clambc.h b/libclambcc/Common/clambc.h index 3d790a1f9e..3e828bd6f6 100644 --- a/libclambcc/Common/clambc.h +++ b/libclambcc/Common/clambc.h @@ -136,5 +136,17 @@ enum bc_global { #define DEBUGERR 
llvm::errs() << "<" << __FILE__ << "::" << __FUNCTION__ << "::" << __LINE__ << ">" #endif //DEBUGERR +#ifndef DEBUG_WHERE +#define DEBUG_WHERE llvm::errs() << "<" << __FUNCTION__ << "::" << __LINE__ << ">\n" +#endif + +#ifndef DEBUG_VALUE +#define DEBUG_VALUE(__value__) llvm::errs() << "<" << __FUNCTION__ << "::" << __LINE__ << ">" << *__value__ << "\n"; +#endif + +#ifndef DEBUG_NONPOINTER +#define DEBUG_NONPOINTER(__value__) llvm::errs() << "<" << __FUNCTION__ << "::" << __LINE__ << ">" << __value__ << "\n"; +#endif + #define BC_START_TID 69 #endif diff --git a/temp_delete_when_merge/build_all.py b/temp_delete_when_merge/build_all.py new file mode 100755 index 0000000000..c9027f031d --- /dev/null +++ b/temp_delete_when_merge/build_all.py @@ -0,0 +1,210 @@ +#!/usr/bin/python3 + +import os + +os.system("rm -f *.ll") + +SIG_DIR='sigs' + +COMPILE_CMD = """clang-16 \ + -S \ + -fno-discard-value-names \ + -fno-vectorize \ + --language=c \ + -emit-llvm \ + -Werror=unused-command-line-argument \ + -Xclang \ + -disable-O0-optnone \ + %s \ + -o \ + %s \ + -I \ + /home/aragusa/clamav-bytecode-compiler-aragusa/headers \ + -include \ + bytecode.h \ + -D__CLAMBC__""" + +OPTIONS_STR='--disable-loop-unrolling' +OPTIONS_STR+=" --disable-i2p-p2i-opt" +OPTIONS_STR+=" --disable-loop-unrolling" +OPTIONS_STR+=" --disable-promote-alloca-to-lds" +OPTIONS_STR+=" --disable-promote-alloca-to-vector" +OPTIONS_STR+=" --disable-simplify-libcalls" +OPTIONS_STR+=" --disable-tail-calls" +#OPTIONS_STR+=" --polly-vectorizer=none" +#OPTIONS_STR+=" --loop-vectorize" +OPTIONS_STR+=" --vectorize-slp=false" +OPTIONS_STR+=" --vectorize-loops=false" +#OPTIONS_STR+=" --disable-loop-vectorization" + + + +internalizeAPIList = "_Z10entrypointv,entrypoint,__clambc_kind,__clambc_virusname_prefix,__clambc_virusnames,__clambc_filesize,__clambc_match_counts,__clambc_match_offsets,__clambc_pedata,__Copyright" + +OPTIONS_STR+=f' -internalize-public-api-list="{internalizeAPIList}"' + +PASS_STR = "function(mem2reg)" 
+PASS_STR+=',' +PASS_STR+='clambc-remove-undefs' +PASS_STR+=',verify' +PASS_STR+=',clambc-preserve-abis' +PASS_STR+=',verify' +PASS_STR+=',default' +#PASS_STR+=',default' +PASS_STR+=',globalopt' +PASS_STR+=',clambc-preserve-abis' #remove fake function calls because O3 has already run +PASS_STR+=',verify' +#PASS_STR+=',clambc-remove-umin' +#PASS_STR+=',verify' +#PASS_STR+=',clambc-remove-umax' +#PASS_STR+=',verify' +#PASS_STR+=',clambc-remove-smax' +PASS_STR+=',clambc-remove-unsupported-icmp-intrinsics' +PASS_STR+=',verify' +PASS_STR+=',clambc-remove-usub' +PASS_STR+=',verify' +PASS_STR+=',clambc-remove-fshl' +PASS_STR+=',verify' +PASS_STR+=',clambc-remove-pointer-phis' +#PASS_STR+=',function(clambc-remove-pointer-phis)' +PASS_STR+=',verify' +PASS_STR+=',clambc-lowering-notfinal' # perform lowering pass +PASS_STR+=',verify' +PASS_STR+=',lowerswitch' +PASS_STR+=',verify' +PASS_STR+=',clambc-remove-icmp-sle' +PASS_STR+=',verify' +PASS_STR+=',function(clambc-verifier)' +PASS_STR+=',verify' +PASS_STR+=',clambc-remove-freeze-insts' +PASS_STR+=',verify' +PASS_STR+=',clambc-lowering-notfinal' # perform lowering pass +PASS_STR+=',verify' +PASS_STR+=',clambc-lcompiler-helper' #compile the logical_trigger function to a +PASS_STR+=',verify' +PASS_STR+=',clambc-lcompiler' #compile the logical_trigger function to a +PASS_STR+=',verify' +PASS_STR+=',internalize' +PASS_STR+=',verify' +PASS_STR+=',clambc-rebuild' +PASS_STR+=',verify' +PASS_STR+=',clambc-trace' +PASS_STR+=',verify' +PASS_STR+=',clambc-outline-endianness-calls' +PASS_STR+=',verify' +PASS_STR+=',clambc-change-malloc-arg-size' +PASS_STR+=',verify' +PASS_STR+=',clambc-extend-phis-to-64-bit' +PASS_STR+=',verify' +PASS_STR+=',clambc-convert-intrinsics' +PASS_STR+=',verify' +PASS_STR+=',globalopt' +PASS_STR+=',clambc-prepare-geps-for-writer' +PASS_STR+=',verify' +PASS_STR+=',clambc-writer' +PASS_STR+=',verify' + + + + +INSTALL_DIR=os.path.join(os.getcwd(), "..") +LOAD_STR = "" +LOAD_STR += "--load 
%s/install/lib/libclambccommon.so " % INSTALL_DIR +LOAD_STR += " --load-pass-plugin %s/install/lib/libclambcremoveundefs.so " % INSTALL_DIR +LOAD_STR += " --load-pass-plugin %s/install/lib/libclambcpreserveabis.so " % INSTALL_DIR +#LOAD_STR += " --load-pass-plugin %s/install/lib/libclambcremoveumin.so " % INSTALL_DIR +#LOAD_STR += " --load-pass-plugin %s/install/lib/libclambcremoveumax.so " % INSTALL_DIR +#LOAD_STR += " --load-pass-plugin %s/install/lib/libclambcremovesmax.so " % INSTALL_DIR +LOAD_STR += " --load-pass-plugin %s/install/lib/libclambcremoveunsupportedicmpintrinsics.so " % INSTALL_DIR +LOAD_STR += " --load-pass-plugin %s/install/lib/libclambcremoveusub.so " % INSTALL_DIR +LOAD_STR += " --load-pass-plugin %s/install/lib/libclambcremovefshl.so " % INSTALL_DIR +LOAD_STR += " --load-pass-plugin %s/install/lib/libclambcremovepointerphis.so " % INSTALL_DIR +LOAD_STR += " --load-pass-plugin %s/install/lib/libclambcloweringnf.so " % INSTALL_DIR +LOAD_STR += " --load-pass-plugin %s/install/lib/libclambcremoveicmpsle.so " % INSTALL_DIR +LOAD_STR += " --load-pass-plugin %s/install/lib/libclambcverifier.so " % INSTALL_DIR +LOAD_STR += " --load-pass-plugin %s/install/lib/libclambcremovefreezeinsts.so " % INSTALL_DIR +LOAD_STR += " --load-pass-plugin %s/install/lib/libclambcloweringf.so " % INSTALL_DIR +LOAD_STR += " --load-pass-plugin %s/install/lib/libclambclogicalcompilerhelper.so " % INSTALL_DIR +LOAD_STR += " --load-pass-plugin %s/install/lib/libclambclogicalcompiler.so " % INSTALL_DIR +LOAD_STR += " --load-pass-plugin %s/install/lib/libclambcrebuild.so " % INSTALL_DIR +LOAD_STR += " --load-pass-plugin %s/install/lib/libclambctrace.so " % INSTALL_DIR +LOAD_STR += " --load-pass-plugin %s/install/lib/libclambcoutlineendiannesscalls.so " % INSTALL_DIR +LOAD_STR += " --load-pass-plugin %s/install/lib/libclambcchangemallocargsize.so " % INSTALL_DIR +LOAD_STR += " --load-pass-plugin %s/install/lib/libclambcextendphisto64bit.so " % INSTALL_DIR +LOAD_STR += " 
--load-pass-plugin %s/install/lib/libclambcconvertintrinsics.so " % INSTALL_DIR +LOAD_STR += " --load-pass-plugin %s/install/lib/libclambcpreparegepsforwriter.so " % INSTALL_DIR +LOAD_STR += " --load-pass-plugin %s/install/lib/libclambcanalyzer.so " % INSTALL_DIR +#LOAD_STR += " --load-pass-plugin %s/install/lib/libclambctypeanalyzer.so " % INSTALL_DIR +LOAD_STR += " --load-pass-plugin %s/install/lib/libclambcregalloc.so " % INSTALL_DIR +LOAD_STR += " --load-pass-plugin %s/install/lib/libclambcwriter.so " % INSTALL_DIR + + +#wd = os.getcwd() +#os.chdir(os.path.join(INSTALL_DIR, "install")) + +#os.system("tar xvf lib.tar") + + +#os.chdir(wd) + + + + + + + +#OPT_CMD = 'opt-16 -S %s --passes=\"-mem2reg\" --passes=\"%s\" %s ' % (LOAD_STR, PASS_STR, OPTIONS_STR) +OPT_CMD = 'opt-16 -S %s --passes=\"%s\" %s ' % (LOAD_STR, PASS_STR, OPTIONS_STR) + + +#print ("Re-evaluate here") +#print ("Disabling opaque pointers here") +OPT_CMD += " -opaque-pointers=0 " +COMPILE_CMD += " -Xclang -no-opaque-pointers " + +""" +#This is to find undefs. 
+print ("Take this part out, used to find undefs") +#PASS_STR = 'default' +OPTIONS_STR = '' +OPTIONS_STR+=" --vectorize-slp=false" +OPTIONS_STR+=" --vectorize-loops=false" +OPT_CMD = 'opt-16 -S %s --passes=\"%s\" %s ' % (LOAD_STR, PASS_STR, OPTIONS_STR) +""" + + + +OPT_CMD += "%s -o %s" + + + + + + +def run(cmd): + return os.system(cmd) + + +def compileFile(d, name): + llFile = name[:-1] + "ll" + + cmd = COMPILE_CMD % (os.path.join(d,name), llFile) + if (run(cmd)): + return + + cmd = OPT_CMD % (llFile, llFile + ".optimized.ll") + print (cmd) + + return run(cmd) + + +if '__main__' == __name__: + for s in os.listdir(SIG_DIR): + if (compileFile(SIG_DIR, s)): + print (f"Failed on {s}") + break +# os.system("rm -f *.ll") + + + + diff --git a/temp_delete_when_merge/run_opt.sh b/temp_delete_when_merge/run_opt.sh new file mode 100755 index 0000000000..f6262ecfc4 --- /dev/null +++ b/temp_delete_when_merge/run_opt.sh @@ -0,0 +1,153 @@ +#!/bin/bash + + +#might be useful +#https://stackoverflow.com/questions/67206238/how-to-define-and-read-cli-arguments-for-an-llvm-pass-with-the-new-pass-manager + +OPTIONS_STR='--disable-loop-unrolling' +OPTIONS_STR="$OPTIONS_STR --disable-i2p-p2i-opt" +OPTIONS_STR="$OPTIONS_STR --disable-loop-unrolling" +OPTIONS_STR="$OPTIONS_STR --disable-promote-alloca-to-lds" +OPTIONS_STR="$OPTIONS_STR --disable-promote-alloca-to-vector" +OPTIONS_STR="$OPTIONS_STR --disable-simplify-libcalls" +OPTIONS_STR="$OPTIONS_STR --disable-tail-calls" + + + + + + + + + +PASS_STR='clambc-remove-undefs' +PASS_STR=$PASS_STR,'clambc-preserve-abis' +PASS_STR=$PASS_STR,'default' +PASS_STR=$PASS_STR,'clambc-preserve-abis' #remove fake function calls because O3 has already run +PASS_STR=$PASS_STR,'function(clambc-remove-pointer-phis)' +#PASS_STR=$PASS_STR,'dce' +#PASS_STR=$PASS_STR,'globaldce' +#PASS_STR=$PASS_STR,'strip-dead-prototypes' +#PASS_STR=$PASS_STR,'constmerge' +#PASS_STR=$PASS_STR,'mem2reg' +#PASS_STR=$PASS_STR,'always-inline' +#PASS_STR=$PASS_STR,'globalopt' 
+#PASS_STR=$PASS_STR,'lowerswitch' +#PASS_STR=$PASS_STR,'lowerinvoke' +#PASS_STR=$PASS_STR,'globalopt' +#PASS_STR=$PASS_STR,'simplifycfg' +#PASS_STR=$PASS_STR,'indvars' +#PASS_STR=$PASS_STR,'constprop' #figure this out later +PASS_STR=$PASS_STR,'clambc-lowering-notfinal' # perform lowering pass +PASS_STR=$PASS_STR,'lowerswitch' +PASS_STR=$PASS_STR,'function(clambc-verifier)' +PASS_STR=$PASS_STR,'clambc-lowering-notfinal' # perform lowering pass +#PASS_STR=$PASS_STR,'dce' +#PASS_STR=$PASS_STR,'simplifycfg' +#PASS_STR=$PASS_STR,'mem2reg' +PASS_STR=$PASS_STR,'clambc-lcompiler' #compile the logical_trigger function to a +# #logical signature. +#PASS_STR=$PASS_STR,'internalize -internalize-public-api-list="{internalizeAPIList}"' +#PASS_STR=$PASS_STR,'globaldce' +#PASS_STR=$PASS_STR,'instcombine' +PASS_STR=$PASS_STR,'clambc-rebuild' +#PASS_STR=$PASS_STR,'verify' +#PASS_STR=$PASS_STR,'simplifycfg' +#PASS_STR=$PASS_STR,'dce' +#PASS_STR=$PASS_STR,'lowerswitch' +#PASS_STR=$PASS_STR,'clambc-verifier' +#PASS_STR=$PASS_STR,'verify' +#PASS_STR=$PASS_STR,'strip-debug-declare' +#PASS_STR=$PASS_STR,'clambc-lowering-final' +#PASS_STR=$PASS_STR,'clambc-trace' +#PASS_STR=$PASS_STR,'dce' +#PASS_STR=$PASS_STR,'clambc-module' +#PASS_STR=$PASS_STR,'verify' +#PASS_STR=$PASS_STR,'globalopt' +#PASS_STR=$PASS_STR,'remove-selects' +#PASS_STR=$PASS_STR,'clambc-outline-endianness-calls' #outline the endianness calls +# #because otherwise the call +# #is replaced with a constant +# #that is based on where the +# #signature was compiled, and +# #won't always be accurate. +#PASS_STR=$PASS_STR,'clambc-change-malloc-arg-size' #make sure we always use the +# #64-bit malloc. +#PASS_STR=$PASS_STR,'globalopt' +#PASS_STR=$PASS_STR,'clambc-extend-phis-to-64bit' #make all integer phi nodes 64-bit +# #because the llvm runtime inserts a +# #cast after phi nodes without +# #verifying that there is not +# #another phi node after it. 
+#PASS_STR=$PASS_STR,'clambc-prepare-geps-for-writer' #format gep indexes to not not +# #have more than 2, because +# #otherwise the writer gets +# #unhappy. +#PASS_STR=$PASS_STR,'globalopt' +#PASS_STR=$PASS_STR,'clambc-convert-intrinsics' #convert all memset intrinsics to +# #the 32-bit instead of the 64-bit +# #intrinsic +#PASS_STR=$PASS_STR,'clambc-writer' #write the bytecode +#PASS_STR=$PASS_STR,'clambc-writer-input-source={inputSourceFile}' +#PASS_STR=$PASS_STR,'clambc-sigfile={sigFile}' +# + + + + + + + +INSTALL_DIR="$( cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" + +clang-16 -S -fno-discard-value-names -emit-llvm -O0 -Xclang -disable-O0-optnone $INSTALL_DIR/testing/test.c + +#opt-16 -S \ +# --load $INSTALL_DIR/install/lib/libclambccommon.so \ +# --load-pass-plugin $INSTALL_DIR/install/lib/libclambcremoveundefs.so \ +# --load-pass-plugin $INSTALL_DIR/install/lib/libclambcpreserveabis.so \ +# --load-pass-plugin $INSTALL_DIR/install/lib/libclambcanalyzer.so \ +# --load-pass-plugin $INSTALL_DIR/install/lib/libclambcremovepointerphis.so \ +# --load-pass-plugin $INSTALL_DIR/install/lib/libclambcloweringf.so \ +# --load-pass-plugin $INSTALL_DIR/install/lib/libclambcloweringnf.so \ +# --load-pass-plugin $INSTALL_DIR/install/lib/libclambcverifier.so \ +# --passes="-mem2reg"\ +# --passes="clambc-remove-undefs,clambc-preserve-abis,default,clambc-preserve-abis,function(clambc-remove-pointer-phis),dce,clambc-lowering-notfinal,clambc-lowering-final,function(clambc-verifier)" \ +# test.ll -o test.t.ll + + + + +#opt-16 -S \ +# --load $INSTALL_DIR/install/lib/libclambccommon.so \ +# --load-pass-plugin $INSTALL_DIR/install/lib/libclambcremoveundefs.so \ +# --load-pass-plugin $INSTALL_DIR/install/lib/libclambcpreserveabis.so \ +# --load-pass-plugin $INSTALL_DIR/install/lib/libclambcanalyzer.so \ +# --load-pass-plugin $INSTALL_DIR/install/lib/libclambcremovepointerphis.so \ +# --load-pass-plugin $INSTALL_DIR/install/lib/libclambcverifier.so \ +# --passes="-mem2reg"\ +# 
--passes="clambc-remove-undefs,clambc-preserve-abis,default,clambc-preserve-abis,function(clambc-remove-pointer-phis),dce,function(clambc-verifier)" \ +# test.ll -o test.t.ll +# + + + +opt-16 -S \ + --load $INSTALL_DIR/install/lib/libclambccommon.so \ + --load-pass-plugin $INSTALL_DIR/install/lib/libclambcremoveundefs.so \ + --load-pass-plugin $INSTALL_DIR/install/lib/libclambcpreserveabis.so \ + --load-pass-plugin $INSTALL_DIR/install/lib/libclambcanalyzer.so \ + --load-pass-plugin $INSTALL_DIR/install/lib/libclambcremovepointerphis.so \ + --load-pass-plugin $INSTALL_DIR/install/lib/libclambcloweringf.so \ + --load-pass-plugin $INSTALL_DIR/install/lib/libclambcloweringnf.so \ + --load-pass-plugin $INSTALL_DIR/install/lib/libclambcverifier.so \ + --load-pass-plugin $INSTALL_DIR/install/lib/libclambclogicalcompiler.so \ + --load-pass-plugin $INSTALL_DIR/install/lib/libclambcrebuild.so \ + --passes="$PASS_STR" \ + $OPTIONS_STR \ + test.ll -o test.t.ll + + + + + diff --git a/temp_delete_when_merge/testing/test.c b/temp_delete_when_merge/testing/test.c new file mode 100644 index 0000000000..4ea722edd8 --- /dev/null +++ b/temp_delete_when_merge/testing/test.c @@ -0,0 +1,196 @@ +#include +#include +#include +#include + + +int getBufferLen(const char * const buffer){ + int j; + for (j = 0; 0 != buffer[j]; j++) + ; + return j; +} + + +#if 0 +char* encodeUInt_getridofthis(int inval, char buffer[1024], int * size){ + int bufferSize = sizeof(buffer); + + int i; + int idx = bufferSize - 1; + *size = 0; + + for (i = 0; i < bufferSize; i++){ + buffer[i] = 0; + } + + while (inval){ + buffer[idx--] = '0' + (inval - (10 * (inval / 10))); + (*size)++; + + inval /= 10; + } + + idx++; + + return &(buffer[idx]); +} +#endif + + + + +char* encodeUInt(int inval, char * buffer, int bufferSize, int * size){ + + int i; + int idx = bufferSize - 1; + *size = 0; + + for (i = 0; i < bufferSize; i++){ + buffer[i] = 0; + } + + while (inval){ + buffer[idx--] = '0' + (inval - (10 * (inval / 10))); + 
(*size)++; + + inval /= 10; + } + + idx++; + + return &(buffer[idx]); +} + + +void andyprintf(const char * const formatStr, ...) __attribute__((always_inline)) { + int i; + char buffer[1024]; + + // Declaring pointer to the + // argument list + va_list ptr; + + // Initializing argument to the + // list pointer + va_start(ptr, formatStr); + + for (i = 0; 0 != formatStr[i]; ){ + if ('%' == formatStr[i]){ + i++; + if ('d' == formatStr[i]){ + int tmp = va_arg(ptr, int); + int len; + char * cp = encodeUInt(tmp, buffer, sizeof(buffer), &len); + write(1, cp, len); + } else if ('s' == formatStr[i]){ + const char * const tmp = va_arg(ptr, char*); + int len = getBufferLen(buffer); + write(1, buffer, len); + } + } else { + write(1, &(formatStr[i]), 1); + } + + + + + i++; + } + + // Ending argument list traversal + va_end(ptr); + + + +} + +#define PRINTSTR(__str__) { \ + int i = 0; \ + while (0 != __str__[i]) { \ + write(1, &(__str__[i]), 1) ; \ + } \ +} + +#define PRINTINT(__val__) { \ + int len; \ + char * cp = encodeUInt(__val__, buffer, sizeof(buffer), &len); \ + write(1, cp, len); \ +} + +void func(int val){ +#if 0 + andyprintf("%s::%d\n", __FUNCTION__, __LINE__); +#else + PRINTSTR(__FUNCTION__); + PRINTSTR("\n"); +#endif +} + + +typedef struct s *sp; + +static sp func2(){ + return NULL; +} + +const char * const CONST_CP = "hi there"; + +void func3(const char * const val) { + + if (CONST_CP == val){ + PRINTSTR("val = 'CP'\n"); + } else { + PRINTSTR("val NOT = CP\n"); + } + +} + + +#if 0 + +typedef void (*fc)(); + +void fptestfunc1(){ PRINTSTR(__FUNCTION__); PRINTSTR("\n");} +void fptestfunc2(){ PRINTSTR(__FUNCTION__); PRINTSTR("\n");} +int fptest(int argc, char ** argv){ + + fc func = fptestfunc1; + + if (argc > 1){ + func = fptestfunc2; + } + + func(); + + return 0; +} +#endif + + + + + + + + +int main(int argc, char ** argv){ + + const char * val = CONST_CP; + + if (argc > 2){ + func(1); + } else if (1 == argc){ + val = argv[0]; + } + + func3(val); + +#if 0 + 
fptest(argc, argv); +#endif + + return 0; + + +}