diff --git a/cpg-core/build.gradle.kts b/cpg-core/build.gradle.kts index f34baef41a..be5d9c6b7e 100644 --- a/cpg-core/build.gradle.kts +++ b/cpg-core/build.gradle.kts @@ -65,6 +65,8 @@ dependencies { implementation(libs.bundles.log4j) implementation(libs.kotlin.reflect) + implementation(libs.jacksonyml) + testImplementation(libs.junit.params) testFixturesApi(libs.kotlin.test.junit5) // somehow just using testFixturesApi(kotlin("test")) does not work for testFixtures diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TranslationConfiguration.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TranslationConfiguration.kt index 74bfbea49c..679628861e 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TranslationConfiguration.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TranslationConfiguration.kt @@ -34,6 +34,7 @@ import de.fraunhofer.aisec.cpg.frontends.Language import de.fraunhofer.aisec.cpg.frontends.LanguageFrontend import de.fraunhofer.aisec.cpg.graph.Node import de.fraunhofer.aisec.cpg.passes.* +import de.fraunhofer.aisec.cpg.passes.inference.DFGFunctionSummaries import de.fraunhofer.aisec.cpg.passes.order.* import java.io.File import java.nio.file.Path @@ -102,6 +103,8 @@ private constructor( */ val replacedPasses: Map>, KClass>>, KClass>>, + /** This list contains the files with function summaries which should be considered. 
*/ + val functionSummaries: DFGFunctionSummaries, languages: List>, codeInNodes: Boolean, processAnnotations: Boolean, @@ -240,6 +243,7 @@ private constructor( private val passes = mutableListOf>>() private val replacedPasses = mutableMapOf>, KClass>>, KClass>>() + private val functionSummaries = mutableListOf() private var codeInNodes = true private var processAnnotations = false private var disableCleanup = false @@ -420,6 +424,11 @@ private constructor( return this } + fun registerFunctionSummaries(vararg functionSummary: File): Builder { + this.functionSummaries.addAll(functionSummary) + return this + } + /** Registers an additional [Language]. */ fun registerLanguage(language: Language<*>): Builder { languages.add(language) @@ -625,6 +634,7 @@ private constructor( includeBlocklist, orderPasses(), replacedPasses, + DFGFunctionSummaries.fromFiles(functionSummaries), languages, codeInNodes, processAnnotations, diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/ExpressionBuilder.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/ExpressionBuilder.kt index d4e8209bdf..6a72ab6484 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/ExpressionBuilder.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/ExpressionBuilder.kt @@ -30,6 +30,7 @@ import de.fraunhofer.aisec.cpg.frontends.HasShortCircuitOperators import de.fraunhofer.aisec.cpg.frontends.LanguageFrontend import de.fraunhofer.aisec.cpg.graph.Node.Companion.EMPTY_NAME import de.fraunhofer.aisec.cpg.graph.NodeBuilder.log +import de.fraunhofer.aisec.cpg.graph.edge.ContextSensitiveDataflow import de.fraunhofer.aisec.cpg.graph.statements.expressions.* import de.fraunhofer.aisec.cpg.graph.statements.expressions.AssignExpression import de.fraunhofer.aisec.cpg.graph.types.ProblemType @@ -561,8 +562,21 @@ fun Literal.duplicate(implicit: Boolean): Literal { duplicate.comment = this.comment duplicate.file = this.file duplicate.name = this.name.clone() - 
duplicate.nextDFG = this.nextDFG - duplicate.prevDFG = this.prevDFG + for (next in this.nextDFGEdges) { + duplicate.addNextDFG( + next.end, + next.granularity, + (next as? ContextSensitiveDataflow)?.callingContext + ) + } + for (next in this.prevDFGEdges) { + duplicate.addPrevDFG( + next.start, + next.granularity, + (next as? ContextSensitiveDataflow)?.callingContext + ) + } + // TODO: This loses the properties of the edges. duplicate.nextEOG = this.nextEOG duplicate.prevEOG = this.prevEOG duplicate.isImplicit = implicit diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/Node.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/Node.kt index e1a1a857c0..c39731febf 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/Node.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/Node.kt @@ -251,8 +251,14 @@ open class Node : IVisitable, Persistable, LanguageProvider, ScopeProvider fun addNextDFG( next: Node, granularity: Granularity = default(), + callingContext: CallingContext? = null, ) { - val edge = Dataflow(this, next, granularity) + val edge = + if (callingContext != null) { + ContextSensitiveDataflow(this, next, callingContext, granularity) + } else { + Dataflow(this, next, granularity) + } nextDFGEdges.add(edge) next.prevDFGEdges.add(edge) } @@ -269,12 +275,21 @@ open class Node : IVisitable, Persistable, LanguageProvider, ScopeProvider } } - /** Adds a [Dataflow] edge from [prev] node to this node, with the given [Granularity]. */ + /** + * Adds a [Dataflow] edge from [prev] node to this node, with the given [Granularity] and + * [CallingContext]. + */ open fun addPrevDFG( prev: Node, granularity: Granularity = default(), + callingContext: CallingContext? 
= null, ) { - val edge = Dataflow(prev, this, granularity) + val edge = + if (callingContext != null) { + ContextSensitiveDataflow(prev, this, callingContext, granularity) + } else { + Dataflow(prev, this, granularity) + } prevDFGEdges.add(edge) prev.nextDFGEdges.add(edge) } @@ -288,12 +303,16 @@ open class Node : IVisitable, Persistable, LanguageProvider, ScopeProvider prev.nextCDGEdges.add(edge) } - /** Adds a [Dataflow] edge from all [prev] nodes to this node, with the given [Granularity]. */ + /** + * Adds a [Dataflow] edge from all [prev] nodes to this node, with the given [Granularity] and + * [CallingContext] if applicable. + */ fun addAllPrevDFG( prev: Collection, granularity: Granularity = full(), + callingContext: CallingContext? = null, ) { - prev.forEach { addPrevDFG(it, granularity) } + prev.forEach { addPrevDFG(it, granularity, callingContext) } } fun addAllPrevPDG(prev: Collection, dependenceType: DependenceType) { diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/edge/Dataflow.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/edge/Dataflow.kt index fb3dbf8199..c8418b0db8 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/edge/Dataflow.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/edge/Dataflow.kt @@ -30,6 +30,7 @@ import de.fraunhofer.aisec.cpg.graph.declarations.Declaration import de.fraunhofer.aisec.cpg.graph.declarations.FieldDeclaration import de.fraunhofer.aisec.cpg.graph.declarations.TupleDeclaration import de.fraunhofer.aisec.cpg.graph.declarations.VariableDeclaration +import de.fraunhofer.aisec.cpg.graph.statements.expressions.CallExpression import de.fraunhofer.aisec.cpg.graph.statements.expressions.MemberExpression import org.neo4j.ogm.annotation.RelationshipEntity @@ -80,11 +81,40 @@ fun partial(target: Declaration?): PartialDataflowGranularity { * [granularity]. 
*/ @RelationshipEntity -class Dataflow( +open class Dataflow( start: Node, end: Node, /** The granularity of this dataflow. */ - val granularity: Granularity = default(), + val granularity: Granularity = default() ) : PropertyEdge(start, end) { override val label: String = "DFG" } + +sealed interface CallingContext + +class CallingContextIn( + /** The call expression that affects this dataflow edge. */ + val call: CallExpression +) : CallingContext + +class CallingContextOut( + /** The call expression that affects this dataflow edge. */ + val call: CallExpression +) : CallingContext + +/** + * This edge class defines a flow of data between [start] and [end]. The flow must have a + * [callingContext] which allows for a context-sensitive dataflow analysis. This edge can also have + * a certain [granularity]. + */ +@RelationshipEntity +class ContextSensitiveDataflow( + start: Node, + end: Node, + /** The calling context affecting this dataflow. */ + val callingContext: CallingContext, + /** The granularity of this dataflow. */ + granularity: Granularity, +) : Dataflow(start, end, granularity) { + override val label: String = "DFG" +} diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/edge/PropertyEdgeConverterManager.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/edge/PropertyEdgeConverterManager.kt index 6c3f88862d..6a044bc58a 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/edge/PropertyEdgeConverterManager.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/edge/PropertyEdgeConverterManager.kt @@ -26,6 +26,7 @@ package de.fraunhofer.aisec.cpg.graph.edge import de.fraunhofer.aisec.cpg.graph.declarations.TemplateDeclaration.TemplateInitialization +import de.fraunhofer.aisec.cpg.graph.statements.expressions.CallExpression import java.util.function.Function /** @@ -49,6 +50,9 @@ class PropertyEdgeConverterManager private constructor() { addDeserializer("INSTANTIATION") { s: Any? 
-> if (s != null) TemplateInitialization.valueOf(s.toString()) else null } + addSerializer(CallExpression::class.java.name) { it.toString() } + addDeserializer("CALLING_CONTEXT_IN") { null } // TODO: Not supported yet + addDeserializer("CALLING_CONTEXT_OUT") { null } // TODO: Not supported yet } fun addSerializer(clazz: String, func: Function) { diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/statements/expressions/Reference.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/statements/expressions/Reference.kt index 69b856f013..11e4f24478 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/statements/expressions/Reference.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/statements/expressions/Reference.kt @@ -32,6 +32,7 @@ import de.fraunhofer.aisec.cpg.graph.Node import de.fraunhofer.aisec.cpg.graph.declarations.Declaration import de.fraunhofer.aisec.cpg.graph.declarations.ValueDeclaration import de.fraunhofer.aisec.cpg.graph.declarations.VariableDeclaration +import de.fraunhofer.aisec.cpg.graph.edge.CallingContext import de.fraunhofer.aisec.cpg.graph.edge.Granularity import de.fraunhofer.aisec.cpg.graph.scopes.Scope import de.fraunhofer.aisec.cpg.graph.types.HasType @@ -147,8 +148,8 @@ open class Reference : Expression(), HasType.TypeObserver, HasAliases { return super.hashCode() } - override fun addPrevDFG(prev: Node, granularity: Granularity) { - super.addPrevDFG(prev, granularity) + override fun addPrevDFG(prev: Node, granularity: Granularity, callingContext: CallingContext?) { + super.addPrevDFG(prev, granularity, callingContext) // We want to propagate assigned types all through the previous DFG nodes. 
Therefore, we // override the DFG adding function here and add a type observer to the previous node (if it diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/helpers/Util.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/helpers/Util.kt index e50cb7348b..44171e159c 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/helpers/Util.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/helpers/Util.kt @@ -29,6 +29,7 @@ import de.fraunhofer.aisec.cpg.frontends.LanguageFrontend import de.fraunhofer.aisec.cpg.graph.Node import de.fraunhofer.aisec.cpg.graph.declarations.FunctionDeclaration import de.fraunhofer.aisec.cpg.graph.declarations.MethodDeclaration +import de.fraunhofer.aisec.cpg.graph.edge.CallingContextIn import de.fraunhofer.aisec.cpg.graph.edge.Properties import de.fraunhofer.aisec.cpg.graph.statements.expressions.* import de.fraunhofer.aisec.cpg.sarif.PhysicalLocation @@ -355,7 +356,9 @@ object Util { fun attachCallParameters(target: FunctionDeclaration, call: CallExpression) { // Add an incoming DFG edge from a member call's base to the method's receiver if (target is MethodDeclaration && call is MemberCallExpression && !call.isStatic) { - target.receiver?.let { receiver -> call.base?.addNextDFG(receiver) } + target.receiver?.let { receiver -> + call.base?.addNextDFG(receiver, callingContext = CallingContextIn(call)) + } } // Connect the arguments to parameters @@ -370,12 +373,12 @@ object Util { if (param.isVariadic) { while (j < arguments.size) { // map all the following arguments to this variadic param - param.addPrevDFG(arguments[j]) + param.addPrevDFG(arguments[j], callingContext = CallingContextIn(call)) j++ } break } else { - param.addPrevDFG(arguments[j]) + param.addPrevDFG(arguments[j], callingContext = CallingContextIn(call)) } } j++ diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/ControlFlowSensitiveDFGPass.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/ControlFlowSensitiveDFGPass.kt 
index ad327dca37..c99894be79 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/ControlFlowSensitiveDFGPass.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/ControlFlowSensitiveDFGPass.kt @@ -28,9 +28,7 @@ package de.fraunhofer.aisec.cpg.passes import de.fraunhofer.aisec.cpg.TranslationContext import de.fraunhofer.aisec.cpg.graph.* import de.fraunhofer.aisec.cpg.graph.declarations.* -import de.fraunhofer.aisec.cpg.graph.edge.Properties -import de.fraunhofer.aisec.cpg.graph.edge.PropertyEdge -import de.fraunhofer.aisec.cpg.graph.edge.partial +import de.fraunhofer.aisec.cpg.graph.edge.* import de.fraunhofer.aisec.cpg.graph.statements.DeclarationStatement import de.fraunhofer.aisec.cpg.graph.statements.ForEachStatement import de.fraunhofer.aisec.cpg.graph.statements.ReturnStatement @@ -122,15 +120,35 @@ open class ControlFlowSensitiveDFGPass(ctx: TranslationContext) : EOGStarterPass ) } } else { - key.addAllPrevDFG( - value.elements.filterNot { - (it is VariableDeclaration || it is ParameterDeclaration) && key == it + value.elements.forEach { + if ((it is VariableDeclaration || it is ParameterDeclaration) && key == it) { + // Nothing to do + } else if ( + Pair(it, key) in edgePropertiesMap && + edgePropertiesMap[Pair(it, key)] is CallingContext + ) { + key.addPrevDFG( + it, + callingContext = (edgePropertiesMap[Pair(it, key)] as? CallingContext) + ) + } else { + key.addPrevDFG(it) } - ) + } } } } + /** + * Checks if there's an entry in [edgePropertiesMap] with key `(x, null)` where `x` is in [from] + * and, if so, adds an entry with key `(x, to)` and the same value + */ + protected fun findAndSetProperties(from: Set, to: Node) { + edgePropertiesMap + .filter { it.key.first in from && it.key.second == null } + .forEach { edgePropertiesMap[Pair(it.key.first, to)] = it.value } + } + /** * Removes all the incoming and outgoing DFG edges for each variable declaration in the block of * code [node]. 
@@ -219,6 +237,10 @@ open class ControlFlowSensitiveDFGPass(ctx: TranslationContext) : EOGStarterPass // later for READ accesses. val declState = doubleState.declarationsState[currentNode.objectIdentifier()] if (declState != null) { + // We check if we have something relevant for this node (because there was an + // entry for the incoming edge) in the edgePropertiesMap and, if so, we generate + // a dedicated entry for the edge between declState and currentNode. + findAndSetProperties(declState.elements, currentNode) state.push(currentNode, declState) } else { // If we do not have a stored state of our object+field, we can use the field @@ -239,6 +261,10 @@ open class ControlFlowSensitiveDFGPass(ctx: TranslationContext) : EOGStarterPass // later for READ accesses. val declState = doubleState.declarationsState[currentNode.objectIdentifier()] if (declState != null) { + // We check if we have something relevant for this node (because there was an + // entry for the incoming edge) in the edgePropertiesMap and, if so, we generate + // a dedicated entry for the edge between declState and currentNode. + findAndSetProperties(declState.elements, currentNode) state.push(currentNode, declState) } else { // If we do not have a stored state of our object+field, we can use the field @@ -279,7 +305,12 @@ open class ControlFlowSensitiveDFGPass(ctx: TranslationContext) : EOGStarterPass writtenDeclaration = input.refersTo if (writtenDeclaration != null) { - state.push(input, doubleState.declarationsState[writtenDeclaration]) + val prev = doubleState.declarationsState[writtenDeclaration] + // We check if we have something relevant for this node (because there was an entry + // for the incoming edge) in the edgePropertiesMap and, if so, we generate a + // dedicated entry for the edge between declState and currentNode. 
+ findAndSetProperties(prev?.elements ?: setOf(), currentNode) + state.push(input, prev) doubleState.declarationsState[writtenDeclaration] = PowersetLattice(identitySetOf(input)) } @@ -292,8 +323,10 @@ open class ControlFlowSensitiveDFGPass(ctx: TranslationContext) : EOGStarterPass writtenDeclaration = (lhs as? Reference)?.refersTo if (writtenDeclaration != null && lhs != null) { + val prev = doubleState.declarationsState[writtenDeclaration] + findAndSetProperties(prev?.elements ?: setOf(), currentNode) // Data flows from the last writes to the lhs variable to this node - state.push(lhs, doubleState.declarationsState[writtenDeclaration]) + state.push(lhs, prev) // The whole current node is the place of the last update, not (only) the lhs! doubleState.declarationsState[writtenDeclaration] = @@ -309,6 +342,10 @@ open class ControlFlowSensitiveDFGPass(ctx: TranslationContext) : EOGStarterPass doubleState.declarationsState[currentNode.refersTo]?.let { // We only read the variable => Get previous write which have been collected in // the other steps + // We check if we have something relevant for this node (because there was an entry + // for the incoming edge) in the edgePropertiesMap and, if so, we generate a + // dedicated entry for the edge between declState and currentNode. + findAndSetProperties(it.elements, currentNode) state.push(currentNode, it) } } else if ( @@ -388,6 +425,42 @@ open class ControlFlowSensitiveDFGPass(ctx: TranslationContext) : EOGStarterPass currentNode, PowersetLattice(identitySetOf(currentNode)) ) + } else if (currentNode is CallExpression) { + // If the CallExpression invokes a function for which we have a function summary, we use + // the summary to identify the last write to a parameter (or receiver) and match it to + // the respective argument or the base. 
+ // Since this Reference r is manipulated inside the invoked function, the next + // read-access of a Reference r' with r'.refersTo == r.refersTo will be affected by the + // node that has been stored inside the function summary for this particular + // parameter/receiver, and we store this last write-access in the state. + // As the node is in another function, we also store the CallingContext of the call + // expression in the edgePropertiesMap. + val functionsWithSummaries = + currentNode.invokes.filter { ctx.config.functionSummaries.hasSummary(it) } + if (functionsWithSummaries.isNotEmpty()) { + for (invoked in functionsWithSummaries) { + val changedParams = ctx.config.functionSummaries.getLastWrites(invoked) + for ((param, _) in changedParams) { + val arg = + when (param) { + (invoked as? MethodDeclaration)?.receiver -> + (currentNode as? MemberCallExpression)?.base as? Reference + is ParameterDeclaration -> + currentNode.arguments[param.argumentIndex] as? Reference + else -> null + } + doubleState.declarationsState[arg?.refersTo] = + PowersetLattice(identitySetOf(param)) + edgePropertiesMap[Pair(param, null)] = CallingContextOut(currentNode) + } + } + } else { + // The default behavior so we continue with the next EOG thing. + doubleState.declarationsState.push( + currentNode, + doubleState.declarationsState[currentEdge.start] + ) + } } else { doubleState.declarationsState.push( currentNode, @@ -397,6 +470,16 @@ open class ControlFlowSensitiveDFGPass(ctx: TranslationContext) : EOGStarterPass return state } + /** + * We use this map to store additional information on the DFG edges which we cannot keep in the + * state. This is for example the case to identify if the resulting edge will receive a + * context-sensitivity label (i.e., if the node used as key is somehow inside the called + * function and the next usage happens inside the function under analysis right now). 
The key of + * an entry works as follows: The 1st item in the pair is the prevDFG of the 2nd item. If the + * 2nd item is null, the target is not known yet. Ultimately, it will be 2nd -prevDFG-> 1st. + */ + val edgePropertiesMap = mutableMapOf, Any>() + /** * Checks if the node performs an operation and an assignment at the same time e.g. with the * operators +=, -=, *=, ... diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/DFGPass.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/DFGPass.kt index c175e00756..e1680daa4d 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/DFGPass.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/DFGPass.kt @@ -28,32 +28,72 @@ package de.fraunhofer.aisec.cpg.passes import de.fraunhofer.aisec.cpg.TranslationContext import de.fraunhofer.aisec.cpg.graph.* import de.fraunhofer.aisec.cpg.graph.declarations.* +import de.fraunhofer.aisec.cpg.graph.edge.CallingContextOut import de.fraunhofer.aisec.cpg.graph.edge.partial import de.fraunhofer.aisec.cpg.graph.statements.* import de.fraunhofer.aisec.cpg.graph.statements.expressions.* import de.fraunhofer.aisec.cpg.helpers.SubgraphWalker.IterativeGraphWalker import de.fraunhofer.aisec.cpg.helpers.Util +import de.fraunhofer.aisec.cpg.passes.inference.DFGFunctionSummaries import de.fraunhofer.aisec.cpg.passes.order.DependsOn /** Adds the DFG edges for various types of nodes.
*/ @DependsOn(SymbolResolver::class) class DFGPass(ctx: TranslationContext) : ComponentPass(ctx) { + private val callsInferredFunctions = mutableListOf() + override fun accept(component: Component) { val inferDfgForUnresolvedCalls = config.inferenceConfiguration.inferDfgForUnresolvedSymbols val walker = IterativeGraphWalker() walker.registerOnNodeVisit2 { node, parent -> - handle(node, parent, inferDfgForUnresolvedCalls) + handle(node, parent, inferDfgForUnresolvedCalls, config.functionSummaries) } for (tu in component.translationUnits) { walker.iterate(tu) } + if (config.registeredPasses.all { ControlFlowSensitiveDFGPass::class !in it }) { + connectInferredCallArguments(config.functionSummaries) + } + } + + /** + * For inferred functions which have function summaries encoded, we connect the arguments to + * modified parameter to propagate the changes to the arguments out of the [FunctionDeclaration] + * again. + */ + private fun connectInferredCallArguments(functionSummaries: DFGFunctionSummaries) { + for (call in callsInferredFunctions) { + for (invoked in call.invokes.filter { it.isInferred }) { + val changedParams = + functionSummaries.functionToChangedParameters[invoked] ?: mapOf() + for ((param, _) in changedParams) { + if (param == (invoked as? MethodDeclaration)?.receiver) { + (call as? MemberCallExpression) + ?.base + ?.addPrevDFG(param, callingContext = CallingContextOut(call)) + } else if (param is ParameterDeclaration) { + val arg = call.arguments[param.argumentIndex] + arg.addPrevDFG(param, callingContext = CallingContextOut(call)) + (arg as? 
Reference)?.let { + it.access = AccessValues.READWRITE + it.refersTo?.let { it1 -> it.addNextDFG(it1) } + } + } + } + } + } } override fun cleanup() { // Nothing to do } - protected fun handle(node: Node?, parent: Node?, inferDfgForUnresolvedSymbols: Boolean) { + protected fun handle( + node: Node?, + parent: Node?, + inferDfgForUnresolvedSymbols: Boolean, + functionSummaries: DFGFunctionSummaries + ) { when (node) { // Expressions is CallExpression -> handleCallExpression(node, inferDfgForUnresolvedSymbols) @@ -83,7 +123,7 @@ class DFGPass(ctx: TranslationContext) : ComponentPass(ctx) { is IfStatement -> handleIfStatement(node) // Declarations is FieldDeclaration -> handleFieldDeclaration(node) - is FunctionDeclaration -> handleFunctionDeclaration(node) + is FunctionDeclaration -> handleFunctionDeclaration(node, functionSummaries) is TupleDeclaration -> handleTupleDeclaration(node) is VariableDeclaration -> handleVariableDeclaration(node) } @@ -159,15 +199,23 @@ class DFGPass(ctx: TranslationContext) : ComponentPass(ctx) { * Adds the DFG edge for a [FunctionDeclaration]. The data flows from the return statement(s) to * the function. */ - protected fun handleFunctionDeclaration(node: FunctionDeclaration) { + protected fun handleFunctionDeclaration( + node: FunctionDeclaration, + functionSummaries: DFGFunctionSummaries + ) { if (node.isInferred) { - // If the function is inferred, we connect all parameters to the function declaration. - // The condition should make sure that we don't add edges multiple times, i.e., we - // only handle the declaration exactly once. - node.addAllPrevDFG(node.parameters) - // If it's a method with a receiver, we connect that one too. - if (node is MethodDeclaration) { - node.receiver?.let { node.addPrevDFG(it) } + val summaryExists = functionSummaries.addFlowsToFunctionDeclaration(node) + + if (!summaryExists) { + // If the function is inferred, we connect all parameters to the function + // declaration. 
+ // The condition should make sure that we don't add edges multiple times, i.e., we + // only handle the declaration exactly once. + node.addAllPrevDFG(node.parameters) + // If it's a method with a receiver, we connect that one too. + if (node is MethodDeclaration) { + node.receiver?.let { node.addPrevDFG(it) } + } } } else { node.allChildren().forEach { node.addPrevDFG(it) } @@ -415,7 +463,10 @@ class DFGPass(ctx: TranslationContext) : ComponentPass(ctx) { } else if (call.invokes.isNotEmpty()) { call.invokes.forEach { Util.attachCallParameters(it, call) - call.addPrevDFG(it) + call.addPrevDFG(it, callingContext = CallingContextOut(call)) + if (it.isInferred) { + callsInferredFunctions.add(call) + } } } } diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/inference/DFGFunctionSummaries.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/inference/DFGFunctionSummaries.kt new file mode 100644 index 0000000000..b6b52b82bc --- /dev/null +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/inference/DFGFunctionSummaries.kt @@ -0,0 +1,341 @@ +/* + * Copyright (c) 2024, Fraunhofer AISEC. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * $$$$$$\ $$$$$$$\ $$$$$$\ + * $$ __$$\ $$ __$$\ $$ __$$\ + * $$ / \__|$$ | $$ |$$ / \__| + * $$ | $$$$$$$ |$$ |$$$$\ + * $$ | $$ ____/ $$ |\_$$ | + * $$ | $$\ $$ | $$ | $$ | + * \$$$$$ |$$ | \$$$$$ | + * \______/ \__| \______/ + * + */ +package de.fraunhofer.aisec.cpg.passes.inference + +import com.fasterxml.jackson.core.JsonFactory +import com.fasterxml.jackson.databind.ObjectMapper +import com.fasterxml.jackson.dataformat.yaml.YAMLFactory +import com.fasterxml.jackson.module.kotlin.readValue +import com.fasterxml.jackson.module.kotlin.registerKotlinModule +import de.fraunhofer.aisec.cpg.TranslationConfiguration.Builder +import de.fraunhofer.aisec.cpg.ancestors +import de.fraunhofer.aisec.cpg.graph.Node +import de.fraunhofer.aisec.cpg.graph.declarations.* +import de.fraunhofer.aisec.cpg.graph.objectType +import de.fraunhofer.aisec.cpg.graph.parseName +import de.fraunhofer.aisec.cpg.graph.types.Type +import de.fraunhofer.aisec.cpg.isDerivedFrom +import java.io.File + +/** + * If the user of the library registers one or multiple DFG-function summary files (via + * [Builder.registerFunctionSummaries]), this class is responsible for parsing the files, caching + * the result and adding the respective DFG summaries to the [FunctionDeclaration]. + */ +class DFGFunctionSummaries { + private constructor() + + /** Caches a mapping of the [FunctionDeclarationEntry] to a list of its [DFGEntry]. */ + val functionToDFGEntryMap = mutableMapOf>() + + /** + * Saves the information on which parameter(s) of a function are modified by the function. This + * is interesting since we need to add DFG edges between the modified parameter and the + * respective argument(s). For each [ParameterDeclaration] as well as the + * [MethodDeclaration.receiver] that has some incoming DFG-edge within this + * [FunctionDeclaration], we store all previous DFG nodes. 
+ */ + val functionToChangedParameters = + mutableMapOf>>() + + fun hasSummary(functionDeclaration: FunctionDeclaration) = + functionDeclaration in functionToChangedParameters + + fun getLastWrites(functionDeclaration: FunctionDeclaration): Map> = + functionToChangedParameters[functionDeclaration] ?: mapOf() + + /** Parses [file], stores its entries in [functionToDFGEntryMap] and returns that map. */ + private fun addEntriesFromFile(file: File): Map> { + val mapper = + if (file.extension.lowercase() in listOf("yaml", "yml")) { + ObjectMapper(YAMLFactory()) + } else { + ObjectMapper(JsonFactory()) + } + .registerKotlinModule() + val entries = mapper.readValue>(file) + for (entry in entries) { + functionToDFGEntryMap[entry.functionDeclaration] = entry.dataFlows + } + return functionToDFGEntryMap + } + + /** + * Adds the DFG edges to the [functionDeclaration] depending on the function summaries which are + * kept in this object. If no suitable entry was found, this method returns `false`. + */ + fun addFlowsToFunctionDeclaration(functionDeclaration: FunctionDeclaration): Boolean { + val dfgEntries = findFunctionDeclarationEntry(functionDeclaration) ?: return false + applyDfgEntryToFunctionDeclaration(functionDeclaration, dfgEntries) + return true + } + + /** + * It identifies the "best match" of all [FunctionDeclarationEntry]s stored in the + * [functionToDFGEntryMap] for the given [functionDecl]. It therefore checks that + * 1) The languages match + * 2) The method/function names match + * 3) If there are multiple entries with different signatures, the signature has to match. If + * none of the entries with a signature matches, we take the "default" entry without a + * signature. + * 4) If it's a method (i.e., invoked on an object), we also consider which type of the + * receiver/base is the most precise one + * + * This method returns the list of [DFGEntry] for the "best match" or `null` if no entry + * matches.
+ */ + private fun findFunctionDeclarationEntry(functionDecl: FunctionDeclaration): List? { + if (functionToDFGEntryMap.isEmpty()) return null + + val language = functionDecl.language + val languageName = language?.javaClass?.name + val methodName = functionDecl.name + // The language and the method name have to match. If a signature is specified, it also has + // to match to the one of the FunctionDeclaration, null indicates that we accept everything. + val matchingEntries = + functionToDFGEntryMap.keys.filter { + // The language has to match otherwise the remaining comparison is useless + if (it.language == languageName) { + // Split the name if we have a FQN + val entryMethodName = language.parseName(it.methodName) + val entryRecord = + entryMethodName.parent?.let { + functionDecl.objectType(entryMethodName.parent) + } + methodName.lastPartsMatch( + entryMethodName.localName + ) && // The local name has to match + // If it's a method, the record declaration has to be compatible with the + // type of the entry's record declaration. We take the type of the method + // name's parent and generate a type from it. We then check if this type is + // a supertype + (entryRecord == null || + (functionDecl as? MethodDeclaration) + ?.recordDeclaration + ?.toType() + ?.isDerivedFrom(entryRecord) == true) && + // The parameter types have to match + (it.signature == null || + functionDecl.hasSignature( + it.signature.map { signatureType -> + functionDecl.objectType(signatureType) + } + )) + } else { + false + } + } + return if (matchingEntries.size == 1) { + // Only one entry => We take this one. + functionToDFGEntryMap[matchingEntries.single()] + } else if (matchingEntries.filter { it.signature != null }.size == 1) { + // Only one entry with a matching signature => We take this one. + functionToDFGEntryMap[matchingEntries.single { it.signature != null }] + } else if (matchingEntries.isNotEmpty()) { + /* There are multiple matching entries. 
We use the following routine: + * First, we filter for existing signatures. + * Second, we filter for the most precise class. + * If there are still multiple options, we take the longest signature. + * If this also didn't help to get a precise result, we iterate through the parameters and take the most precise one. We start with index 0 and count upwards, so if param0 leads to a single result, we're done and other entries won't be considered even if all the remaining parameters are more precise or whatever. + * If nothing helped to get a unique entry, we pick the first remaining entry and hope it's the most precise one. + */ + val typeEntryList = + matchingEntries + .filter { it.signature != null } + .map { + Pair( + language.parseName(it.methodName).parent?.let { it1 -> + functionDecl.objectType(it1) + }, + it + ) + } + var mostPreciseClassEntries = mutableListOf() + var mostPreciseType = typeEntryList.first().first + var superTypes = mostPreciseType?.ancestors?.map { it.type } ?: setOf() + for (typeEntry in typeEntryList) { + if (typeEntry.first == mostPreciseType) { + mostPreciseClassEntries.add(typeEntry.second) + } else if (typeEntry.first in superTypes) { + mostPreciseClassEntries.clear() + mostPreciseClassEntries.add(typeEntry.second) + mostPreciseType = typeEntry.first + superTypes = mostPreciseType?.ancestors?.map { it.type } ?: setOf() + } + } + val maxSignature = mostPreciseClassEntries.mapNotNull { it.signature?.size }.max() + if (mostPreciseClassEntries.size > 1) { + mostPreciseClassEntries = + mostPreciseClassEntries + .filter { it.signature?.size == maxSignature } + .toMutableList() + } + // Filter parameter types. We start with parameter 0 and continue. Let's hope we remove + // some entries here. 
+ var argIndex = 0 + while (mostPreciseClassEntries.size > 1 && argIndex < maxSignature) { + mostPreciseType = + mostPreciseClassEntries.first().signature?.get(argIndex)?.let { + functionDecl.objectType(it) + } + superTypes = mostPreciseType?.ancestors?.map { it.type } ?: setOf() + val newMostPrecise = mutableListOf() + for (entry in mostPreciseClassEntries) { + val currentType = + entry.signature?.get(argIndex)?.let { functionDecl.objectType(it) } + if (currentType == mostPreciseType) { + newMostPrecise.add(entry) + } else if (currentType in superTypes) { + newMostPrecise.clear() + newMostPrecise.add(entry) + mostPreciseType = currentType + superTypes = mostPreciseType?.ancestors?.map { it.type } ?: setOf() + } + } + argIndex++ + mostPreciseClassEntries = newMostPrecise + } + functionToDFGEntryMap[mostPreciseClassEntries.first()] + } else { + null + } + } + + /** + * This method parses the [DFGEntry] entries in [dfgEntries] and adds the respective DFG edges + * between the parameters, receiver and potentially the [functionDeclaration] itself. + */ + private fun applyDfgEntryToFunctionDeclaration( + functionDeclaration: FunctionDeclaration, + dfgEntries: List + ) { + for (entry in dfgEntries) { + val from = + if (entry.from.startsWith("param")) { + try { + val paramIndex = entry.from.removePrefix("param").toInt() + functionDeclaration.parameters[paramIndex] + } catch (e: NumberFormatException) { + null + } + } else if (entry.from == "base") { + (functionDeclaration as? 
MethodDeclaration)?.receiver + } else { + null + } + val to = + if (entry.to.startsWith("param")) { + try { + val paramIndex = entry.to.removePrefix("param").toInt() + val paramTo = functionDeclaration.parameters[paramIndex] + if (from != null) { + functionToChangedParameters + .computeIfAbsent(functionDeclaration) { mutableMapOf() } + .computeIfAbsent(paramTo) { mutableSetOf() } + .add(from) + } + paramTo + } catch (e: NumberFormatException) { + null + } + } else if (entry.to == "base") { + val receiver = (functionDeclaration as? MethodDeclaration)?.receiver + if (from != null) { + if (receiver != null) { + functionToChangedParameters + .computeIfAbsent(functionDeclaration) { mutableMapOf() } + .computeIfAbsent(receiver, ::mutableSetOf) + .add(from) + } + } + receiver + } else if (entry.to == "return") { + functionDeclaration + } else if (entry.to.startsWith("return")) { + val returnIndex = entry.to.removePrefix("return").toInt() + // TODO: It would be nice if we could model the index. Not sure how this is done + functionDeclaration + } else { + null + } + // TODO: It would make sense to model properties here. Could be the index of a return + // value, full vs. partial flow or whatever comes to our minds in the future + to?.let { from?.addNextDFG(it) } + } + } + + /** + * This class summarizes a data flow entry. Consists of the [functionDeclaration] for which it + * is relevant and a list [dataFlows] of data flow summaries. + */ + private data class DataflowEntry( + val functionDeclaration: FunctionDeclarationEntry, + val dataFlows: List + ) + + /** + * This class is used to identify the [FunctionDeclaration] of interest for the specified flows. + */ + data class FunctionDeclarationEntry( + /** The FQN of the [Language] for which this flow is relevant. */ + val language: String, + /** The FQN of the [FunctionDeclaration] or [MethodDeclaration]. */ + val methodName: String, + /** + * The signature of the [FunctionDeclaration]. 
We use a list of the FQN of the [Type]s of + * parameter. This is optional and if not specified, we perform the matching only based on + * the [methodName]. + */ + val signature: List? = null + ) + + /** Represents a data flow entry. */ + data class DFGEntry( + /** + * The start of the DFG edge. Can be a parameter (`paramX`, where X is a number), or `base`. + */ + val from: String, + /** + * The end of the DFG edge. Can be a parameter (`paramX`, where X is a number), `base`, or + * the return value (`returnX`, where X is optional and a number indicating an index). + */ + val to: String, + /** + * A property which can give us more information. Currently, it's ignored, but it would make + * sense to add e.g. partial flows based on PR 1421. + */ + val dfgType: String + ) + + companion object { + /** Generates a [DFGFunctionSummaries] object from the given [files]. */ + fun fromFiles(files: List): DFGFunctionSummaries { + val dfgFunctionSummaries = DFGFunctionSummaries() + files.forEach { dfgFunctionSummaries.addEntriesFromFile(it) } + return dfgFunctionSummaries + } + } +} diff --git a/cpg-core/src/test/kotlin/de/fraunhofer/aisec/cpg/enhancements/DFGFunctionSummariesTest.kt b/cpg-core/src/test/kotlin/de/fraunhofer/aisec/cpg/enhancements/DFGFunctionSummariesTest.kt new file mode 100644 index 0000000000..108a1387e8 --- /dev/null +++ b/cpg-core/src/test/kotlin/de/fraunhofer/aisec/cpg/enhancements/DFGFunctionSummariesTest.kt @@ -0,0 +1,428 @@ +/* + * Copyright (c) 2024, Fraunhofer AISEC. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + * + * $$$$$$\ $$$$$$$\ $$$$$$\ + * $$ __$$\ $$ __$$\ $$ __$$\ + * $$ / \__|$$ | $$ |$$ / \__| + * $$ | $$$$$$$ |$$ |$$$$\ + * $$ | $$ ____/ $$ |\_$$ | + * $$ | $$\ $$ | $$ | $$ | + * \$$$$$ |$$ | \$$$$$ | + * \______/ \__| \______/ + * + */ +package de.fraunhofer.aisec.cpg.enhancements + +import de.fraunhofer.aisec.cpg.GraphExamples +import de.fraunhofer.aisec.cpg.InferenceConfiguration +import de.fraunhofer.aisec.cpg.TranslationConfiguration +import de.fraunhofer.aisec.cpg.TranslationResult +import de.fraunhofer.aisec.cpg.frontends.TestLanguage +import de.fraunhofer.aisec.cpg.graph.* +import de.fraunhofer.aisec.cpg.graph.builder.* +import de.fraunhofer.aisec.cpg.graph.edge.CallingContextIn +import de.fraunhofer.aisec.cpg.graph.edge.CallingContextOut +import de.fraunhofer.aisec.cpg.graph.edge.ContextSensitiveDataflow +import de.fraunhofer.aisec.cpg.graph.functions +import de.fraunhofer.aisec.cpg.graph.pointer +import de.fraunhofer.aisec.cpg.graph.statements.ReturnStatement +import de.fraunhofer.aisec.cpg.graph.statements.expressions.Reference +import de.fraunhofer.aisec.cpg.graph.types.recordDeclaration +import de.fraunhofer.aisec.cpg.passes.* +import de.fraunhofer.aisec.cpg.passes.inference.DFGFunctionSummaries +import de.fraunhofer.aisec.cpg.passes.inference.startInference +import java.io.File +import kotlin.test.Test +import kotlin.test.assertEquals +import kotlin.test.assertNotNull +import kotlin.test.assertTrue + +class DFGFunctionSummariesTest { + + @Test + fun testParsingFile() { + val jsonSummaries = + DFGFunctionSummaries.fromFiles(listOf(File("src/test/resources/function-dfg.json"))) + + assertTrue(jsonSummaries.functionToDFGEntryMap.isNotEmpty()) + val yamlSummaries = + DFGFunctionSummaries.fromFiles(listOf(File("src/test/resources/function-dfg.yml"))) + + assertTrue(yamlSummaries.functionToDFGEntryMap.isNotEmpty()) + + 
assertEquals(jsonSummaries.functionToDFGEntryMap, yamlSummaries.functionToDFGEntryMap) + } + + @Test + fun testMatching() { + val code = + GraphExamples.testFrontend( + TranslationConfiguration.builder() + .defaultPasses() + .registerLanguage(TestLanguage(".")) + .registerFunctionSummaries(File("src/test/resources/function-dfg2.yml")) + .inferenceConfiguration( + InferenceConfiguration.builder() + .inferDfgForUnresolvedCalls(true) + .inferFunctions(true) + .build() + ) + .build() + ) + .build { + translationResult { + translationUnit("DfgInferredCall.c") { + function("main", t("int")) { + body { + // We need three types with a type hierarchy. + val objectType = t("test.Object") + val listType = t("test.List") + ctx?.let { + val recordDecl = + listType + .startInference(it) + ?.inferRecordDeclaration( + listType, + this@translationUnit + ) + listType.recordDeclaration = recordDecl + recordDecl?.addSuperClass(objectType) + listType.superTypes.add(objectType) + } + + val specialListType = t("test.SpecialList") + ctx?.let { + val recordDecl = + specialListType + .startInference(it) + ?.inferRecordDeclaration( + specialListType, + this@translationUnit + ) + specialListType.recordDeclaration = recordDecl + recordDecl?.addSuperClass(listType) + specialListType.superTypes.add(listType) + } + + val verySpecialListType = t("test.VerySpecialList") + ctx?.let { + val recordDecl = + specialListType + .startInference(it) + ?.inferRecordDeclaration( + specialListType, + this@translationUnit + ) + specialListType.recordDeclaration = recordDecl + recordDecl?.addSuperClass(listType) + specialListType.superTypes.add(listType) + } + + memberCall("addAll", construct("test.VerySpecialList")) { + literal(1, t("int")) + construct("test.Object") + } + + memberCall("addAll", construct("test.SpecialList")) { + literal(1, t("int")) + construct("test.List") + } + + memberCall("addAll", construct("test.SpecialList")) { + literal(1, t("int")) + construct("test.Object") + } + + declare { + 
variable("a", t("test.List")) { construct("test.List") } + } + + memberCall("addAll", ref("a", t("test.List"))) { + literal(1, t("int")) + construct("test.Object") + } + call("print") { ref("a", t("test.List")) } + + memberCall("addAll", construct("random.Type")) { + literal(1, t("int")) + construct("test.Object") + } + + returnStmt { literal(0, t("int")) } + } + } + } + } + } + + // Explicitly specified. Easiest case. Base class, directly specified. Overloaded things + // don't match. Child entries don't match. + val listAddAllTwoArgs = code.methods["test.List.addAll"] + assertNotNull(listAddAllTwoArgs) + assertEquals(2, listAddAllTwoArgs.parameters.size) + assertEquals( + setOf(listAddAllTwoArgs.receiver!!), + listAddAllTwoArgs.parameters[1].nextDFG + ) + // No flow from param0 or receiver specified => Should be empty and differ from default + // behavior + assertEquals(setOf(), listAddAllTwoArgs.parameters[0].nextDFG) + assertEquals(setOf(), listAddAllTwoArgs.prevDFG) + + // Specified by parent class' method List.addAll(int, Object). Test that parent of base is + // also taken into account. + val specialListAddAllTwoArgs = + code.methods("test.SpecialList.addAll").first { + it.parameters[1].type.name.lastPartsMatch("test.Object") + } + assertNotNull(specialListAddAllTwoArgs) + assertEquals(2, specialListAddAllTwoArgs.parameters.size) + assertEquals( + setOf(specialListAddAllTwoArgs.receiver!!), + specialListAddAllTwoArgs.parameters[1].nextDFG + ) + // No flow from param0 or receiver specified => Should be empty and differ from default + // behavior + assertEquals(setOf(), specialListAddAllTwoArgs.parameters[0].nextDFG) + assertEquals(setOf(), specialListAddAllTwoArgs.prevDFG) + + // Specified by parent class' method List.addAll(int, List). Tests the most precise + // signature matching in case of function overloading. 
+ val specialListAddAllSpecializedArgs = + code.methods("test.SpecialList.addAll").first { + it.parameters[1].type.name.lastPartsMatch("test.List") + } + assertNotNull(specialListAddAllSpecializedArgs) + assertEquals(2, specialListAddAllSpecializedArgs.parameters.size) + // Very weird data flow specified: receiver to param0 and param1 to return. + assertEquals( + setOf(specialListAddAllSpecializedArgs.parameters[0]), + specialListAddAllSpecializedArgs.receiver?.nextDFG ?: setOf() + ) + assertEquals( + setOf(specialListAddAllSpecializedArgs), + specialListAddAllSpecializedArgs.parameters[1].nextDFG + ) + + // Specified by VerySpecialList.addAll(int, Object), overrides List.addAll(int, Object). + // Tests that we take the most precise base class. The entry of List.addAll(int, Object) is + // also applicable but isn't the most precise one (due to the base) + val verySpecialListAddAllSpecializedArgs = code.methods["test.VerySpecialList.addAll"] + assertNotNull(verySpecialListAddAllSpecializedArgs) + assertEquals(2, verySpecialListAddAllSpecializedArgs.parameters.size) + // Very weird data flow specified: receiver to param0 and param1 to return. + assertEquals( + setOf(verySpecialListAddAllSpecializedArgs.parameters[0]), + verySpecialListAddAllSpecializedArgs.receiver?.nextDFG ?: setOf() + ) + assertEquals( + setOf(verySpecialListAddAllSpecializedArgs), + verySpecialListAddAllSpecializedArgs.parameters[1].nextDFG + ) + + // Not specified => Default behavior (param0 and param1 and receiver to method declaration). + val randomTypeAddAllTwoArgs = code.methods["random.Type.addAll"] + assertNotNull(randomTypeAddAllTwoArgs) + assertEquals(2, randomTypeAddAllTwoArgs.parameters.size) + assertEquals( + setOf( + randomTypeAddAllTwoArgs.parameters[1], + randomTypeAddAllTwoArgs.parameters[0], + randomTypeAddAllTwoArgs.receiver!! 
+ ), + randomTypeAddAllTwoArgs.prevDFG + ) + } + + @Test + fun testPropagateArguments() { + val dfgTest = getDfgInferredCall() { defaultPasses() } + assertNotNull(dfgTest) + + val main = dfgTest.functions["main"] + assertNotNull(main) + + val memcpy = dfgTest.functions["memcpy"] + assertNotNull(memcpy) + val param0 = memcpy.parameters[0] + val param1 = memcpy.parameters[1] + + val call = main.calls["memcpy"] + assertNotNull(call) + + val argA = call.arguments[0] + assertNotNull(argA) + /* + The flows should be as follows: + VariableDeclaration["a"] -> Reference["a" (argument of call)] -CallingContextIn-> ParameterDeclaration -CallingContextOut-> Reference["a" (return)] + */ + + assertEquals(1, argA.nextDFG.size) + assertEquals(1, argA.prevDFG.size) + + val nextDfg = argA.nextDFGEdges.single() + assertEquals( + call, + ((nextDfg as? ContextSensitiveDataflow)?.callingContext as? CallingContextIn)?.call + ) + assertEquals(param0, nextDfg.end) + + val variableA = main.variables["a"] + assertNotNull(variableA) + assertEquals(mutableSetOf(variableA), argA.prevDFG) + + val prevDfgOfParam0 = param0.prevDFGEdges.singleOrNull { it !is ContextSensitiveDataflow } + assertNotNull(prevDfgOfParam0) + assertEquals(param1, prevDfgOfParam0.start) + + val returnA = main.allChildren().singleOrNull()?.returnValue as? Reference + assertNotNull(returnA) + + assertEquals(mutableSetOf(returnA), param0.nextDFG) + + // Check that also the CallingContext property is set correctly + val nextDfgOfParam0 = + param0.nextDFGEdges.singleOrNull { + ((it as? ContextSensitiveDataflow)?.callingContext as? 
CallingContextOut)?.call == + call + } + assertEquals(returnA, nextDfgOfParam0?.end) + } + + @Test + fun testPropagateArgumentsControlFlowInsensitive() { + // We don't use the ControlFlowSensitiveDFGPass here to check the method + // DFGPass.connectInferredCallArguments + val dfgTest = getDfgInferredCall { + this.registerPass() + registerPass() + registerPass() + registerPass() + registerPass() + registerPass() + registerPass() + registerPass() + } + assertNotNull(dfgTest) + + val main = dfgTest.functions["main"] + assertNotNull(main) + + val memcpy = dfgTest.functions["memcpy"] + assertNotNull(memcpy) + val param0 = memcpy.parameters[0] + val param1 = memcpy.parameters[1] + + val call = main.calls["memcpy"] + assertNotNull(call) + + val argA = call.arguments[0] + assertNotNull(argA) + /* + The flows should be as follows: + VariableDeclaration["a"] -> { Reference["a" (argument of call)], Reference["a" (return)] } + Reference["a" (argument of call)] -CallingContextIn-> ParameterDeclaration -CallingContextOut-> Reference["a" (argument of call)] -> VariableDeclaration["a"] + */ + + assertEquals(2, argA.nextDFG.size) + assertEquals(2, argA.prevDFG.size) + + val nextDfg = + argA.nextDFGEdges.singleOrNull { + ((it as? ContextSensitiveDataflow)?.callingContext as? CallingContextIn)?.call == + call + } + assertNotNull(nextDfg) + assertEquals(param0, nextDfg.end) + + val variableA = main.variables["a"] + assertNotNull(variableA) + assertEquals(mutableSetOf(variableA, param0), argA.prevDFG) + + val prevDfgOfParam0 = param0.prevDFGEdges.singleOrNull { it !is ContextSensitiveDataflow } + assertNotNull(prevDfgOfParam0) + assertEquals(param1, prevDfgOfParam0.start) + + val returnA = main.allChildren().singleOrNull()?.returnValue as? 
Reference + assertNotNull(returnA) + + assertEquals(mutableSetOf(argA), param0.nextDFG) + + assertEquals(mutableSetOf(returnA, argA), variableA.nextDFG) + + // Check that also the CallingContext property is set correctly + val nextDfgOfParam0 = + param0.nextDFGEdges.singleOrNull { + ((it as? ContextSensitiveDataflow)?.callingContext as? CallingContextOut)?.call == + call + } + assertEquals(argA, nextDfgOfParam0?.end) + } + + companion object { + fun getDfgInferredCall( + customConfig: TranslationConfiguration.Builder.() -> TranslationConfiguration.Builder = + { + this + } + ): TranslationResult { + val config = + TranslationConfiguration.builder() + .registerLanguage(TestLanguage(".")) + .registerFunctionSummaries(File("src/test/resources/function-dfg.yml")) + .inferenceConfiguration( + InferenceConfiguration.builder() + .inferDfgForUnresolvedCalls(true) + .inferFunctions(true) + .build() + ) + .customConfig() + .build() + /* + int main() { + char *a = 7; + char *b = 5; + memcpy(a, b, 1); + return a; + } + */ + return GraphExamples.testFrontend(config).build { + translationResult { + translationUnit("DfgInferredCall.c") { + function("main", t("int")) { + body { + declare { + variable("a", t("char").pointer()) { literal(7, t("char")) } + } + + declare { + variable("b", t("char").pointer()) { literal(5, t("char")) } + } + + call("memcpy") { + ref("a") + ref("b") + literal(1, t("int")) + } + + returnStmt { ref("a") } + } + } + } + } + } + } + } +} diff --git a/cpg-core/src/test/kotlin/de/fraunhofer/aisec/cpg/graph/ExpressionBuilderTest.kt b/cpg-core/src/test/kotlin/de/fraunhofer/aisec/cpg/graph/ExpressionBuilderTest.kt new file mode 100644 index 0000000000..f5ce80f271 --- /dev/null +++ b/cpg-core/src/test/kotlin/de/fraunhofer/aisec/cpg/graph/ExpressionBuilderTest.kt @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2024, Fraunhofer AISEC. All rights reserved. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * $$$$$$\ $$$$$$$\ $$$$$$\ + * $$ __$$\ $$ __$$\ $$ __$$\ + * $$ / \__|$$ | $$ |$$ / \__| + * $$ | $$$$$$$ |$$ |$$$$\ + * $$ | $$ ____/ $$ |\_$$ | + * $$ | $$\ $$ | $$ | $$ | + * \$$$$$ |$$ | \$$$$$ | + * \______/ \__| \______/ + * + */ +package de.fraunhofer.aisec.cpg.graph + +import de.fraunhofer.aisec.cpg.graph.declarations.FieldDeclaration +import de.fraunhofer.aisec.cpg.graph.edge.CallingContextIn +import de.fraunhofer.aisec.cpg.graph.edge.ContextSensitiveDataflow +import de.fraunhofer.aisec.cpg.graph.edge.PartialDataflowGranularity +import de.fraunhofer.aisec.cpg.graph.statements.expressions.CallExpression +import de.fraunhofer.aisec.cpg.graph.statements.expressions.Literal +import de.fraunhofer.aisec.cpg.graph.statements.expressions.Reference +import kotlin.test.Test +import kotlin.test.assertEquals +import kotlin.test.assertTrue + +class ExpressionBuilderTest { + @Test + fun testDuplicateWithDFGProperties() { + val node1 = Literal() + val node2 = Reference() + val granularity = PartialDataflowGranularity(FieldDeclaration()) + val callingContextIn = CallingContextIn(CallExpression()) + node1.addPrevDFG(node2, granularity, callingContextIn) + + val clone = node1.duplicate(false) + val clonedPrevDFG = clone.prevDFGEdges.single() + assertTrue(clonedPrevDFG is ContextSensitiveDataflow) + assertEquals(callingContextIn, clonedPrevDFG.callingContext) + assertEquals(granularity, clonedPrevDFG.granularity) 
+ + assertEquals(setOf(node1, clone), node2.nextDFG) + } + + @Test + fun testDuplicateWithDFGProperties2() { + val node1 = Literal() + val node2 = Reference() + val granularity = PartialDataflowGranularity(FieldDeclaration()) + val callingContextIn = CallingContextIn(CallExpression()) + node1.addNextDFG(node2, granularity, callingContextIn) + + val clone = node1.duplicate(false) + val clonedPrevDFG = clone.nextDFGEdges.single() + assertTrue(clonedPrevDFG is ContextSensitiveDataflow) + assertEquals(callingContextIn, clonedPrevDFG.callingContext) + assertEquals(granularity, clonedPrevDFG.granularity) + + assertEquals(setOf(node1, clone), node2.prevDFG) + } +} diff --git a/cpg-core/src/test/kotlin/de/fraunhofer/aisec/cpg/passes/DFGTest.kt b/cpg-core/src/test/kotlin/de/fraunhofer/aisec/cpg/passes/DFGTest.kt index 441ded7cbb..fa5bbee1db 100644 --- a/cpg-core/src/test/kotlin/de/fraunhofer/aisec/cpg/passes/DFGTest.kt +++ b/cpg-core/src/test/kotlin/de/fraunhofer/aisec/cpg/passes/DFGTest.kt @@ -489,7 +489,7 @@ class DFGTest { @Throws(Exception::class) fun testOutgoingDFGFromVariableDeclaration() { // TODO: IMHO this test is quite useless and can be merged into another one (e.g. - // testControlSensitiveDFGPassIfMerge). + // testControlSensitiveDFGPassIfMerge). 
val result = GraphExamples.getBasicSlice() val varA = TestUtils.findByUniqueName(result.variables, "a") diff --git a/cpg-core/src/test/resources/function-dfg.json b/cpg-core/src/test/resources/function-dfg.json new file mode 100644 index 0000000000..1a00d077f4 --- /dev/null +++ b/cpg-core/src/test/resources/function-dfg.json @@ -0,0 +1,69 @@ +[ + { + "functionDeclaration": { + "language": "de.fraunhofer.aisec.cpg.frontends.java.JavaLanguage", + "methodName": "java.util.List.addAll", + "signature": ["int", "java.util.Object"] + }, + "dataFlows": [ + { + "from": "param1", + "to": "base", + "dfgType": "full" + } + ] + }, + { + "functionDeclaration": { + "language": "de.fraunhofer.aisec.cpg.frontends.java.JavaLanguage", + "methodName": "java.util.List.addAll", + "signature": ["java.util.Object"] + }, + "dataFlows": [ + { + "from": "param0", + "to": "base", + "dfgType": "full" + } + ] + }, + { + "functionDeclaration": { + "language": "de.fraunhofer.aisec.cpg.frontends.java.JavaLanguage", + "methodName": "java.util.List.add" + }, + "dataFlows": [ + { + "from": "param0", + "to": "base", + "dfgType": "full" + } + ] + }, + { + "functionDeclaration": { + "language": "de.fraunhofer.aisec.cpg.frontends.cxx.CLanguage", + "methodName": "memcpy" + }, + "dataFlows": [ + { + "from": "param1", + "to": "param0", + "dfgType": "full" + } + ] + }, + { + "functionDeclaration": { + "language": "de.fraunhofer.aisec.cpg.frontends.TestLanguage", + "methodName": "memcpy" + }, + "dataFlows": [ + { + "from": "param1", + "to": "param0", + "dfgType": "full" + } + ] + } +] \ No newline at end of file diff --git a/cpg-core/src/test/resources/function-dfg.yml b/cpg-core/src/test/resources/function-dfg.yml new file mode 100644 index 0000000000..25489b6a3c --- /dev/null +++ b/cpg-core/src/test/resources/function-dfg.yml @@ -0,0 +1,44 @@ +- functionDeclaration: + language: de.fraunhofer.aisec.cpg.frontends.java.JavaLanguage + methodName: java.util.List.addAll + signature: + - int + - java.util.Object + 
dataFlows: + - from: param1 + to: base + dfgType: full + +- functionDeclaration: + language: de.fraunhofer.aisec.cpg.frontends.java.JavaLanguage + methodName: java.util.List.addAll + signature: + - java.util.Object + dataFlows: + - from: param0 + to: base + dfgType: full + +- functionDeclaration: + language: de.fraunhofer.aisec.cpg.frontends.java.JavaLanguage + methodName: java.util.List.add + dataFlows: + - from: param0 + to: base + dfgType: full + +- functionDeclaration: + language: de.fraunhofer.aisec.cpg.frontends.cxx.CLanguage + methodName: memcpy + dataFlows: + - from: param1 + to: param0 + dfgType: full + +- functionDeclaration: + language: de.fraunhofer.aisec.cpg.frontends.TestLanguage + methodName: memcpy + dataFlows: + - from: param1 + to: param0 + dfgType: full \ No newline at end of file diff --git a/cpg-core/src/test/resources/function-dfg2.yml b/cpg-core/src/test/resources/function-dfg2.yml new file mode 100644 index 0000000000..b13bdebae9 --- /dev/null +++ b/cpg-core/src/test/resources/function-dfg2.yml @@ -0,0 +1,64 @@ +- functionDeclaration: + language: de.fraunhofer.aisec.cpg.frontends.TestLanguage + methodName: test.List.addAll + signature: + - int + - test.Object + dataFlows: + - from: param1 + to: base + dfgType: full + +- functionDeclaration: + language: de.fraunhofer.aisec.cpg.frontends.TestLanguage + methodName: test.List.addAll + signature: + - int + - test.List + dataFlows: + - from: base + to: param0 + dfgType: full + - from: param1 + to: return + dfgType: full + +- functionDeclaration: + language: de.fraunhofer.aisec.cpg.frontends.TestLanguage + methodName: test.VerySpecialList.addAll + signature: + - int + - test.Object + dataFlows: + - from: base + to: param0 + dfgType: full + - from: param1 + to: return + dfgType: full + +- functionDeclaration: + language: de.fraunhofer.aisec.cpg.frontends.TestLanguage + methodName: test.List.addAll + signature: + - test.List + dataFlows: + - from: param0 + to: base + dfgType: full + +- 
functionDeclaration: + language: de.fraunhofer.aisec.cpg.frontends.TestLanguage + methodName: test.List.add + dataFlows: + - from: param0 + to: base + dfgType: full + +- functionDeclaration: + language: de.fraunhofer.aisec.cpg.frontends.TestLanguage + methodName: memcpy + dataFlows: + - from: param1 + to: param0 + dfgType: full \ No newline at end of file diff --git a/docs/docs/CPG/specs/dfg-function-summaries.md b/docs/docs/CPG/specs/dfg-function-summaries.md new file mode 100644 index 0000000000..8bf268b9dd --- /dev/null +++ b/docs/docs/CPG/specs/dfg-function-summaries.md @@ -0,0 +1,120 @@ +# Specification: Data Flow Graph - Function Summaries + +For functions and methods which are part of the analyzed codebase, the CPG can track data flows interprocedurally to some extent. +However, for all functions and methods which cannot be analyzed, we have no information available. +For this case, we provide the user a way to specify custom summaries of the data flows through the function. +To do so, you need to fill a JSON or YAML file as follows: + +* The outer element is a list/array +* In this list, you add elements, each of which summarizes the flows for one function/method +* The element consists of two objects: The `functionDeclaration` and the `dataFlows` +* The `functionDeclaration` consists of: + * `language`: The FQN of the `Language` element which this function is relevant for. + * `methodName`: The FQN of the function or method. We use this one to identify the relevant function/method. Do not forget to add the class name and use the separators as specified by the `Language`. + * `signature` (*optional*): This optional element allows us to differentiate between overloaded functions (i.e., two functions have the same FQN but accept different arguments). If no `signature` is specified, it matches to any function/method with the name you specified. 
The `signature` is a list of FQNs of the types (as strings) +* The `dataFlows` element is a list of objects with the following elements: + * `from`: A description of the start-node of a DFG-edge. Valid options: + * `paramX`: where `X` is the offset (we start counting with 0) + * `base`: the receiver of the method (i.e., the object the method is called on) + * `to`: A description of the end-node of the DFG-edge. Valid options: + * `paramX` where `X` is the offset (we start counting with 0) + * `base` the receiver of the method (i.e., the object the method is called on) + * `return` the return value of the function + * `returnX` where `X` is a number and specifies the index of the return value (if multiple values are returned). + * `dfgType`: Here, you can give more information. Currently, this is unused but should later allow us to add the properties to the edge. + +An example of a file could look as follows: + +=== "JSON" + + ```json + [ + { + "functionDeclaration": { + "language": "de.fraunhofer.aisec.cpg.frontends.java.JavaLanguage", + "methodName": "java.util.List.addAll", + "signature": ["int", "java.util.Object"] + }, + "dataFlows": [ + { + "from": "param1", + "to": "base", + "dfgType": "full" + } + ] + }, + { + "functionDeclaration": { + "language": "de.fraunhofer.aisec.cpg.frontends.java.JavaLanguage", + "methodName": "java.util.List.addAll", + "signature": ["java.util.Object"] + }, + "dataFlows": [ + { + "from": "param0", + "to": "base", + "dfgType": "full" + } + ] + }, + { + "functionDeclaration": { + "language": "de.fraunhofer.aisec.cpg.frontends.cxx.CLanguage", + "methodName": "memcpy" + }, + "dataFlows": [ + { + "from": "param1", + "to": "param0", + "dfgType": "full" + } + ] + } + ] + ``` + +=== "YAML" + + ```yml + - functionDeclaration: + language: de.fraunhofer.aisec.cpg.frontends.java.JavaLanguage + methodName: java.util.List.addAll + signature: + - int + - java.util.Object + dataFlows: + - from: param1 + to: base + dfgType: full + + - 
functionDeclaration: + language: de.fraunhofer.aisec.cpg.frontends.java.JavaLanguage + methodName: java.util.List.addAll + signature: + - java.util.Object + dataFlows: + - from: param0 + to: base + dfgType: full + + - functionDeclaration: + language: de.fraunhofer.aisec.cpg.frontends.cxx.CLanguage + methodName: memcpy + dataFlows: + - from: param1 + to: param0 + dfgType: full + ``` + +This file configures the following edges: +* For a method declaration in Java `java.util.List.addAll(int, java.util.Object)`, the parameter 1 flows to the base (i.e., the list object) +* For a method declaration in Java `java.util.List.addAll(java.util.Object)`, the parameter 0 flows to the base (i.e., the list object) +* For a function declaration in C `memcpy` (and thus also CXX `std::memcpy`), the parameter 1 flows to parameter 0. + + +Note: If multiple function summaries match a method/function declaration (after the normal matching considering the language, local name of the function/method, signature if applicable and type hierarchy of the base object), we use the following routine to identify ideally a single entry: +1. We filter for existing signatures since it's more precisely specified than the generic "catch all" without a signature-element. +2. We filter for the most precise class of the base. +3. If there are still multiple options, we take the longest signature. +4. If this also didn't help to get a precise result, we iterate through the parameters and for index `i`, we pick the entry with the most precise matching type. We start with index 0 and count upwards, so if param0 leads to a single result, we're done and other entries won't be considered even if all the remaining parameters are more precise or whatever. +5. If nothing helped to get a unique entry, we pick the first remaining entry and hope it's the most precise one. 
\ No newline at end of file diff --git a/docs/docs/CPG/specs/index.md b/docs/docs/CPG/specs/index.md index 89971d3543..354ff321ed 100755 --- a/docs/docs/CPG/specs/index.md +++ b/docs/docs/CPG/specs/index.md @@ -16,4 +16,5 @@ links to the specifications of the following concepts: * Explore our [Graph Model](./graph) * [Data Flow Graph (DFG)](./dfg) +* [Data Flow Graph (DFG) Function Summaries](./dfg-function-summaries.md) * [Evaluation Order Graph (EOG)](./eog) diff --git a/docs/docs/GettingStarted/library.md b/docs/docs/GettingStarted/library.md index da8bfe840d..4f2b384943 100644 --- a/docs/docs/GettingStarted/library.md +++ b/docs/docs/GettingStarted/library.md @@ -59,6 +59,8 @@ val translationConfig = TranslationConfiguration For a complete list of available methods, please check the KDoc. +If you want or need to specify data flow summaries for some methods or functions, call the method `registerFunctionSummaries` when building the `TranslationConfiguration` and pass it a file in the format specified [here](../CPG/specs/dfg-function-summaries.md). + ## 3. Running the analysis Now it's time to get the CPG. 
All you have to do is to run the analysis with the diff --git a/docs/mkdocs.yaml b/docs/mkdocs.yaml index 7ab9c29691..1464c2678f 100755 --- a/docs/mkdocs.yaml +++ b/docs/mkdocs.yaml @@ -161,6 +161,7 @@ nav: - CPG/specs/index.md - "Graph Schema": CPG/specs/graph.md - "Dataflow Graph (DFG)": CPG/specs/dfg.md + - "Dataflow Graph (DFG) Function Summaries": CPG/specs/dfg-function-summaries.md - "Evaluation Order Graph (EOG)": CPG/specs/eog.md - "Implementation": - CPG/impl/index.md diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index f13f46832a..28a4313a66 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -27,6 +27,7 @@ neo4j-ogm-bolt-driver = { module = "org.neo4j:neo4j-ogm-bolt-driver", version.re javaparser = { module = "com.github.javaparser:javaparser-symbol-solver-core", version = "3.25.4"} jackson = { module = "com.fasterxml.jackson.module:jackson-module-kotlin", version = "2.17.0"} +jacksonyml = { module = "com.fasterxml.jackson.dataformat:jackson-dataformat-yaml", version = "2.17.0"} eclipse-runtime = { module = "org.eclipse.platform:org.eclipse.core.runtime", version = "3.31.0"} osgi-service = { module = "org.osgi:org.osgi.service.prefs", version = "1.1.2"} icu4j = { module = "com.ibm.icu:icu4j", version = "74.2"}