Skip to content

Commit

Permalink
Add location based restrictions to call context query (#1166)
Browse files Browse the repository at this point in the history
* feat: first shot at file filtering for call-context queries

* feat: includeUndefinedFiles defaults to true

* feat-doc: added comments

* feat: fixed issues with file filter in call context queries

* doc-fix: added missing newline

* doc: added doc for file filters in call context queries
  • Loading branch information
LukasPietzschmann authored Nov 21, 2024
1 parent 242d064 commit 6017542
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 4 deletions.
8 changes: 8 additions & 0 deletions src/documentation/print-query-wiki.ts
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,13 @@ Besides this, we provide the following ways to automatically categorize and link
For now, we _only_ offer support for linking to the last call, as the current flow dependency over-approximation is not stable.
4. **Aliases** (\`includeAliases\`): Consider a case like \`f <- function_of_interest\`, do you want calls to \`f\` to be included in the results? There is probably no need to combine this with a global call target!
It's also possible to filter the results based on the following properties:
1. **File** (\`fileFilter\`): This allows you to filter the results based on the file in which the call is located. This can be useful if you are only interested in calls in, e.g., specific folders.
The \`fileFilter\` property is an object made up of two properties:
- **Filter** (\`filter\`): A regular expression that a node's file attribute must match to be considered.
- **Include Undefined Files** (\`includeUndefinedFiles\`): If \`fileFilter\` is set, but a node's file attribute is not present, should we include it in the results? Defaults to \`true\`.
Re-using the example code from above, the following query attaches all calls to \`mean\` to the kind \`visualize\` and the subkind \`text\`,
all calls that start with \`read_\` to the kind \`input\` but only if they are not locally overwritten, and the subkind \`csv-file\`, and links all calls to \`points\` to the last call to \`plot\`:
Expand Down Expand Up @@ -98,6 +105,7 @@ my_test_function()
`;
}
});

registerQueryDocumentation('dataflow', {
name: 'Dataflow Query',
type: 'active',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import type {
CallContextQueryKindResult,
CallContextQueryResult,
CallContextQuerySubKindResult,
FileFilter,
SubCallContextQueryFormat
} from './call-context-query-format';
import type { NodeId } from '../../../r-bridge/lang-4.x/ast/model/processing/node-id';
Expand Down Expand Up @@ -64,6 +65,10 @@ function promoteQueryCallNames(queries: readonly CallContextQuery[]): { promoted
...q,
callName: q.callNameExact ? exactCallNameRegex(q.callName)
: new RegExp(q.callName),
fileFilter: q.fileFilter && {
...q.fileFilter,
filter: new RegExp(q.fileFilter.filter)
},
linkTo: {
...q.linkTo,
/* we have to add another promotion layer whenever we add something without this call name */
Expand All @@ -74,7 +79,11 @@ function promoteQueryCallNames(queries: readonly CallContextQuery[]): { promoted
return {
...q,
callName: q.callNameExact ? exactCallNameRegex(q.callName)
: new RegExp(q.callName)
: new RegExp(q.callName),
fileFilter: q.fileFilter && {
...q.fileFilter,
filter: new RegExp(q.fileFilter.filter)
}
};
}
});
Expand Down Expand Up @@ -156,6 +165,16 @@ function removeIdenticalDuplicates(collector: TwoLayerCollector<string, string,
}
}

/**
 * Decides whether a node located in `file` passes the (already promoted) file filter of a query.
 *
 * @param file   - the file attribute of the node, `undefined` if the node carries none
 * @param filter - the file filter of the query, `undefined` if the query does not restrict files
 * @returns `true` if no filter is given; if the file is unknown, the filter's `includeUndefinedFiles`
 *          setting (falling back to `true`); otherwise whether the filter's regex matches the file.
 */
function doesFilepathMatch(file: string | undefined, filter: FileFilter<RegExp> | undefined): boolean {
	/* without a filter there is nothing to restrict */
	const unrestricted = filter === undefined;
	if(unrestricted) {
		return true;
	}
	/* nodes without file information are governed solely by the opt-out flag */
	return file === undefined
		? (filter.includeUndefinedFiles ?? true)
		: filter.filter.test(file);
}

/**
* Multi-stage call context query resolve.
*
Expand Down Expand Up @@ -203,6 +222,11 @@ export function executeCallContextQueries({ graph, ast }: BasicQueryData, querie
}

for(const query of promotedQueries.filter(q => q.callName.test(info.name))) {
const file = ast.idMap.get(nodeId)?.info.file;
if(!doesFilepathMatch(file, query.fileFilter)) {
continue;
}

let targets: NodeId[] | 'no' | undefined = undefined;
if(query.callTargets) {
targets = satisfiesCallTargets(nodeId, graph, query.callTargets);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,21 @@ import type { PipelineOutput } from '../../../core/steps/pipeline/pipeline';
import type { DEFAULT_DATAFLOW_PIPELINE } from '../../../core/steps/pipeline/default-pipelines';
import { CallTargets } from './identify-link-to-last-call-relation';

export interface DefaultCallContextQueryFormat<CallName extends RegExp | string> extends BaseQueryFormat {
/**
* Restricts the results of a call-context query to nodes located in matching files.
*
* `FilterType` is the representation of the regular expression: the query format instantiates it
* with the same type as the call name (a `string` or a `RegExp`).
*/
export interface FileFilter<FilterType> {
/**
* Regex that a node's file attribute must match to be considered
*/
readonly filter: FilterType;
/**
* If `fileFilter` is set, but a node's `file` attribute is `undefined`, should we include it in the results? Defaults to `true`.
*/
readonly includeUndefinedFiles?: boolean;
}

export interface DefaultCallContextQueryFormat<RegexType extends RegExp | string> extends BaseQueryFormat {
readonly type: 'call-context';
/** Regex regarding the function name, please note that strings will be interpreted as regular expressions too! */
readonly callName: CallName;
readonly callName: RegexType;
/**
* Should we automatically add the `^` and `$` anchors to the regex to make it an exact match?
*/
Expand All @@ -32,6 +43,10 @@ export interface DefaultCallContextQueryFormat<CallName extends RegExp | string>
* Consider a case like `f <- function_of_interest`, do you want uses of `f` to be included in the results?
*/
readonly includeAliases?: boolean;
/**
* Filter that, when set, a node's file attribute must match to be considered
*/
readonly fileFilter?: FileFilter<RegexType>;
}

/**
Expand Down Expand Up @@ -98,7 +113,11 @@ export const CallContextQueryDefinition = {
subkind: Joi.string().optional().description('The subkind of the call, this can be used to uniquely identify the respective call type when grouping the output (e.g., the normalized name, linking `ggplot` to `plot`). Defaults to `.`'),
callTargets: Joi.string().valid(...Object.values(CallTargets)).optional().description('Call targets the function may have. This defaults to `any`. Request this specifically to gain all call targets we can resolve.'),
includeAliases: Joi.boolean().optional().description('Consider a case like `f <- function_of_interest`, do you want uses of `f` to be included in the results?'),
linkTo: Joi.object({
fileFilter: Joi.object({
	/* the key has to be `filter` (not `fileFilter`), as this is the property the `FileFilter` interface declares and the query executor reads */
	filter: Joi.string().required().description('Regex that a node\'s file attribute must match to be considered'),
	includeUndefinedFiles: Joi.boolean().optional().description('If `fileFilter` is set, but a nodes `file` attribute is `undefined`, should we include it in the results? Defaults to `true`.')
}).optional().description('Filter that, when set, a node\'s file attribute must match to be considered'),
linkTo: Joi.object({
type: Joi.string().valid('link-to-last-call').required().description('The type of the linkTo sub-query.'),
callName: Joi.string().required().description('Regex regarding the function name of the last call. Similar to `callName`, strings are interpreted as a regular expression.')
}).optional().description('Links the current call to the last call of the given kind. This way, you can link a call like `points` to the latest graphics plot etc.')
Expand Down

2 comments on commit 6017542

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"artificial" Benchmark Suite

Benchmark suite Current: 6017542 Previous: 2380113 Ratio
Retrieve AST from R code 246.4932475909091 ms (106.3225202474029) 238.99386745454547 ms (99.5397110373604) 1.03
Normalize R AST 17.23065140909091 ms (29.97360345109044) 17.344255954545453 ms (30.98539214696861) 0.99
Produce dataflow information 60.94881086363637 ms (128.2161781809194) 60.94850918181818 ms (127.79646053550617) 1.00
Total per-file 856.6658115454545 ms (1556.9435201245303) 839.1842053181819 ms (1529.9296327924758) 1.02
Static slicing 2.0884953465409675 ms (1.1287373607195745) 2.0548017594197465 ms (1.212534826826947) 1.02
Reconstruct code 0.23941507563013897 ms (0.17765086167870764) 0.23833344722067284 ms (0.18520720238626878) 1.00
Total per-slice 2.34218304620429 ms (1.1936598604915731) 2.306734662937843 ms (1.2769510302954954) 1.02
failed to reconstruct/re-parse 0 # 0 # 1
times hit threshold 0 # 0 # 1
reduction (characters) 0.7878911077490998 # 0.7869360165281424 # 1.00
reduction (normalized tokens) 0.7651712060744233 # 0.7639690077689504 # 1.00
memory (df-graph) 95.46617542613636 KiB (244.77619956879823) 95.46617542613636 KiB (244.77619956879823) 1

This comment was automatically generated by workflow using github-action-benchmark.

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"social-science" Benchmark Suite

Benchmark suite Current: 6017542 Previous: 2380113 Ratio
Retrieve AST from R code 247.81986214 ms (46.60339251854539) 246.34857636 ms (46.05736358259125) 1.01
Normalize R AST 18.97386 ms (14.495631304298653) 19.12205878 ms (14.425168152928613) 0.99
Produce dataflow information 74.76744276000001 ms (71.76208350663104) 74.45471131999999 ms (70.57749655074137) 1.00
Total per-file 7777.37814308 ms (28855.670498234602) 7726.72160172 ms (28528.07523908248) 1.01
Static slicing 16.06714210846099 ms (44.092632744371436) 15.999780665624812 ms (43.68226004466487) 1.00
Reconstruct code 0.29376293538086495 ms (0.15953293380140443) 0.2820169656616924 ms (0.15417410861831182) 1.04
Total per-slice 16.36969559213324 ms (44.12159040977069) 16.29007209329986 ms (43.71440479393139) 1.00
failed to reconstruct/re-parse 0 # 0 # 1
times hit threshold 0 # 0 # 1
reduction (characters) 0.8712997340230448 # 0.8712997340230448 # 1
reduction (normalized tokens) 0.8102441553774778 # 0.8102441553774778 # 1
memory (df-graph) 99.4425 KiB (113.62933451202426) 99.4425 KiB (113.62933451202426) 1

This comment was automatically generated by workflow using github-action-benchmark.

Please sign in to comment.