Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Move support for semgrep #500

Merged
merged 11 commits into from
Jul 29, 2024
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -121,3 +121,6 @@
[submodule "lang/semgrep-grammars/src/tree-sitter-circom"]
path = lang/semgrep-grammars/src/tree-sitter-circom
url = https://github.com/Decurity/tree-sitter-circom.git
[submodule "lang/semgrep-grammars/src/tree-sitter-move"]
path = lang/semgrep-grammars/src/tree-sitter-move
url = https://github.com/tzakian/tree-sitter-move.git
1 change: 1 addition & 0 deletions lang/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ SUPPORTED_DIALECTS = \
kotlin \
lua \
make \
move \
move-on-aptos \
ocaml \
php \
Expand Down
1 change: 1 addition & 0 deletions lang/move/Makefile
7 changes: 7 additions & 0 deletions lang/move/extensions.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# File extensions for the target language, one per line. This file is used for
# collecting parsing stats from the repos specified in 'projects.txt'. For example:
#
# .h
# .c
#
.move
3 changes: 3 additions & 0 deletions lang/move/fyi.list
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
semgrep-grammars/src/tree-sitter-move/LICENSE
semgrep-grammars/src/tree-sitter-move/grammar.js
semgrep-grammars/src/semgrep-move/grammar.js
6 changes: 6 additions & 0 deletions lang/move/projects.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Git URLs of publicly-accessible projects to be used for parsing stats,
# one per line.
#
https://github.com/AftermathFinance/move-interfaces
https://github.com/Bucket-Protocol/bucket-interface
https://github.com/MystenLabs/sui-axelar
7 changes: 7 additions & 0 deletions lang/move/test/ok/hello.move
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
/// Minimal example module: defines a single public function that prints a greeting.
module hello_sui::hello {
/// Binds a byte-string literal to `message` and passes it to `std::debug::print`.
/// Takes no arguments and returns nothing.
public fun say_hello() {
let message = b"Hello, World!";
// Print the message
std::debug::print(message);
}
}
110 changes: 110 additions & 0 deletions lang/move/test/ok/string.move
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
// Copyright (c) Mysten Labs, Inc.
// SPDX-License-Identifier: Apache-2.0

/// The `string` module defines the `String` type which represents UTF8 encoded strings.
///
/// All validation (UTF8 well-formedness, char-boundary checks) is delegated to the
/// `native` functions declared at the bottom of this module.
module std::string {
use std::ascii;
use std::vector;
use std::option::{Self, Option};

/// Abort code: the supplied bytes are not a valid UTF8 encoding.
const EINVALID_UTF8: u64 = 1;

/// Abort code: an index is out of range or not on a UTF8 char boundary.
const EINVALID_INDEX: u64 = 2;

/// A `String` holds a sequence of bytes which is guaranteed to be in utf8 format.
struct String has copy, drop, store {
bytes: vector<u8>,
}

/// Creates a new string from a sequence of bytes.
/// Aborts with `EINVALID_UTF8` if the bytes do not represent valid utf8.
public fun utf8(bytes: vector<u8>): String {
assert!(internal_check_utf8(&bytes), EINVALID_UTF8);
String{bytes}
}

/// Converts an ASCII string to a UTF8 string by wrapping the bytes returned by
/// `ascii::into_bytes`. No UTF8 check is performed here.
public fun from_ascii(s: ascii::String): String {
String { bytes: ascii::into_bytes(s) }
}

/// Converts a UTF8 string to an ASCII string.
/// Aborts if `s` is not valid ASCII.
public fun to_ascii(s: String): ascii::String {
// Unpack the struct to take ownership of the underlying bytes.
let String { bytes } = s;
ascii::string(bytes)
}

/// Tries to create a new string from a sequence of bytes.
/// Returns `option::some` wrapping the string when the UTF8 check passes and
/// `option::none()` otherwise, instead of aborting like `utf8`.
public fun try_utf8(bytes: vector<u8>): Option<String> {
if (internal_check_utf8(&bytes)) {
option::some(String{bytes})
} else {
option::none()
}
}

/// Returns a reference to the underlying byte vector.
public fun bytes(s: &String): &vector<u8> {
&s.bytes
}

/// Checks whether this string is empty.
public fun is_empty(s: &String): bool {
vector::is_empty(&s.bytes)
}

/// Returns the length of this string, in bytes.
public fun length(s: &String): u64 {
vector::length(&s.bytes)
}

/// Appends the string `r` to `s` by handing `r.bytes` to `vector::append`.
public fun append(s: &mut String, r: String) {
vector::append(&mut s.bytes, r.bytes)
}

/// Appends bytes which must be in valid utf8 format.
/// The bytes are first converted with `utf8`, so this aborts with `EINVALID_UTF8`
/// when they fail the UTF8 check.
public fun append_utf8(s: &mut String, bytes: vector<u8>) {
append(s, utf8(bytes))
}

/// Inserts the other string at the byte index in the given string. The index must be at a valid utf8 char
/// boundary; otherwise this aborts with `EINVALID_INDEX`.
public fun insert(s: &mut String, at: u64, o: String) {
let bytes = &s.bytes;
// `at` may equal the length (insertion at the very end) but must not exceed it.
assert!(at <= vector::length(bytes) && internal_is_char_boundary(bytes, at), EINVALID_INDEX);
let l = length(s);
// Split `s` at `at`, then rebuild it as front + o + end.
let front = sub_string(s, 0, at);
let end = sub_string(s, at, l);
append(&mut front, o);
append(&mut front, end);
// Overwrite the original string with the rebuilt one.
*s = front;
}

/// Returns a sub-string using the given byte indices, where `i` is the first byte position and `j` is the start
/// of the first byte not included (or the length of the string). The indices must be at valid utf8 char boundaries,
/// guaranteeing that the result is valid utf8. Aborts with `EINVALID_INDEX` when `j` exceeds the length,
/// `i` exceeds `j`, or either index is not on a char boundary.
public fun sub_string(s: &String, i: u64, j: u64): String {
let bytes = &s.bytes;
let l = vector::length(bytes);
assert!(
j <= l && i <= j && internal_is_char_boundary(bytes, i) && internal_is_char_boundary(bytes, j),
EINVALID_INDEX
);
String{bytes: internal_sub_string(bytes, i, j)}
}

/// Computes the index of the first occurrence of a string. Returns `length(s)` if no occurrence found.
public fun index_of(s: &String, r: &String): u64 {
internal_index_of(&s.bytes, &r.bytes)
}


// Native API
// These functions are declared `native`: only their signatures appear in this module,
// the implementations are provided outside of it.
native fun internal_check_utf8(v: &vector<u8>): bool;
native fun internal_is_char_boundary(v: &vector<u8>, i: u64): bool;
native fun internal_sub_string(v: &vector<u8>, i: u64, j: u64): vector<u8>;
native fun internal_index_of(v: &vector<u8>, r: &vector<u8>): u64;
}
1 change: 1 addition & 0 deletions lang/semgrep-grammars/lang/move
1 change: 1 addition & 0 deletions lang/semgrep-grammars/src/semgrep-move/Makefile
29 changes: 29 additions & 0 deletions lang/semgrep-grammars/src/semgrep-move/grammar.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
/*
semgrep-move

Extends the standard move grammar with semgrep pattern constructs.
*/

const base_grammar = require('tree-sitter-move/grammar');

module.exports = grammar(base_grammar, {
name: 'move',

conflicts: ($, previous) => previous.concat([
]),

/*
Support for semgrep ellipsis ('...') and metavariables ('$FOO'),
if they're not already part of the base grammar.
*/
rules: {
/*
semgrep_ellipsis: $ => '...',

_expression: ($, previous) => choice(
$.semgrep_ellipsis,
...previous.members
),
*/
}
});
1 change: 1 addition & 0 deletions lang/semgrep-grammars/src/semgrep-move/prep
Empty file.
2 changes: 1 addition & 1 deletion lang/semgrep-grammars/src/tree-sitter-c-sharp
Submodule tree-sitter-c-sharp updated 70 files
+0 −39 .editorconfig
+36 −0 .github/workflows/build.yml
+0 −35 .github/workflows/ci.yml
+0 −112 Makefile
+26 −23 README.md
+6 −17 binding.gyp
+0 −16 bindings/c/tree-sitter-c_sharp.h
+0 −11 bindings/c/tree-sitter-c_sharp.pc.in
+0 −13 bindings/go/binding.go
+0 −15 bindings/go/binding_test.go
+0 −5 bindings/go/go.mod
+22 −14 bindings/node/binding.cc
+0 −28 bindings/node/index.d.ts
+15 −3 bindings/node/index.js
+0 −5 bindings/python/tree_sitter_c_sharp/__init__.py
+0 −1 bindings/python/tree_sitter_c_sharp/__init__.pyi
+0 −27 bindings/python/tree_sitter_c_sharp/binding.c
+0 −0 bindings/python/tree_sitter_c_sharp/py.typed
+0 −3 bindings/rust/build.rs
+2 −2 bindings/swift/TreeSitterCSharp/csharp.h
+481 −0 corpus/attributes.txt
+663 −0 corpus/classes.txt
+229 −231 corpus/contextual-keywords.txt
+59 −0 corpus/enums.txt
+957 −1,155 corpus/expressions.txt
+65 −47 corpus/identifiers.txt
+335 −0 corpus/interfaces.txt
+1,263 −0 corpus/literals.txt
+138 −62 corpus/preprocessor.txt
+436 −0 corpus/query-syntax.txt
+527 −0 corpus/records.txt
+273 −0 corpus/source-file-structure.txt
+2,033 −0 corpus/statements.txt
+140 −0 corpus/structs.txt
+83 −0 corpus/type-events.txt
+382 −0 corpus/type-fields.txt
+494 −0 corpus/type-methods.txt
+368 −368 corpus/type-operators.txt
+233 −0 corpus/type-properties.txt
+40 −54 grammar.js
+4 −25 package.json
+0 −33 pyproject.toml
+21 −26 queries/highlights.scm
+5 −5 script/file_sizes.txt
+0 −60 setup.py
+149 −307 src/grammar.json
+477 −708 src/node-types.json
+1,172,577 −948,918 src/parser.c
+0 −54 src/tree_sitter/alloc.h
+0 −290 src/tree_sitter/array.h
+13 −54 src/tree_sitter/parser.h
+3 −0 test.js
+0 −338 test/corpus/attributes.txt
+0 −365 test/corpus/classes.txt
+0 −22 test/corpus/enums.txt
+0 −210 test/corpus/interfaces.txt
+0 −793 test/corpus/literals.txt
+0 −265 test/corpus/query-syntax.txt
+0 −193 test/corpus/records.txt
+0 −186 test/corpus/source-file-structure.txt
+0 −1,086 test/corpus/statements.txt
+0 −73 test/corpus/structs.txt
+0 −67 test/corpus/type-events.txt
+0 −255 test/corpus/type-fields.txt
+0 −295 test/corpus/type-methods.txt
+0 −163 test/corpus/type-properties.txt
+1 −1 test/highlight/baseline.cs
+0 −95 test/highlight/operators.cs
+0 −67 test/highlight/types.cs
+0 −25 test/highlight/variableDeclarations.cs
1 change: 1 addition & 0 deletions lang/semgrep-grammars/src/tree-sitter-move
Submodule tree-sitter-move added at b454ad
2 changes: 1 addition & 1 deletion lang/semgrep-grammars/src/tree-sitter-move-on-aptos
Loading