Skip to content

Commit

Permalink
ProteinTokenizerTransform
Browse files Browse the repository at this point in the history
  • Loading branch information
0x00b1 committed Jul 19, 2024
1 parent 785fc18 commit 6896b71
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 10 deletions.
5 changes: 4 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ requires-python = ">=3.10"

[project.optional-dependencies]
all = [
"beignet[datasets,docs,mdtraj,test]",
"beignet[datasets,docs,mdtraj,test,tokenizers]",
]
datasets = [
"biopython",
Expand All @@ -44,6 +44,9 @@ test = [
"pytest-mock",
"scipy",
]
tokenizers = [
"transformers",
]

[tool.ruff]
lint.select = [
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"cls_token": "<cls>",
"eos_token": "<eos>",
"mask_token": "<mask>",
"pad_token": "<pad>",
"unk_token": "<unk>"
"cls_token": "<cls>",
"eos_token": "<eos>",
"mask_token": "<mask>",
"pad_token": "<pad>",
"unk_token": "<unk>"
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"clean_up_tokenization_spaces": true,
"do_lower_case": false,
"model_max_length": 1024,
"tokenizer_class": "ProteinMLMTokenizer"
"clean_up_tokenization_spaces": true,
"do_lower_case": false,
"model_max_length": 1024,
"tokenizer_class": "ProteinTokenizer"
}

0 comments on commit 6896b71

Please sign in to comment.