Skip to content

Commit

Permalink
Use the `strameline` vocab by default if `nmers > 1` (applies only when `strameline` is left as `None`)
Browse files: browse the repository at this point in the history
  • Loading branch information
ZhiyuanChen committed Apr 29, 2024
1 parent c05dad6 commit ea6079c
Showing 1 changed file with 5 additions and 2 deletions.
7 changes: 5 additions & 2 deletions multimolecule/tokenisers/rna/tokenization_rna.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from __future__ import annotations

import os
from typing import List, Optional

Expand Down Expand Up @@ -28,11 +30,12 @@ def __init__(
convert_to_uppercase: bool = True,
convert_T_to_U: bool = True,
nmers: int = 1,
strameline: bool = False,
strameline: bool | None = None,
**kwargs,
):
self.nmers = nmers
self.all_tokens = get_vocab_list(nmers, strameline)
self.strameline = strameline if strameline is not None else nmers > 1
self.all_tokens = get_vocab_list(nmers, self.strameline)
self._id_to_token = dict(enumerate(self.all_tokens))
self._token_to_id = {tok: ind for ind, tok in enumerate(self.all_tokens)}
self.convert_to_uppercase = convert_to_uppercase
Expand Down

0 comments on commit ea6079c

Please sign in to comment.