Skip to content

Commit

Permalink
Fixing a multithreading bug in WordpieceTokenizer.
Browse files (browse the repository at this point in the history)
  • Loading branch information
Craigacp committed Oct 25, 2024
1 parent b20b2d8 commit 51aa172
Showing 1 changed file with 5 additions and 5 deletions.
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2021, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2015, 2024, Oracle and/or its affiliates. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -48,7 +48,7 @@
* and Chinese characters. The resulting tokens are then applied to the
* wordpiece algorithm implemented in {@link Wordpiece} which is driven by an
* input vocabulary which matches tokens and token suffixes as it can. Any
* tokens that are not found in the input vocbulary are marked as "unknown".
* tokens that are not found in the input vocabulary are marked as "unknown".
*/
public class WordpieceTokenizer implements Tokenizer {

@@ -133,7 +133,7 @@ public boolean advance() {
currentToken = this.whitespaceTokenizer.getToken();
getWordpieceTokens();
currentWordpieceIndex = 0;
if (currentWordpieceTokens.size() == 0) {
if (currentWordpieceTokens.isEmpty()) {
return advance();
}
return true;
@@ -181,7 +181,7 @@ private void getWordpieceTokens() {

List<String> wordpieces = wordpiece.wordpiece(text);

if (wordpieces.size() == 0) {
if (wordpieces.isEmpty()) {
return;
} else if (wordpieces.size() == 1) {
String wp = wordpieces.get(0);
@@ -245,7 +245,7 @@ public WordpieceTokenizer clone() {
copy.basicTokenizer = basicTokenizer.clone();
copy.reset = false;
copy.currentToken = null;
copy.currentWordpieceTokens.clear();
copy.currentWordpieceTokens = new ArrayList<>();
copy.currentWordpieceIndex = -1;
return copy;
} catch (CloneNotSupportedException e) {
Expand Down

0 comments on commit 51aa172

Please sign in to comment.