Skip to content

Commit

Permalink
test and fix for a bug in TokenizedAttributedString
Browse files Browse the repository at this point in the history
  • Loading branch information
apemsel committed Mar 3, 2016
1 parent f5c94b4 commit bc8764e
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 6 deletions.
10 changes: 5 additions & 5 deletions src/TokenizedAttributedString.php
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ public function getTokenOffset($i) {
public function setTokenAttribute($i, $attribute, $state = true) {
// Apply $attribute (with the given $state) to the span covered by token $i:
// look up the token text and its start offset, compute the token length,
// and delegate to setLength().
$token = $this->tokens[$i];
$offset = $this->tokenOffsets[$i];
// Pre-fix line of this diff: strlen() counts bytes, so a token containing
// multibyte UTF-8 characters (e.g. "bär") yields a length that is too large.
$length = strlen($token);
// Post-fix line of this diff: length in UTF-8 characters. This assignment
// overwrites the one above, so this is the value passed to setLength().
// NOTE(review): presumably tokenOffsets and setLength() work in character
// units rather than bytes — confirm against the rest of the class.
$length = mb_strlen($token, "utf-8");

return $this->setLength($offset, $length, $attribute, $state);
}
Expand All @@ -105,7 +105,7 @@ public function setTokenAttribute($i, $attribute, $state = true) {
*/
public function setTokenRangeAttribute($from, $to, $attribute, $state = true) {
// Apply $attribute (with the given $state) to the span that starts at the
// offset of token $from and ends just past the last character of token $to,
// delegating to setRange().
$fromOffset = $this->tokenOffsets[$from];
// Pre-fix line of this diff: adds the byte length of the last token, which
// overshoots when that token contains multibyte UTF-8 characters.
$toOffset = $this->tokenOffsets[$to] + strlen($this->tokens[$to]);
// Post-fix line of this diff: adds the length in UTF-8 characters. This
// assignment overwrites the one above, so this is the end offset used.
// NOTE(review): presumably tokenOffsets and setRange() work in character
// units rather than bytes — confirm against the rest of the class.
$toOffset = $this->tokenOffsets[$to] + mb_strlen($this->tokens[$to], "utf-8");

return $this->setRange($fromOffset, $toOffset, $attribute, $state);
}
Expand Down Expand Up @@ -148,7 +148,7 @@ public function lowercaseTokens() {
* Tokenize a string on whitespace
*
* @param string $string string to be tokenized
* @return string[] tokens
* @return array array of two arrays, with tokens at index 0 and their offsets at index 1
*/
public static function tokenizeOnWhitespace($string) {
// Matches potential whitespace in front of the token and the token itself.
Expand All @@ -160,7 +160,7 @@ public static function tokenizeOnWhitespace($string) {
* Tokenize a string on words
*
* @param string $string string to be tokenized
* @return string[] tokens
* @return array array of two arrays, with tokens at index 0 and their offsets at index 1
*/
public static function tokenizeOnWords($string) {
return self::tokenizeOnRegex($string, '/([\w]+)/u');
Expand All @@ -171,7 +171,7 @@ public static function tokenizeOnWords($string) {
*
* @param string $string string to be tokenized
* @param string $pattern regex. The token must be captured in the first subgroup.
* @return string[] tokens
* @return array array of two arrays, with tokens at index 0 and their offsets at index 1
*/
public static function tokenizeOnRegex($string, $pattern)
{
Expand Down
2 changes: 1 addition & 1 deletion test/unit/TokenizedAttributedStringTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ public function testGetTokenCount() {
}

public function testSetTokenAttribute() {
$as = new TokenizedAttributedString("foo bar baz");
$as = new TokenizedAttributedString("foo bär baz");
$as->setTokenAttribute(1, "bold");
$this->assertEquals(true, $as->is("bold", 5));
$this->assertEquals(false, $as->is("bold", 3));
Expand Down

0 comments on commit bc8764e

Please sign in to comment.