diff --git a/src/AttributedString.php b/src/AttributedString.php
index 33b8200..7f0e2a8 100644
--- a/src/AttributedString.php
+++ b/src/AttributedString.php
@@ -401,7 +401,7 @@ public function attributeToString($attribute, $true = "-", $false = " ") {
    *
    * May improve performance if setPattern is used extensively
    */
-  public function enablebyteToCharCache() {
+  public function enableByteToCharCache() {
     $this->byteToChar = [];
     $char = 0;
     for ($i = 0; $i < strlen($this->string); ) {
diff --git a/src/TokenizedAttributedString.php b/src/TokenizedAttributedString.php
index 9301a86..773d5df 100644
--- a/src/TokenizedAttributedString.php
+++ b/src/TokenizedAttributedString.php
@@ -19,6 +19,8 @@ class TokenizedAttributedString extends AttributedString
    * @param string $tokenizer Tokenizer to use, either "whitespace", "word" or a custom regex
    */
   public function __construct($string, $tokenizer = "whitespace") {
+    parent::__construct($string);
+    
     $tokenizerFunction = "tokenizeOn".ucfirst($tokenizer);
 
     if ($tokenizer[0] == "/") {
@@ -30,7 +32,11 @@ public function __construct($string, $tokenizer = "whitespace") {
       list($this->tokens, $this->tokenOffsets) = self::$tokenizerFunction($string);
     }
     
-    parent::__construct($string);
+    // The tokenizers report byte offsets; convert them to character offsets
+    $this->enableByteToCharCache();
+    $this->tokenOffsets = array_map(function($o) {
+      return $this->byteToCharOffset($o);
+    }, $this->tokenOffsets);
   }
   
   /**
@@ -148,7 +154,7 @@ public function lowercaseTokens() {
    * Tokenize a string on whitespace
    *
    * @param string $string string to be tokenized
-   * @return array array of two arrays, with tokens at index 0 and their offsets at index 1
+   * @return array array of two arrays, with tokens at index 0 and their byte offsets at index 1
    */
   public static function tokenizeOnWhitespace($string) {
     // Matches pontential whitespace in front of the token and the token itself.
@@ -160,7 +166,7 @@ public static function tokenizeOnWhitespace($string) {
    * Tokenize a string on words
    *
    * @param string $string string to be tokenized
-   * @return array array of two arrays, with tokens at index 0 and their offsets at index 1
+   * @return array array of two arrays, with tokens at index 0 and their byte offsets at index 1
    */
   public static function tokenizeOnWords($string) {
     return self::tokenizeOnRegex($string, '/([\p{L}\p{S}\p{N}]+)/u');
@@ -171,7 +177,7 @@ public static function tokenizeOnWords($string) {
    *
    * @param string $string string to be tokenized
    * @param string $pattern regex. The token must be captured in the first subgroup.
-   * @return array array of two arrays, with tokens at index 0 and their offsets at index 1
+   * @return array array of two arrays, with tokens at index 0 and their byte offsets at index 1
    */
   public static function tokenizeOnRegex($string, $pattern)
   {
diff --git a/test/unit/TokenizedAttributedStringTest.php b/test/unit/TokenizedAttributedStringTest.php
index c195e16..dc69546 100644
--- a/test/unit/TokenizedAttributedStringTest.php
+++ b/test/unit/TokenizedAttributedStringTest.php
@@ -31,6 +31,10 @@ public function testGetToken() {
   public function testGetTokenOffset() {
     $as = new TokenizedAttributedString(" one two\nthree\rfour\n\r five  ");
     $this->assertEquals(9, $as->getTokenOffset(2));
+    // Non-ASCII input: "ö" is expected at offset 2, i.e. counted in characters rather than bytes (umlauts are multi-byte in UTF-8)
+    $as = new TokenizedAttributedString("ä ö ü");
+    $this->assertEquals(2, $as->getTokenOffset(1));
+    
   }
   
   public function testGetTokenCount() {