From 24f053cbb1c43d63de0002cdb45ec2046e9eca06 Mon Sep 17 00:00:00 2001 From: Adrian Pemsel Date: Tue, 8 Mar 2016 15:36:00 +0100 Subject: [PATCH] fix bug in tokenizeOnWords --- src/TokenizedAttributedString.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/TokenizedAttributedString.php b/src/TokenizedAttributedString.php index 825e960..e8fa2a8 100644 --- a/src/TokenizedAttributedString.php +++ b/src/TokenizedAttributedString.php @@ -163,7 +163,7 @@ public static function tokenizeOnWhitespace($string) { * @return array array of two arrays, with tokens at index 0 and their offsets at index 1 */ public static function tokenizeOnWords($string) { - return self::tokenizeOnRegex($string, '/([\w]+)/u'); + return self::tokenizeOnRegex($string, '/(\p{L}+)/u'); } /**