implementation for TokenizedAttributedString getTokenCount() and make tokenizers public
apemsel · Mar 3, 2016 · 7e7a771 · 7e7a771
1 parent 491d35a
commit 7e7a771
Show file tree

Hide file tree

Showing 3 changed files with 34 additions and 8 deletions.
diff --git a/src/AttributedString.php b/src/AttributedString.php
@@ -3,7 +3,7 @@
 
 /**
  * Basic class to work with attributed strings.
- * 
+ *
  * Attributed strings are strings that can have multiple attributes per character of the string
  *
  * @author Adrian Pemsel <[email protected]>

diff --git a/src/MutableAttributedString.php b/src/MutableAttributedString.php
@@ -1,5 +1,4 @@
 <?php
-
 namespace apemsel\AttributedString;
 
 /**

diff --git a/src/TokenizedAttributedString.php b/src/TokenizedAttributedString.php
@@ -1,5 +1,4 @@
 <?php
-
 namespace apemsel\AttributedString;
 
 /**
@@ -46,11 +45,20 @@ public function getTokens() {
   /**
    * Return all tokens' offsets
    *
-   * @return in[] offsets
+   * @return int[] offsets
    */
   public function getTokenOffsets() {
     return $this->tokenOffsets;
   }
+
+  /**
+   * Return the number of tokens
+   *
+   * @return int count
+   */
+  public function getTokenCount() {
+    return count($this->tokens);
+  }
 
   /**
    * Get indicated token
@@ -127,7 +135,7 @@ public function attributesAtToken($i) {
     return $this->attributesAt($this->tokenOffsets[$i]);
   }
 
-  /*
+  /**
    * Convert all tokens to lower case
    */
   public function lowercaseTokens() {
@@ -136,17 +144,36 @@ public function lowercaseTokens() {
     }, $this->tokens);
   }
 
-  protected static function tokenizeOnWhitespace($string) {
+  /**
+   * Tokenize a string on whitespace
+   *
+   * @param string $string string to be tokenized
+   * @return string[] tokens
+   */
+  public static function tokenizeOnWhitespace($string) {
     // Matches potential whitespace in front of the token and the token itself.
     // Matching the whitespace could be omitted, but that results in slower execution ;-)
     return self::tokenizeOnRegex($string, '/[\s\n\r]*([^\s\n\r]+)/u');
   }
 
-  protected static function tokenizeOnWords($string) {
+  /**
+   * Tokenize a string on words
+   *
+   * @param string $string string to be tokenized
+   * @return string[] tokens
+   */
+  public static function tokenizeOnWords($string) {
     return self::tokenizeOnRegex($string, '/([\w]+)/u');
   }
 
-  protected static function tokenizeOnRegex($string, $pattern)
+  /**
+   * Tokenize a string with a given regex
+   *
+   * @param string $string string to be tokenized
+   * @param string $pattern regex. The token must be captured in the first subgroup.
+   * @return string[] tokens
+   */
+  public static function tokenizeOnRegex($string, $pattern)
   {
     // Fastest way to get both tokens and their offsets, but not easy to understand.
     preg_match_all($pattern, $string, $matches, PREG_OFFSET_CAPTURE);
-Original file line number
+Diff line change
@@ Expand Up / @@ -3,7 +3,7 @@ @@
     /**
      * Basic class to work with attributed strings.
-     *
+     *
      * Attributed strings are strings that can have multiple attributes per character of the string
      *
      * @author Adrian Pemsel <[email protected]>
@@ Expand Down @@