diff --git a/doc/class-apemsel.AttributedString.AttributedString.html b/doc/class-apemsel.AttributedString.AttributedString.html index 8abb0bd..7a7c494 100644 --- a/doc/class-apemsel.AttributedString.AttributedString.html +++ b/doc/class-apemsel.AttributedString.AttributedString.html @@ -124,7 +124,7 @@

Direct known subclasses

Author: Adrian Pemsel apemsel@gmail.com
- Located at AttributedString.php + Located at AttributedString.php
@@ -552,6 +552,96 @@

Returns

+ + + + + + + public + string[] + + + + +
+ # + substrings( string $attribute, integer $offset = 0, boolean $state = true, boolean $strict = true ) + +
+

Return an array of substrings that have a given attribute

+
+ + +
+ + + + + public + string + + + + +
+ # + filter( string $attribute, integer $offset = 0, boolean $state = true, boolean $strict = true, string $glue = "" ) + +
+

Return all parts of the string that have a given attribute as new string

+
+ +
@@ -566,7 +656,7 @@

Returns

# - attributesAt( integer $pos ) + attributesAt( integer $pos )

Return all attributes at a given offset

@@ -604,7 +694,7 @@

Returns

# - toHtml( string $tag = "span", string $classPrefix = "" ) + toHtml( string $tag = "span", string $classPrefix = "" )

Convert to HTML, using a given class to mark attribute spans

@@ -648,7 +738,7 @@

Throws

# - combineAttributes( string $op, string $attribute1, string $attribute2 = false, string $to = false ) + combineAttributes( string $op, string $attribute1, string $attribute2 = false, string $to = false )

Combine attributes with the given boolean operation

@@ -692,7 +782,7 @@

Throws

# - enablebyteToCharCache( ) + enablebyteToCharCache( )

Enable and fill cache for byte to char offset conversion

@@ -723,7 +813,7 @@

Throws

# - byteToCharOffset( $boff ) + byteToCharOffset( $boff )
@@ -752,7 +842,7 @@

Throws

# - charToByteOffset( $char ) + charToByteOffset( $char )
@@ -781,7 +871,7 @@

Throws

# - byteToCharOffsetString( $string, $boff ) + byteToCharOffsetString( $string, $boff )
@@ -810,7 +900,7 @@

Throws

# - utf8CharLen( $byte ) + utf8CharLen( $byte )
@@ -839,7 +929,7 @@

Throws

# - count( ) + count( )

Return string length (number of UTF-8 chars, not strlen())

diff --git a/doc/class-apemsel.AttributedString.MutableAttributedString.html b/doc/class-apemsel.AttributedString.MutableAttributedString.html index 55b33d7..afdc575 100644 --- a/doc/class-apemsel.AttributedString.MutableAttributedString.html +++ b/doc/class-apemsel.AttributedString.MutableAttributedString.html @@ -270,6 +270,7 @@

See

createAttribute(), deleteAttribute(), enablebyteToCharCache(), + filter(), hasAttribute(), is(), searchAttribute(), @@ -277,6 +278,7 @@

See

setPattern(), setRange(), setSubstring(), + substrings(), toHtml(), utf8CharLen()

diff --git a/doc/class-apemsel.AttributedString.TokenizedAttributedString.html b/doc/class-apemsel.AttributedString.TokenizedAttributedString.html index 588a9cf..88e808f 100644 --- a/doc/class-apemsel.AttributedString.TokenizedAttributedString.html +++ b/doc/class-apemsel.AttributedString.TokenizedAttributedString.html @@ -662,6 +662,7 @@

Returns

createAttribute(), deleteAttribute(), enablebyteToCharCache(), + filter(), hasAttribute(), is(), searchAttribute(), @@ -669,6 +670,7 @@

Returns

setPattern(), setRange(), setSubstring(), + substrings(), toHtml(), utf8CharLen()

diff --git a/doc/source-class-apemsel.AttributedString.AttributedString.html b/doc/source-class-apemsel.AttributedString.AttributedString.html index 94499ef..a4a80f3 100644 --- a/doc/source-class-apemsel.AttributedString.AttributedString.html +++ b/doc/source-class-apemsel.AttributedString.AttributedString.html @@ -259,7 +259,7 @@

Classes

175 $a = $this->attributes[$attribute]; 176 177 if ($offset) { -178 $a = array_slice($a, $offset, $this->length, true); +178 $a = array_slice($a, $offset, NULL, true); 179 } 180 181 $pos = array_search($state, $a, $strict); @@ -269,7 +269,7 @@

Classes

185 return false; 186 } 187 -188 $a = array_slice($a, $pos); +188 $a = array_slice($a, $pos - $offset); 189 $length = array_search(!$state, $a, $strict); 190 $length = $length ? $length : $this->length - $pos; 191 @@ -291,198 +291,236 @@

Classes

207 } 208 209 /** -210 * Return all attributes at a given offset +210 * Return an array of substrings that have a given attribute 211 * -212 * @param int $pos offset -213 * @return string[] attributes at the given offset -214 */ -215 public function attributesAt($pos) { -216 $attributes = []; -217 -218 foreach ($this->attributes as $attribute => &$map) { -219 if ($map[$pos]) { -220 $attributes[] = $attribute; -221 } -222 } -223 -224 return $attributes; -225 } -226 -227 /** -228 * Convert to HTML, using a given class to mark attribute spans -229 * -230 * @param string $tag HTML tag to use for the spans (defaults is "<span>") -231 * @param string $classPrefix Optional prefix used to convert the attribute names to class names -232 * @return string HTML -233 * @throws Exception if the AttributedString cannot be converted to HTML due to improper nesting -234 */ -235 public function toHtml($tag = "span", $classPrefix = "") { -236 foreach($this->attributes as $attribute => $map) $state[$attribute] = false; -237 -238 $html = ""; -239 $stack = []; -240 $lastPos = 0; -241 -242 for ($i=0; $i<$this->length; $i++) -243 { -244 foreach($this->attributes as $attribute => &$map) -245 { -246 if ($this->attributes[$attribute][$i] != $state[$attribute]) -247 { -248 $state[$attribute] = $this->attributes[$attribute][$i]; -249 -250 $html .= mb_substr($this->string, $lastPos, $i-$lastPos, "utf-8"); -251 $lastPos = $i; -252 -253 if ($state[$attribute]) -254 { -255 $html .= "<$tag class=\"$classPrefix$attribute\">"; -256 $stack[] = $attribute; -257 } -258 else -259 { -260 if ($attribute != array_pop($stack)) -261 { -262 throw new Exception("Attributes are not properly nested for HTML conversion"); -263 } -264 $html .= "</$tag>"; -265 } -266 } -267 } -268 } -269 -270 $html .= mb_substr($this->string, $lastPos, $this->length-$lastPos, 'utf-8'); -271 -272 // Close all spans that remained open -273 $html .= str_repeat("</$tag>", count($stack)); -274 -275 return $html; -276 } -277 -278 /** -279 * Combine attributes with the given boolean operation -280 * -281 * @param string $op one of or|xor|and|not -282 * @param string $attribute1 name of the first attribute -283 * @param string $attribute2 Name of the second attribute. Ignored for "not" operation. -284 * @param string $to optional name of the attribute to copy the result to -285 * @throws InvalidArgumentException if one of the attributes does not exist or an unkown operation is given -286 */ -287 public function combineAttributes($op, $attribute1, $attribute2 = false, $to = false) -288 { -289 $to = isset($to) ? $to : $attribute1; -290 $op = strtolower($op); -291 -292 if ($op == "not") { -293 $attribute2 = $attribute1; -294 } -295 -296 if (!$this->hasAttribute($attribute1) or !$this->hasAttribute($attribute2)) { -297 throw new \InvalidArgumentException("Attribute does not exist"); -298 } -299 -300 if (!isset($this->attributes[$to])) { -301 $this->attributes[$to] = []; // No need to init because array is created below -302 } -303 -304 // Switch outside the loops for speed -305 switch ($op) { -306 case 'or': -307 for($i = 0; $i < $this->length; $i++) { -308 $this->attributes[$to][$i] = $this->attributes[$attribute1][$i] || $this->attributes[$attribute2][$i]; -309 } -310 break; -311 -312 case 'xor': -313 for($i = 0; $i < $this->length; $i++) { -314 $this->attributes[$to][$i] = ($this->attributes[$attribute1][$i] xor $this->attributes[$attribute2][$i]); -315 } -316 break; -317 -318 case 'and': -319 for($i = 0; $i < $this->length; $i++) { -320 $this->attributes[$to][$i] = $this->attributes[$attribute1][$i] && $this->attributes[$attribute2][$i]; -321 } -322 break; -323 -324 case 'not': -325 for($i = 0; $i < $this->length; $i++) { -326 $this->attributes[$to][$i] = !$this->attributes[$attribute1][$i]; -327 } -328 break; -329 -330 default: -331 throw new \InvalidArgumentException("Unknown operation"); +212 * @param string $attribute name of the attribute +213 * @param int $pos offset +214 * @param bool $state the state to look for (default is true) +215 * @param bool $strict perform strict comparison during search +216 * @return string[] array of strings with given attribute +217 */ +218 public function substrings($attribute, $offset = 0, $state = true, $strict = true) +219 { +220 $substrings = []; +221 while (false !== $pl = $this->searchAttribute($attribute, $offset, true, $state, $strict)) +222 { +223 //var_dump($pl); +224 $substring = mb_substr($this->string, $pl[0], $pl[1], "UTF-8"); +225 $substrings[] = $substring; +226 $offset = $pl[0] + $pl[1]; +227 } +228 +229 return $substrings; +230 } +231 +232 /** +233 * Return all parts of the string that have a given attribute as new string +234 * +235 * @param string $attribute name of the attribute +236 * @param int $pos offset +237 * @param bool $state the state to look for (default is true) +238 * @param bool $strict perform strict comparison during search +239 * @param string $glue glue that is inserted between the parts, default is nothing ("") +240 * @return string combined filtered string +241 */ +242 public function filter($attribute, $offset = 0, $state = true, $strict = true, $glue = "") +243 { +244 return implode($glue, $this->substrings($attribute, $offset, $state, $strict)); +245 } +246 +247 /** +248 * Return all attributes at a given offset +249 * +250 * @param int $pos offset +251 * @return string[] attributes at the given offset +252 */ +253 public function attributesAt($pos) { +254 $attributes = []; +255 +256 foreach ($this->attributes as $attribute => &$map) { +257 if ($map[$pos]) { +258 $attributes[] = $attribute; +259 } +260 } +261 +262 return $attributes; +263 } +264 +265 /** +266 * Convert to HTML, using a given class to mark attribute spans +267 * +268 * @param string $tag HTML tag to use for the spans (defaults is "<span>") +269 * @param string $classPrefix Optional prefix used to convert the attribute names to class names +270 * @return string HTML +271 * @throws Exception if the AttributedString cannot be converted to HTML due to improper nesting +272 */ +273 public function toHtml($tag = "span", $classPrefix = "") { +274 foreach($this->attributes as $attribute => $map) $state[$attribute] = false; +275 +276 $html = ""; +277 $stack = []; +278 $lastPos = 0; +279 +280 for ($i=0; $i<$this->length; $i++) +281 { +282 foreach($this->attributes as $attribute => &$map) +283 { +284 if ($this->attributes[$attribute][$i] != $state[$attribute]) +285 { +286 $state[$attribute] = $this->attributes[$attribute][$i]; +287 +288 $html .= mb_substr($this->string, $lastPos, $i-$lastPos, "utf-8"); +289 $lastPos = $i; +290 +291 if ($state[$attribute]) +292 { +293 $html .= "<$tag class=\"$classPrefix$attribute\">"; +294 $stack[] = $attribute; +295 } +296 else +297 { +298 if ($attribute != array_pop($stack)) +299 { +300 throw new Exception("Attributes are not properly nested for HTML conversion"); +301 } +302 $html .= "</$tag>"; +303 } +304 } +305 } +306 } +307 +308 $html .= mb_substr($this->string, $lastPos, $this->length-$lastPos, 'utf-8'); +309 +310 // Close all spans that remained open +311 $html .= str_repeat("</$tag>", count($stack)); +312 +313 return $html; +314 } +315 +316 /** +317 * Combine attributes with the given boolean operation +318 * +319 * @param string $op one of or|xor|and|not +320 * @param string $attribute1 name of the first attribute +321 * @param string $attribute2 Name of the second attribute. Ignored for "not" operation. +322 * @param string $to optional name of the attribute to copy the result to +323 * @throws InvalidArgumentException if one of the attributes does not exist or an unkown operation is given +324 */ +325 public function combineAttributes($op, $attribute1, $attribute2 = false, $to = false) +326 { +327 $to = isset($to) ? $to : $attribute1; +328 $op = strtolower($op); +329 +330 if ($op == "not") { +331 $attribute2 = $attribute1; 332 } -333 } -334 -335 /** -336 * Enable and fill cache for byte to char offset conversion -337 * -338 * May improve performance if setPattern is used extensively -339 */ -340 public function enablebyteToCharCache() { -341 $this->byteToChar = []; -342 $char = 0; -343 for ($i = 0; $i < strlen($this->string); ) { -344 $char++; -345 $byte = $this->string[$i]; -346 $cl = self::utf8CharLen($byte); -347 $i += $cl; -348 -349 $this->byteToChar[$i] = $char; -350 } -351 } -352 -353 protected function byteToCharOffset($boff) { -354 if (isset($this->byteToChar[$boff])) return $this->byteToChar[$boff]; -355 -356 return $this->byteToChar[$boff] = self::byteToCharOffsetString($this->string, $boff); -357 } -358 -359 protected function charToByteOffset($char) { -360 $byte = strlen(mb_substr($this->string, 0, $char, "utf-8")); -361 if (!isset($this->byteToChar[$byte])) $this->byteToChar[$byte] = $char; -362 -363 return $byte; -364 } -365 -366 protected static function byteToCharOffsetString($string, $boff) { -367 $result = 0; -368 -369 for ($i = 0; $i < $boff; ) { -370 $result++; -371 $byte = $string[$i]; -372 $cl = self::utf8CharLen($byte); -373 $i += $cl; -374 } -375 -376 return $result; -377 } -378 -379 protected static function utf8CharLen($byte) { -380 $base2 = str_pad(base_convert((string) ord($byte), 10, 2), 8, "0", STR_PAD_LEFT); -381 $p = strpos($base2, "0"); -382 -383 if ($p == 0) { -384 return 1; -385 } elseif ($p <= 4) { -386 return $p; -387 } else { -388 throw new \InvalidArgumentException(); -389 } -390 } -391 -392 /** -393 * Return string length (number of UTF-8 chars, not strlen()) -394 * -395 * @return int string length -396 */ -397 public function count() { -398 return $this->length; -399 } -400 } -401 +333 +334 if (!$this->hasAttribute($attribute1) or !$this->hasAttribute($attribute2)) { +335 throw new \InvalidArgumentException("Attribute does not exist"); +336 } +337 +338 if (!isset($this->attributes[$to])) { +339 $this->attributes[$to] = []; // No need to init because array is created below +340 } +341 +342 // Switch outside the loops for speed +343 switch ($op) { +344 case 'or': +345 for($i = 0; $i < $this->length; $i++) { +346 $this->attributes[$to][$i] = $this->attributes[$attribute1][$i] || $this->attributes[$attribute2][$i]; +347 } +348 break; +349 +350 case 'xor': +351 for($i = 0; $i < $this->length; $i++) { +352 $this->attributes[$to][$i] = ($this->attributes[$attribute1][$i] xor $this->attributes[$attribute2][$i]); +353 } +354 break; +355 +356 case 'and': +357 for($i = 0; $i < $this->length; $i++) { +358 $this->attributes[$to][$i] = $this->attributes[$attribute1][$i] && $this->attributes[$attribute2][$i]; +359 } +360 break; +361 +362 case 'not': +363 for($i = 0; $i < $this->length; $i++) { +364 $this->attributes[$to][$i] = !$this->attributes[$attribute1][$i]; +365 } +366 break; +367 +368 default: +369 throw new \InvalidArgumentException("Unknown operation"); +370 } +371 } +372 +373 /** +374 * Enable and fill cache for byte to char offset conversion +375 * +376 * May improve performance if setPattern is used extensively +377 */ +378 public function enablebyteToCharCache() { +379 $this->byteToChar = []; +380 $char = 0; +381 for ($i = 0; $i < strlen($this->string); ) { +382 $char++; +383 $byte = $this->string[$i]; +384 $cl = self::utf8CharLen($byte); +385 $i += $cl; +386 +387 $this->byteToChar[$i] = $char; +388 } +389 } +390 +391 protected function byteToCharOffset($boff) { +392 if (isset($this->byteToChar[$boff])) return $this->byteToChar[$boff]; +393 +394 return $this->byteToChar[$boff] = self::byteToCharOffsetString($this->string, $boff); +395 } +396 +397 protected function charToByteOffset($char) { +398 $byte = strlen(mb_substr($this->string, 0, $char, "utf-8")); +399 if (!isset($this->byteToChar[$byte])) $this->byteToChar[$byte] = $char; +400 +401 return $byte; +402 } +403 +404 protected static function byteToCharOffsetString($string, $boff) { +405 $result = 0; +406 +407 for ($i = 0; $i < $boff; ) { +408 $result++; +409 $byte = $string[$i]; +410 $cl = self::utf8CharLen($byte); +411 $i += $cl; +412 } +413 +414 return $result; +415 } +416 +417 protected static function utf8CharLen($byte) { +418 $base2 = str_pad(base_convert((string) ord($byte), 10, 2), 8, "0", STR_PAD_LEFT); +419 $p = strpos($base2, "0"); +420 +421 if ($p == 0) { +422 return 1; +423 } elseif ($p <= 4) { +424 return $p; +425 } else { +426 throw new \InvalidArgumentException(); +427 } +428 } +429 +430 /** +431 * Return string length (number of UTF-8 chars, not strlen()) +432 * +433 * @return int string length +434 */ +435 public function count() { +436 return $this->length; +437 } +438 } +439