-
Notifications
You must be signed in to change notification settings - Fork 48
/
AnonymizeText.php
90 lines (74 loc) · 2.92 KB
/
AnonymizeText.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
<?php
declare(strict_types=1);
namespace Smile\GdprDump\Converter\Anonymizer;
use Smile\GdprDump\Converter\ConverterInterface;
use Smile\GdprDump\Converter\Parameters\Parameter;
use Smile\GdprDump\Converter\Parameters\ParameterProcessor;
/**
 * Converter that masks text word by word.
 *
 * The first character of each word is kept, every following character is swapped for the
 * replacement string, and delimiter characters are copied through unchanged. Words shorter
 * than the configured minimum length are padded with the replacement string.
 */
class AnonymizeText implements ConverterInterface
{
    /**
     * Delimiter lookup table. setParameters() flips the configured list, so the delimiter
     * characters are the keys and membership can be tested with isset().
     *
     * @var array<int|string, int>
     */
    private array $delimiters;

    /**
     * String written in place of each anonymized character.
     */
    private string $replacement;

    /**
     * Minimum number of units (kept first character + replacements) emitted per word.
     */
    private int $minWordLength;

    /**
     * Whether the mbstring extension is loaded (checked once in the constructor).
     */
    private bool $multiByteEnabled;

    public function __construct()
    {
        // Call the extension_loaded function only once (few seconds gain when converting millions of values)
        $this->multiByteEnabled = extension_loaded('mbstring');
    }

    /**
     * Reads the "delimiters", "replacement" and "min_word_length" parameters.
     *
     * The values passed to addParameter() below ([' ', '_', '-', '.'], '*' and 3) look like
     * the defaults used when a parameter is omitted - confirm against ParameterProcessor.
     *
     * NOTE(review): delimiters are matched one character at a time in convert(), so a
     * delimiter longer than one character can never match.
     *
     * @inheritdoc
     */
    public function setParameters(array $parameters): void
    {
        $input = (new ParameterProcessor())
            ->addParameter('delimiters', Parameter::TYPE_ARRAY, false, [' ', '_', '-', '.'])
            ->addParameter('replacement', Parameter::TYPE_STRING, true, '*')
            ->addParameter('min_word_length', Parameter::TYPE_INT, true, 3)
            ->process($parameters);

        $this->replacement = $input->get('replacement');
        $this->minWordLength = $input->get('min_word_length');

        // Flip the delimiters so that they become array keys: isset() on a key is much
        // faster than searching the list for every character of every converted value
        $this->delimiters = array_flip($input->get('delimiters'));
    }

    /**
     * Anonymizes the value after casting it to a string.
     *
     * An empty string is returned unchanged. The $context argument is not used.
     *
     * @inheritdoc
     */
    public function convert(mixed $value, array $context = []): string
    {
        $value = (string) $value;
        if ($value === '') {
            return $value;
        }

        $chars = $this->splitCharacters($value);
        $lastIndex = array_key_last($chars);
        $result = '';
        $currentWordLength = 0;

        foreach ($chars as $index => $char) {
            // Preserve delimiter characters (using isset instead of array_key_exists because it's faster)
            if (isset($this->delimiters[$char])) {
                $result .= $char;
                $currentWordLength = 0;
                continue;
            }

            // Keep the first character of the word, replace every following one
            $result .= $currentWordLength === 0 ? $char : $this->replacement;
            $currentWordLength++;

            if ($currentWordLength >= $this->minWordLength) {
                continue;
            }

            // The word is still too short: if it ends here (end of the value, or a delimiter
            // comes next), pad it with the replacement up to the minimum length
            if ($index === $lastIndex || isset($this->delimiters[$chars[$index + 1]])) {
                $result .= str_repeat($this->replacement, $this->minWordLength - $currentWordLength);
            }
        }

        return $result;
    }

    /**
     * Splits a non-empty string into a list of characters.
     *
     * @return list<string>
     */
    private function splitCharacters(string $value): array
    {
        if ($this->multiByteEnabled) {
            return mb_str_split($value, 1, 'UTF-8');
        }

        // Without mbstring, str_split() would cut multi-byte characters into single bytes
        // (broken first character, one replacement per byte). PCRE is bundled with PHP,
        // so use it to split on UTF-8 character boundaries instead.
        $chars = preg_split('//u', $value, -1, PREG_SPLIT_NO_EMPTY);

        // preg_split() returns false when the value is not valid UTF-8: fall back to bytes
        return $chars !== false ? $chars : str_split($value);
    }
}