-
Notifications
You must be signed in to change notification settings - Fork 0
/
profanity_filter.php
103 lines (96 loc) · 5.21 KB
/
profanity_filter.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
<?php
/**
 * Filter class that replaces profanity with safe characters.
 *
 * Each entry of the word list is expanded into a regular expression in which
 * every letter becomes a character class of look-alike characters, so
 * obfuscated spellings built from those look-alikes are caught as well as the
 * plain ASCII word.
 */
class filter_profanity
{
    // these characters are looked for as joining characters between letters,
    // to catch attempts to bypass the filter l-i-k-e t_h_i_s...
    // (the string is spliced into a regex character class, hence the escapes)
    private $joining_chars = ' _\-\+\.';

    // these words should be the plain ascii version
    // the code generates regular expression replacements based on the character arrays below
    // mis-spellings (like 'fck' instead of 'fuck') need to be added manually; the code then
    // generates the corresponding equivalents (like ⓕⓒⓚ)
    private $profanity = array(
        'anal','anus','arse','ass','assface','asshole','asslick','asswipe',
        'ballsack','bastard','biatch','bitch','blowjob','bollock','bollok','boob','bugger','bum','butt','butthole','buttcam','buttplug','buttwipe','buttfucking','buttfuck','barely legal','bdsm','bbw','bimbo','bukkake',
        'clit','clitoris','cock','cockhead','cocksucker','coon','crap','cunt','cum','cumshot','cumming',
        'damn','dick','dickhead','dildo','dyke','deepthroat','defloration','doggystyle','dp',
        'ejaculation',
        'fag','fatass','fck','fellate','fellatio','felching','fuck','fucker','fuckface','fudgepacker','fucked','fisting','fingering','foreplay','foursome',
        'gayboy','gaygirl','goddamn','gagged','gloryhole','golden shower','gilf',
        'homo','handjob','hymen','huge toy','hooter',
        'jackoff','jap','jizz',
        'knob','knobend','knobjockey','knocker',
        'labia','lactating','ladyboy',
        'masterbate','masturbate','mofo','muff','milf','muff dive','muff diving',
        'nigga','nigger','nipple',
        'orgy',
        'paki','penis','piss','pisstake','poop','porn','prick','pube','pussy','pornstar','porn star','porno','pornographic','pissing',
        'rectum','retard',
        'schlong','scrotum','sex','shit','shithead','shyte','slut','spunk','shitting','sperm','strap on','stripper','speculum','sybian',
        'tit','tosser','turd','twat','threesome','topless','titty',
        'vagina',
        'whore','wank','wanker','whoar',
    );

    // these characters replace each letter of a profanity word above inside a regex character class
    // 'i' and 'l' share one class, as do 'u' and 'v', so either letter matches the other's look-alikes
    // NOTE(review): 'h' also lists the digit-4 look-alikes used by 'a' - confirm this is intended
    private $replacement = array(
        'a' => 'aªàáâãäåāăąǎȁȃȧᵃḁẚạảₐ⒜ⓐa4⍺4⁴₄④⑷⒋4₳@',
        'b' => 'bᵇḃḅḇ⒝ⓑbɞßℬ฿',
        'c' => 'cçćĉċčᶜⅽ⒞ⓒc©¢℃¢€\<',
        'd' => 'dďᵈḋḍḏḑḓⅆⅾ⒟ⓓd',
        'e' => 'eèéêëēĕėęěȅȇȩᵉḙḛẹẻẽₑ℮ℯⅇ⒠ⓔe⅀∑⨊⨋€℮',
        'f' => 'fᶠḟ⒡ⓕfffƒ⨐ƒ៛',
        'g' => 'gĝğġģǧǵɡᵍᵹḡℊ⒢ⓖg',
        'h' => 'hĥȟʰһḣḥḧḩḫẖₕℎ⒣ⓗh44⁴₄④⑷⒋4',
        'i' => 'iìíîïĩīĭįİıǐȉȋᵢḭỉịⁱℹⅈⅰⅱ⒤ⓘilĺļľŀˡḷḻḽₗℓⅼ⒧ⓛl|׀∣❘|1¹₁⅟①⑴⒈1',
        'j' => 'jĵǰʲⅉ⒥ⓙⱼj',
        'k' => 'kķǩᵏḱḳḵₖ⒦ⓚk',
        'l' => 'iìíîïĩīĭįİıǐȉȋᵢḭỉịⁱℹⅈⅰⅱ⒤ⓘilĺļľŀˡḷḻḽₗℓⅼ⒧ⓛl|׀∣❘|1¹₁⅟①⑴⒈1',
        'm' => 'mᵐḿṁṃₘⅿ⒨ⓜ㎜mℳ',
        'n' => 'nñńņňʼnƞǹṅṇṉṋⁿₙ⒩ⓝn',
        'o' => 'oºòóôõöōŏőơǒǫȍȏȯᵒọỏₒℴ⒪ⓞo°⃝⃠⊕⊖⊗⊘⊙⊚⊛⊜⊝⌼⌽⌾⍉⍜⍟⍥⎉⎊⎋⏀⏁⏂⏣○◌●◯⚆⚇⚪⚬❍⦲⦵⦶⦷⦸⦹⦾⧂⧃⧲⧬⨀㊀0⁰₀⓪0',
        'p' => 'pᵖṕṗₚ⒫ⓟp',
        'q' => 'q⒬ⓠq',
        'r' => 'rŕŗřȑȓɼʳᵣṙṛṟ⒭ⓡrſẛɼẛ',
        's' => 'sśŝşšșˢṡṣₛ⒮ⓢs$﹩$5⁵₅⑤⑸⒌5§',
        't' => 'tţťƫțᵗƾṫṭṯṱẗₜ⒯ⓣt☨☩♰♱⛨✙✚✛✜✝✞✟⧧†\+',
        'u' => 'uùúûüũūŭůűųưǔȕȗᵘᵤṳṵṷụủ⒰ⓤuvᵛᵥṽṿⅴ⒱ⓥv',
        'v' => 'uùúûüũūŭůűųưǔȕȗᵘᵤṳṵṷụủ⒰ⓤuvᵛᵥṽṿⅴ⒱ⓥv',
        'w' => 'wŵʷẁẃẅẇẉẘ⒲ⓦw',
        'x' => 'xˣẋẍₓⅹ⒳ⓧx˟╳❌❎⤫⤬⤭⤮⤯⤰⤱⤲⨯×✕✖⨰⨱⨴⨵⨶⨷',
        'y' => 'yýÿŷȳʸẏẙỳỵỷỹ⒴ⓨy¥¥',
        'z' => 'zźżžƶᶻẑẓẕ⒵ⓩz2²₂②⑵⒉2',
        ' ' => ' _\-\+\.',
    );

    // lazily built list of array('regex' => string, 'length' => int), one entry per profanity word;
    // the patterns depend only on the static tables above, so they are generated once per instance
    private $patterns = null;

    /**
     * return a filtered string
     *
     * Every word of the profanity list is tried in list order. A match is replaced by a
     * mask that is as many characters long as the list word (not the matched text),
     * padded with one space on each side: the pattern may have swallowed the whitespace
     * or joining characters around the word, and the padding keeps the mask separated
     * from its neighbours.
     *
     * If PCRE reports an error (for example because the input is not valid UTF-8) an
     * empty string is returned, so text that could not be checked is never passed through.
     *
     * @param string $filter_line the string to be filtered
     * @param string $replace_char optional character to use as the replacement - defaults to *
     *                             (may be multibyte; an empty string removes matches entirely)
     * @return string
     */
    public function filter_string($filter_line, $replace_char='*')
    {
        $replace_char = (string) $replace_char;
        $use_mask = ($replace_char !== '');

        foreach ($this->compiled_patterns() as $pattern) {
            $replacement = $use_mask
                ? ' ' . $this->build_mask($replace_char, $pattern['length']) . ' '
                : '';

            $result = preg_replace($pattern['regex'], $replacement, $filter_line);
            if ($result === null) {
                // preg_replace() yields null on failure; stop instead of feeding null
                // back in as the subject for every remaining word
                return '';
            }
            $filter_line = $result;
        }

        return $filter_line;
    }

    /**
     * build a mask of exactly $length characters out of $replace_char
     *
     * The replacement string is repeated and then trimmed per character rather than per
     * byte, so a multibyte replacement character is never cut in half (which would make
     * the line invalid UTF-8 and break every later /u match).
     *
     * @param string $replace_char non-empty replacement string
     * @param int $length number of characters the mask must have
     * @return string
     */
    private function build_mask($replace_char, $length)
    {
        return mb_substr(str_repeat($replace_char, $length), 0, $length, 'UTF-8');
    }

    /**
     * return the regex for every profanity word, generating the list on first use
     *
     * For each word every letter is swapped for its look-alike character class (one or
     * more occurrences), optional joining characters are allowed between the letters, and
     * an optional e / s / d style ending is accepted after the word. The pattern is
     * anchored on word boundaries (or a space/tab) on both sides, so clbuttic mistakes
     * don't occur.
     *
     * @return array list of array('regex' => string, 'length' => int)
     */
    private function compiled_patterns()
    {
        if ($this->patterns !== null) {
            return $this->patterns;
        }

        $joiner = "[{$this->joining_chars}]*";
        $ending = "[{$this->replacement['e']}]*[{$this->replacement['s']}{$this->replacement['d']}]*";

        $this->patterns = array();
        foreach ($this->profanity as $word) {
            $parts = array();
            // byte-wise splitting is fine here: the word list is plain ascii, never multibyte
            foreach (str_split($word) as $letter) {
                $parts[] = "[{$this->replacement[$letter]}]+";
            }
            $parts[] = $ending;

            $this->patterns[] = array(
                'regex'  => '/(\b|[ \t])' . implode($joiner, $parts) . '(\b|[ \t])/ui',
                'length' => strlen($word),
            );
        }

        return $this->patterns;
    }
}