Skip to content

Commit

Permalink
feat(compat): implement deburr (#876)
Browse files Browse the repository at this point in the history
  • Loading branch information
D-Sketon authored Dec 8, 2024
1 parent ceb7513 commit 26e68dd
Show file tree
Hide file tree
Showing 8 changed files with 587 additions and 511 deletions.
10 changes: 10 additions & 0 deletions benchmarks/performance/deburr.bench.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import { bench, describe } from 'vitest';
import { deburr as deburrToolkit_ } from 'es-toolkit';
import { deburr as deburrCompatToolkit_ } from 'es-toolkit/compat';
import { deburr as deburrLodash_ } from 'lodash';

// Bind each imported implementation to a local constant; the benchmark bodies call these local names.
const deburrToolkit = deburrToolkit_;
const deburrCompatToolkit = deburrCompatToolkit_;
const deburrLodash = deburrLodash_;

// Long input ('déjà vu' repeated 1000 times) used by the "long words" benchmark cases.
const longWord = 'déjà vu'.repeat(1000);
Expand All @@ -11,6 +13,10 @@ describe('deburr', () => {
deburrLodash('déjà vu');
});

bench('es-toolkit/compat/deburr', () => {
deburrCompatToolkit('déjà vu');
});

bench('es-toolkit/deburr', () => {
deburrToolkit('déjà vu');
});
Expand All @@ -19,6 +25,10 @@ describe('deburr', () => {
deburrLodash(longWord);
});

bench('es-toolkit/compat/deburr - long words', () => {
deburrCompatToolkit(longWord);
});

bench('es-toolkit/deburr - long words', () => {
deburrToolkit(longWord);
});
Expand Down
194 changes: 194 additions & 0 deletions src/_internal/burredLetters.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
/**
 * Accented / special Latin letters, one single-character string per entry,
 * in ascending code point order.
 *
 * The list is made of four contiguous code point runs:
 *  - U+00C0–U+00D6, U+00D8–U+00F6 and U+00F8–U+00FF (Latin-1 Supplement letters;
 *    U+00D7 "×" and U+00F7 "÷" are deliberately left out),
 *  - U+0100–U+017F (Latin Extended-A letters).
 */
export const burredLetters: string[] = /* @__PURE__ */ (() => {
  // Inclusive [first, last] code point bounds of each run.
  const codePointRuns: Array<[number, number]> = [
    // Latin-1 Supplement letters.
    [0xc0, 0xd6],
    [0xd8, 0xf6],
    [0xf8, 0xff],
    // Latin Extended-A letters.
    [0x0100, 0x017f],
  ];

  const letters: string[] = [];
  for (const [first, last] of codePointRuns) {
    for (let code = first; code <= last; code++) {
      letters.push(String.fromCharCode(code));
    }
  }
  return letters;
})();
119 changes: 119 additions & 0 deletions src/_internal/comboMarks.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
/**
 * Combining diacritical marks, one single-character string per entry.
 *
 * Contains U+0300–U+036F followed by U+FE20–U+FE23, in ascending order.
 */
export const comboMarks: string[] = [
  // U+0300 .. U+036F
  ...Array.from({ length: 0x036f - 0x0300 + 1 }, (_unused, offset) => String.fromCharCode(0x0300 + offset)),
  // U+FE20 .. U+FE23
  ...Array.from({ length: 0xfe23 - 0xfe20 + 1 }, (_unused, offset) => String.fromCharCode(0xfe20 + offset)),
];
Loading

0 comments on commit 26e68dd

Please sign in to comment.