From 564c04dbc020d1a684b35fc2701b015dc1327b53 Mon Sep 17 00:00:00 2001 From: nachtjasmin Date: Fri, 28 Jul 2023 21:49:23 +0200 Subject: [PATCH] Cover one more case with comma-separated pronouns --- src/libs/pronouns.js | 106 ++++------------------------------ tests/extractPronouns.spec.js | 5 ++ 2 files changed, 15 insertions(+), 96 deletions(-) diff --git a/src/libs/pronouns.js b/src/libs/pronouns.js index c7ef35c..a22da42 100644 --- a/src/libs/pronouns.js +++ b/src/libs/pronouns.js @@ -1,4 +1,5 @@ import sanitizeHtml from "sanitize-html"; +import { allKnownPronouns } from "./generated/pronouns/index.js"; const fieldMatchers = [/\bpro.*nouns?\b/i, /\bpronomen\b/i, /(i )?go(es)? by/i]; const knownPronounUrls = [ @@ -217,95 +218,6 @@ function sanitizePronouns(str) { return str === "" ? null : str; } -const knownPronouns = [ - "ae", - "aer", - "aers", - "aerself", - "co", - "co's", - "cos", - "coself", - "e", - "eir", - "eirs", - "em", - "ems", - "emself", - "es", - "ey", - "fae", - "faer", - "faers", - "faerself", - "he", - "her", - "hers", - "herself", - "him", - "himself", - "hir", - "hirs", - "hirself", - "his", - "hu", - "hum", - "hus", - "huself", - "it", - "its", - "itself", - "ne", - "nem", - "nemself", - "nir", - "nirs", - "nirself", - "one", - "one's", - "oneself", - "per", - "pers", - "perself", - "s/he", - "she", - "their", - "theirs", - "them", - "themself", - "themselves", - "they", - "thon", - "thon's", - "thons", - "thonself", - "ve", - "ver", - "vers", - "verself", - "vi", - "vim", - "vims", - "vimself", - "vir", - "virs", - "virself", - "vis", - "xe", - "xem", - "xemself", - "xyr", - "xyrs", - "ze", - "zhe", - "zher", - "zhers", - "zherself", - "zir", - "zirs", - "zirself", -]; - /** * Tries to extract pronouns from the given text. Only "known" pronouns are returned, which is * a compromise for the pattern matching. At no point we want to limit the pronouns used by persons. @@ -321,7 +233,7 @@ function searchForKnownPronouns(text) { // // Why not just two of them? Well, for combinations of multiple subjective pronouns, like "sie/she/elle", // we wanna display the whole set of pronouns if possible. - const exactMatches = text.matchAll(/(\w+) ?[/,] ?((\w+)[ /,]{0,2}){1,}/gi); + const exactMatches = text.matchAll(/(\w+)( ?[/,] ?(\w+)){1,}/gi); for (const [match] of exactMatches) { // Once we have our match, split it by the known separators and check sequentially // whether we know one of the pronouns. If that's the case, return everything in the match @@ -332,19 +244,21 @@ function searchForKnownPronouns(text) { // because pronoun URLs like pronoun.page/they/them would return something like "page/they/them", // which obviously is wrong. const parts = match.split(/[/,]/).map((x) => x.trim()); + const known = []; for (const p of parts) { - if (knownPronouns.includes(p.toLowerCase())) { - let res = match.substring(match.indexOf(p)); - res = res.replaceAll(" ", ""); - res = res.trim(); - return res; + if (allKnownPronouns.includes(p.toLowerCase())) { + known.push(p); } } + + if (known.length) { + return known.join("/"); + } } const followedByColon = text.matchAll(/pronouns?:\W+([\w/+]+)/gi); for (const match of followedByColon) { - return match.pop(); // first group is last entry in array + return match.pop() ?? null; // first group is last entry in array } const anyAllPronouns = text.match(/(any|all) +pronouns/gi); if (anyAllPronouns) { diff --git a/tests/extractPronouns.spec.js b/tests/extractPronouns.spec.js index b346170..4adadc2 100644 --- a/tests/extractPronouns.spec.js +++ b/tests/extractPronouns.spec.js @@ -160,6 +160,11 @@ const endToEndTests = [ note: ":speech_bubble: e/em/eir", expect: "e/em/eir", }, + { + name: "comma-separated pronouns in bio", + note: "test er, he, him, more test", + expect: "er/he/him", + }, ]; const endToEndTestSuite = suite("end to end tests"); for (const { name, fields, expect, note } of endToEndTests) {