Skip to content

Commit

Permalink
CLDR-17582 Add images, fix data and Emoji.java
Browse files Browse the repository at this point in the history
  • Loading branch information
macchiati committed May 1, 2024
1 parent 58227d2 commit 923f57e
Show file tree
Hide file tree
Showing 9 changed files with 37 additions and 20 deletions.
28 changes: 14 additions & 14 deletions common/annotations/en.xml
Original file line number Diff line number Diff line change
Expand Up @@ -3908,20 +3908,20 @@ annotations.
<annotation cp="µ">measure | micro sign</annotation>
<annotation cp="µ" type="tts">micro sign</annotation>
<!-- start hand-generated lines from E16 -->
<annotation cp='🪉'>harp</annotation>
<annotation cp='🪉' type='tts'>=music | instrument | love | cupid | orchestra</annotation>
<annotation cp='🪏'>shovel</annotation>
<annotation cp='🪏' type='tts'>=dig | spade | hole | scoop</annotation>
<annotation cp='🪾'>leafless tree</annotation>
<annotation cp='🪾' type='tts'>=winter | barren | drought</annotation>
<annotation cp='🫆'>fingerprint</annotation>
<annotation cp='🫆' type='tts'>=forensics | identity | safety</annotation>
<annotation cp='🫜'>root vegetable</annotation>
<annotation cp='🫜' type='tts'>=root | vegetable | garden | beet | turnip</annotation>
<annotation cp='🫟'>splatter</annotation>
<annotation cp='🫟' type='tts'>=paint | holi | spill | stain</annotation>
<annotation cp='🫩'>face with bags under eyes</annotation>
<annotation cp='🫩' type='tts'>=tired | sleepy | exhausted</annotation>
<annotation cp='🪉' type='tts'>harp</annotation>
<annotation cp='🪉'>music | instrument | love | cupid | orchestra</annotation>
<annotation cp='🪏' type='tts'>shovel</annotation>
<annotation cp='🪏'>dig | spade | hole | scoop</annotation>
<annotation cp='🪾' type='tts'>leafless tree</annotation>
<annotation cp='🪾'>winter | barren | drought</annotation>
<annotation cp='🫆' type='tts'>fingerprint</annotation>
<annotation cp='🫆'>forensics | identity | safety</annotation>
<annotation cp='🫜' type='tts'>root vegetable</annotation>
<annotation cp='🫜'>root | vegetable | garden | beet | turnip</annotation>
<annotation cp='🫟' type='tts'>splatter</annotation>
<annotation cp='🫟'>paint | holi | spill | stain</annotation>
<annotation cp='🫩' type='tts'>face with bags under eyes</annotation>
<annotation cp='🫩'>tired | sleepy | exhausted</annotation>
<!-- end hand-generated lines from E16 -->
</annotations>
</ldml>
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
29 changes: 23 additions & 6 deletions tools/cldr-code/src/main/java/org/unicode/cldr/util/Emoji.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
package org.unicode.cldr.util;

import com.google.common.base.CharMatcher;
import com.google.common.base.Splitter;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
Expand All @@ -18,6 +17,8 @@
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.unicode.cldr.draft.FileUtilities;
import org.unicode.cldr.util.PathHeader.PageId;

Expand Down Expand Up @@ -73,16 +74,21 @@ public class Emoji {
static final UnicodeSet allRgi = new UnicodeSet();
static final UnicodeSet allRgiNoES = new UnicodeSet();

// 😀 E1.0 grinning face

static {
/*
* Example from emoji-test.txt:
* # group: Smileys & Emotion
* # subgroup: face-smiling
* 1F600 ; fully-qualified # 😀 grinning face
*/
Splitter semi = Splitter.on(CharMatcher.anyOf(";#")).trimResults();
Splitter semi = Splitter.on(';').trimResults();
String majorCategory = null;
String minorCategory = null;
final Matcher commentMatcher =
Pattern.compile("\\s*[\\S]+\\s+(?:E\\d*.\\d+\\s+)(.*)").matcher("");

for (String line : FileUtilities.in(Emoji.class, "data/emoji/emoji-test.txt")) {
if (line.startsWith("#")) {
line = line.substring(1).trim();
Expand All @@ -103,20 +109,31 @@ public class Emoji {

String emojiHex = it.next();
String original = Utility.fromHex(emojiHex, 4, " ");
String type = it.next();
String typeRaw = it.next();
// fully-qualified # #️⃣ E0.6 keycap: #
int hashPos = typeRaw.indexOf('#');
if (hashPos < 0) {
throw new IllegalArgumentException("unexpected comment format: " + typeRaw);
}
String type = typeRaw.substring(0, hashPos).trim();
if (type.startsWith("fully-qualified")) {
allRgi.add(original);
allRgiNoES.add(original.replace(Emoji.EMOJI_VARIANT, ""));
}
emojiToMajorCategory.put(original, majorCategory);
emojiToMinorCategory.put(original, minorCategory);
String comment = it.next();
String comment = typeRaw.substring(hashPos + 1);
if (!commentMatcher.reset(comment).matches()) {
throw new IllegalArgumentException("unexpected comment format");
}
String name = commentMatcher.group(1);
// The comment is now of the form: # 😁 E0.6 beaming face with smiling eyes
int spacePos = comment.indexOf(' ');
// int spacePos = comment.indexOf(' ');
// The format changed in v15.1, so there is no version number.
// Thus the following is commented out:
// spacePos = comment.indexOf(' ', spacePos + 1); // get second space
String name = comment.substring(spacePos + 1).trim();
// String name = comment.substring(spacePos + 1).trim();

toName.put(original, name);

// add all the non-constructed values to a set for annotations
Expand Down

0 comments on commit 923f57e

Please sign in to comment.