Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CLDR-17327 speedup VoteResolver's transcript #3462

Merged
merged 2 commits into from
Jan 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
package org.unicode.cldr.util;

import com.google.common.base.Supplier;
import com.google.common.base.Suppliers;
import java.util.LinkedList;
import java.util.List;
import java.util.stream.Collectors;

/**
* A deferred list of formatted strings, one per line. The formatting is deferred until when (and
* if) get() is called. It's thread safe to call get() multiple times, but add() should be called
* from a single thread.
*
* <p>Once get() is called, the contents are 'resolved' into a string, and subsequent get() calls
* will return the same string (even if add() was called). To re-use the object, call clear() and
* then new content can be added with add().
*
* <p>In testing, there was a significant speed-up for the use case where get() might not be called.
*/
public class DeferredTranscript implements Supplier<String> {

private final class FormatEntry {
public final CharSequence fmt;
public final Object[] args;

/** Add a new FormatEntry as a deferred format */
public FormatEntry(String fmt, Object[] args) {
macchiati marked this conversation as resolved.
Show resolved Hide resolved
this.fmt = fmt;
this.args = args;
}

/** Output this entry as a CharSequence, performing any formatting needed. */
public final CharSequence format() {
if (args == null || args.length == 0) {
return fmt;
} else {
return String.format(fmt.toString(), args);
}
}
}

/** clear the DeferredTranscript on startup */
public DeferredTranscript() {
clear();
}

/** The first time this is read, it will calculate the entire string. */
private Supplier<String> delegate = null;

/** cached formats */
private List<FormatEntry> formats;

/** reset this transcript so it can be added to again. */
public void clear() {
// the delegate only calculates the value once, the first time it is accessed.
delegate =
Suppliers.memoize(
new Supplier<String>() {

@Override
public String get() {
return formats.stream()
.map(FormatEntry::format)
.collect(Collectors.joining("\n"));
}
});
formats = new LinkedList<>();
}

@Override
public final String get() {
return delegate.get();
}

/**
* Add a formatted entry. Will have no effect if get() has been called since clear() or object
* construction.
*
* @param fmt the string or pattern string
* @param args if non-null, arguments to String.format()
*/
final DeferredTranscript add(String fmt, Object... args) {
formats.add(new FormatEntry(fmt, args));
return this;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -64,19 +64,23 @@ public VoteResolver(VoterInfoList vil) {
private static final boolean DEBUG = false;

/** This enables a prose discussion of the voting process. */
private StringBuilder transcript = null;
private DeferredTranscript transcript = null;

public void enableTranscript() {
if (transcript == null) {
transcript = new StringBuilder();
transcript = new DeferredTranscript();
}
}

public void disableTranscript() {
transcript = null;
}

public String getTranscript() {
if (transcript == null) {
return null;
} else {
return transcript.toString();
return transcript.get();
}
}

Expand All @@ -86,14 +90,10 @@ public String getTranscript() {
* @param fmt
* @param args
*/
private void annotateTranscript(String fmt, Object... args) {
if (DEBUG) {
System.out.println("Transcript: " + String.format(fmt, args));
}
if (transcript == null) {
return;
private final void annotateTranscript(String fmt, Object... args) {
if (transcript != null) {
transcript.add(fmt, args);
}
transcript.append(String.format(fmt, args)).append("\n");
}

/**
Expand Down Expand Up @@ -617,7 +617,8 @@ public void clear() {
baileyValue = null;
baileySet = false;
if (transcript != null) {
transcript = new StringBuilder();
// there was a transcript before, so retain it
transcript = new DeferredTranscript();
}
}

Expand Down Expand Up @@ -996,7 +997,7 @@ public void clear() {
nValue = null;

if (transcript != null) {
transcript.setLength(0);
transcript.clear();
}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
package org.unicode.cldr.util;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;

import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.ValueSource;

public class TestDeferredTranscript {
@Test
public void TestBasic() {
DeferredTranscript dt = new DeferredTranscript();

dt.clear();

assertEquals("", dt.get());
assertEquals("", dt.get());
dt.clear();
dt.add("Hello");
assertEquals("Hello", dt.get());
// all of these are ignored, we've already committed the output.
dt.add("Hello");
dt.add("Hello");
dt.add("Hello");
dt.add("Goodbye %d times", 999);
assertEquals("Hello", dt.get());
dt.clear();
dt.add("Hello");
dt.add("Hello");
assertEquals("Hello\nHello", dt.get());
assertEquals("Hello\nHello", dt.get());
dt.clear();
assertEquals("", dt.get());
dt.clear();
dt.add("You have %d problems.", 0);
dt.add("Thanks");
assertEquals("You have 0 problems.\nThanks", dt.get());
dt.add("Thanks");
assertEquals("You have 0 problems.\nThanks", dt.get());
}

@ParameterizedTest
@ValueSource(booleans = {false, true})
public void TestPerf(boolean doGet) {
DeferredTranscript dt = new DeferredTranscript();
for (int i = 0; i < 100000; i++) {
dt.add("Now you have %d problem(s).", i);
}
// 1M iterations: 2.3 seconds without get(), 10 seconds with
if (doGet) {
// if doGet is false, the above code is faster.
// defer the formatting
final String s0 = dt.get();
assertNotNull(s0);
final String s1 = dt.get();
assertEquals(s0, s1);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
import com.ibm.icu.util.Output;
import java.util.Date;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.ValueSource;
import org.unicode.cldr.unittest.TestUtilities;
import org.unicode.cldr.util.VoteResolver.Status;

Expand Down Expand Up @@ -46,6 +48,37 @@ void testDisputed() {
vr.getStatusForOrganization(Organization.google)));
}

@ParameterizedTest
@ValueSource(booleans = {false, true})
public void testPerf(boolean doGet) {
final VoteResolver<String> vr = getStringResolver();
vr.enableTranscript();

for (int i = 0; i < 100; i++) {
vr.clear();
vr.setLocale(
CLDRLocale.getInstance("fr"), null); // NB: pathHeader is needed for annotations
vr.setBaseline("bafut", Status.unconfirmed);
vr.setBaileyValue("bfd");
vr.add("bambara", TestUtilities.TestUser.appleV.voterId);
vr.add("bafia", TestUtilities.TestUser.googleV.voterId);
vr.add("bassa", TestUtilities.TestUser.googleV2.voterId);
vr.add("bafut", TestUtilities.TestUser.unaffiliatedS.voterId);

assertAll(
"Verify the outcome",
() -> assertEquals("bambara", vr.getWinningValue()),
() -> assertEquals(Status.provisional, vr.getWinningStatus()));

// about 10x faster without calling get()
if (doGet) {
assertTrue(
vr.getTranscript().contains("earlier than 'bassa'"),
() -> "Transcript did not match expectations:\n" + vr.getTranscript());
}
}
}

@Test
void testExplanations() {
// Example from https://st.unicode.org/cldr-apps/v#/fr/Languages_A_D/54dc38b9b6c86cac
Expand Down
Loading