Deploy to production #331

Merged: 21 commits, Sep 13, 2024
Changes from all commits

Commits (21)
d0dad52
Bump the quarkus group with 3 updates
dependabot[bot] Aug 26, 2024
7e1825c
Migrate from ValueConvert to ValueModel
yrodiere Aug 27, 2024
09c63a0
Merge pull request #323 from quarkusio/dependabot/maven/quarkus-a396f…
yrodiere Aug 27, 2024
1772cb3
Bump org.apache.maven.plugins:maven-surefire-plugin
dependabot[bot] Sep 2, 2024
9995bc0
Merge pull request #325 from quarkusio/dependabot/maven/maven-plugins…
marko-bekhta Sep 2, 2024
2c11b70
Bump the quarkus group with 3 updates
dependabot[bot] Sep 2, 2024
e38f8a9
Merge pull request #326 from quarkusio/dependabot/maven/quarkus-edee3…
marko-bekhta Sep 2, 2024
6309a7c
Use single-valued highlighting where possible
yrodiere Sep 6, 2024
04c2b2d
Merge pull request #327 from yrodiere/highlight-single
yrodiere Sep 6, 2024
f15a747
Bump the quarkus group with 5 updates
dependabot[bot] Sep 9, 2024
1886853
Merge pull request #330 from quarkusio/dependabot/maven/quarkus-b74a2…
marko-bekhta Sep 9, 2024
f4b49cb
Bump net.revelc.code:impsort-maven-plugin in the maven-plugins group
dependabot[bot] Sep 9, 2024
4a549db
Merge pull request #329 from quarkusio/dependabot/maven/maven-plugins…
marko-bekhta Sep 9, 2024
c343b35
Clarify highlighter configuration
yrodiere Sep 6, 2024
09907ad
Do not remove stopwords from search
yrodiere Sep 6, 2024
5af82c3
Simplify/relax some tests
yrodiere Sep 10, 2024
42ebd00
Use fast-vector highlighting for lower search latency
yrodiere Sep 6, 2024
df69bd3
Align OpenSearch Dev Services Java Opts on those used in prod
yrodiere Sep 6, 2024
610a53b
Merge segments after indexing
yrodiere Sep 6, 2024
06f3b5f
Reduce the queue count and bulk size in dev/prod
yrodiere Sep 11, 2024
08c4ad5
Merge pull request #328 from yrodiere/faster-search
yrodiere Sep 11, 2024
10 changes: 5 additions & 5 deletions pom.xml
@@ -27,17 +27,17 @@
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
<quarkus.platform.artifact-id>quarkus-bom</quarkus.platform.artifact-id>
<quarkus.platform.group-id>io.quarkus</quarkus.platform.group-id>
<quarkus.version>3.13.2</quarkus.version>
<quarkus.version>3.14.2</quarkus.version>
<revision>999-SNAPSHOT</revision>
<skipITs>true</skipITs>
<surefire-plugin.version>3.4.0</surefire-plugin.version>
<surefire-plugin.version>3.5.0</surefire-plugin.version>
<test.jvm.args>-Xms2g -Xmx2g</test.jvm.args>
<version.docker.plugin>0.45.0</version.docker.plugin>
<version.formatter.plugin>2.24.1</version.formatter.plugin>
<version.impsort-maven-plugin>1.11.0</version.impsort-maven-plugin>
<version.impsort-maven-plugin>1.12.0</version.impsort-maven-plugin>
<!-- This version needs to match the version in src/main/docker/opensearch-custom.Dockerfile -->
<version.opensearch>2.16</version.opensearch>
<version.quarkus-web-bundler>1.7.0</version.quarkus-web-bundler>
<version.quarkus-web-bundler>1.7.1</version.quarkus-web-bundler>
</properties>
<dependencyManagement>
<dependencies>
@@ -58,7 +58,7 @@
<dependency>
<groupId>io.quarkiverse.jgit</groupId>
<artifactId>quarkus-jgit</artifactId>
<version>3.1.2</version>
<version>3.1.3</version>
</dependency>
<dependency>
<groupId>io.quarkiverse.githubapi</groupId>
2 changes: 1 addition & 1 deletion src/main/helm/values.staging.yaml
@@ -2,8 +2,8 @@ app:
envs:
QUARKUS_PROFILE: 'staging'
# Avoid overloading the rather resource-constrained OpenSearch instance
INDEXING_BULK_SIZE: '10'
INDEXING_QUEUE_COUNT: '6'
INDEXING_BULK_SIZE: '10'
resources:
limits:
cpu: 2000m
45 changes: 25 additions & 20 deletions src/main/java/io/quarkus/search/app/SearchService.java
@@ -23,7 +23,7 @@
import io.quarkus.runtime.LaunchMode;

import org.hibernate.search.engine.search.common.BooleanOperator;
import org.hibernate.search.engine.search.common.ValueConvert;
import org.hibernate.search.engine.search.common.ValueModel;
import org.hibernate.search.engine.search.predicate.dsl.SimpleQueryFlag;
import org.hibernate.search.mapper.pojo.standalone.mapping.SearchMapping;

@@ -36,7 +36,7 @@
@Path("/")
public class SearchService {

private static final int NO_MATCH_SIZE = 32_600;
private static final int TITLE_OR_SUMMARY_MAX_SIZE = 32_600;
private static final int PAGE_SIZE = 50;
private static final long TOTAL_HIT_COUNT_THRESHOLD = 100;
private static final String MAX_FOR_PERF_MESSAGE = "{jakarta.validation.constraints.Max.message} for performance reasons";
@@ -73,8 +73,8 @@ public SearchResult<GuideSearchHit> search(@RestQuery @DefaultValue(QuarkusVersi
f.id(),
f.field("type"),
f.field("origin"),
f.highlight(language.addSuffix("title")),
f.highlight(language.addSuffix("summary")),
f.highlight(language.addSuffix("title")).highlighter("highlighter_title_or_summary").single(),
f.highlight(language.addSuffix("summary")).highlighter("highlighter_title_or_summary").single(),
f.highlight(language.addSuffix("fullContent")).highlighter("highlighter_content"))
.asList(GuideSearchHit::new))
.where((f, root) -> {
Expand Down Expand Up @@ -104,25 +104,30 @@ public SearchResult<GuideSearchHit> search(@RestQuery @DefaultValue(QuarkusVersi
.defaultOperator(BooleanOperator.AND))
.should(f.match().field("origin").matching("quarkus").boost(50.0f))
.should(f.not(f.match().field(language.addSuffix("topics"))
.matching("compatibility", ValueConvert.NO))
.matching("compatibility", ValueModel.INDEX))
.boost(50.0f)));
}
})
// * Highlighters are going to use spans-with-classes so that we will have more control over styling the visual on the search results screen.
// * We give control to the caller on the content snippet length and the number of these fragments
// * No match size is there to make sure that we are still going to get the text even if the field didn't have a match in it.
// * The title in the Guide entity is `Length.LONG` long, so we use that as a max value for no-match size, but hopefully nobody writes a title that long...
.highlighter(
f -> f.unified().noMatchSize(NO_MATCH_SIZE).fragmentSize(0)
.orderByScore(true)
.numberOfFragments(1)
.tag("<span class=\"" + highlightCssClass + "\">", "</span>")
.boundaryScanner().sentence().end())
// * If there's no match in the full content we don't want to return anything.
// * Also content is really huge, so we want to only get small parts of the sentences. We are allowing caller to pick the number of sentences and their length:
.highlighter("highlighter_content",
f -> f.unified().noMatchSize(0).numberOfFragments(contentSnippets)
.fragmentSize(contentSnippetsLength))
.highlighter(f -> f.fastVector()
// Highlighters are going to use spans-with-classes so that we will have more control over styling the visual on the search results screen.
.tag("<span class=\"" + highlightCssClass + "\">", "</span>"))
.highlighter("highlighter_title_or_summary", f -> f.fastVector()
// We want the whole text of the field, regardless of whether it has a match or not.
.noMatchSize(TITLE_OR_SUMMARY_MAX_SIZE)
.fragmentSize(TITLE_OR_SUMMARY_MAX_SIZE)
// We want the whole text as a single fragment
.numberOfFragments(1))
.highlighter("highlighter_content", f -> f.fastVector()
// If there's no match in the full content we don't want to return anything.
.noMatchSize(0)
// Content is really huge, so we want to only get small parts of the sentences.
// We give control to the caller on the content snippet length and the number of these fragments
.numberOfFragments(contentSnippets)
.fragmentSize(contentSnippetsLength)
// The rest of fragment configuration is static
.orderByScore(true)
// We don't use sentence boundaries because those can result in huge fragments
.boundaryScanner().chars().boundaryMaxScan(10).end())
.sort(f -> f.score().then().field(language.addSuffix("title_sort")))
.routing(QuarkusVersionAndLanguageRoutingBinder.searchKeys(version, language))
.totalHitCountThreshold(TOTAL_HIT_COUNT_THRESHOLD + (page + 1) * PAGE_SIZE)
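Note on the ValueConvert.NO -> ValueModel.INDEX change above: Hibernate Search 7.2 replaces the ValueConvert enum with ValueModel. A minimal sketch of how the two map onto each other, assuming the 7.2 naming; both field names come from the query above, and the explicit ValueModel.MAPPING on the first line is only there to show the default:

// ValueConvert.YES (convert the argument the same way as the entity property) becomes ValueModel.MAPPING:
f.match().field("origin").matching("quarkus", ValueModel.MAPPING);
// ValueConvert.NO (pass the value exactly as it is stored in the index) becomes ValueModel.INDEX:
f.match().field(language.addSuffix("topics")).matching("compatibility", ValueModel.INDEX);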
15 changes: 6 additions & 9 deletions src/main/java/io/quarkus/search/app/dto/GuideSearchHit.java
@@ -10,21 +10,18 @@ public record GuideSearchHit(URI url, String type, String origin, String title,
public GuideSearchHit(URI url,
String type,
String origin,
List<String> title,
List<String> summary,
List<String> fullContent) {
this(url, type, origin, firstOrEmpty(title), firstOrEmpty(summary), wrap(fullContent));
String title,
String summary,
List<String> content) {
this(url, type, origin, title != null ? title : "", summary != null ? summary : "", wrap(content));
}

@SuppressWarnings("unchecked")
public GuideSearchHit(List<?> values) {
this(
(URI) values.get(0), (String) values.get(1), (String) values.get(2),
(List<String>) values.get(3), (List<String>) values.get(4), (List<String>) values.get(5));
}

private static String firstOrEmpty(List<String> strings) {
return strings.isEmpty() ? "" : strings.get(0);
(String) values.get(3), (String) values.get(4),
(List<String>) values.get(5));
}

private static Set<String> wrap(List<String> strings) {
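Note: the constructor change follows from the .single() highlight projections in SearchService above. A single-valued highlight projection yields one String (or null when there is nothing to highlight) instead of a List<String>, which is why firstOrEmpty() is gone and nulls are mapped to "". A quick, made-up illustration of the new constructor (the guide URL and fragment texts are hypothetical):

// assumes java.net.URI and java.util.List are imported
var hit = new GuideSearchHit(
        URI.create("https://quarkus.io/guides/hibernate-search"),          // url (hypothetical)
        "guide",                                                            // type
        "quarkus",                                                          // origin
        "Getting started with <span class=\"highlighted\">search</span>",  // single title fragment
        null,                                                               // no summary highlight -> stored as ""
        List.of("...<span class=\"highlighted\">search</span> your entities..."));  // content snippets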
6 changes: 3 additions & 3 deletions src/main/java/io/quarkus/search/app/entity/Guide.java
@@ -46,20 +46,20 @@ public class Guide {
@KeywordField
public String origin;

@I18nFullTextField(highlightable = Highlightable.UNIFIED, termVector = TermVector.WITH_POSITIONS_OFFSETS, analyzerPrefix = AnalysisConfigurer.DEFAULT, searchAnalyzerPrefix = AnalysisConfigurer.DEFAULT_SEARCH)
@I18nFullTextField(highlightable = Highlightable.FAST_VECTOR, termVector = TermVector.WITH_POSITIONS_OFFSETS, analyzerPrefix = AnalysisConfigurer.DEFAULT, searchAnalyzerPrefix = AnalysisConfigurer.DEFAULT_SEARCH)
@I18nFullTextField(name = "title_autocomplete", analyzerPrefix = AnalysisConfigurer.AUTOCOMPLETE, searchAnalyzerPrefix = AnalysisConfigurer.DEFAULT_SEARCH)
@I18nKeywordField(name = "title_sort", normalizerPrefix = AnalysisConfigurer.SORT, searchable = Searchable.NO, sortable = Sortable.YES)
public I18nData<String> title = new I18nData<>();

@I18nFullTextField(highlightable = Highlightable.UNIFIED, termVector = TermVector.WITH_POSITIONS_OFFSETS, analyzerPrefix = AnalysisConfigurer.DEFAULT, searchAnalyzerPrefix = AnalysisConfigurer.DEFAULT_SEARCH)
@I18nFullTextField(highlightable = Highlightable.FAST_VECTOR, termVector = TermVector.WITH_POSITIONS_OFFSETS, analyzerPrefix = AnalysisConfigurer.DEFAULT, searchAnalyzerPrefix = AnalysisConfigurer.DEFAULT_SEARCH)
@I18nFullTextField(name = "summary_autocomplete", analyzerPrefix = AnalysisConfigurer.AUTOCOMPLETE, searchAnalyzerPrefix = AnalysisConfigurer.DEFAULT_SEARCH)
public I18nData<String> summary = new I18nData<>();

@I18nFullTextField(analyzerPrefix = AnalysisConfigurer.DEFAULT, searchAnalyzerPrefix = AnalysisConfigurer.DEFAULT_SEARCH)
@I18nFullTextField(name = "keywords_autocomplete", analyzerPrefix = AnalysisConfigurer.AUTOCOMPLETE, searchAnalyzerPrefix = AnalysisConfigurer.DEFAULT_SEARCH)
public I18nData<String> keywords = new I18nData<>();

@I18nFullTextField(name = "fullContent", valueBridge = @ValueBridgeRef(type = InputProviderHtmlBodyTextBridge.class), highlightable = Highlightable.UNIFIED, termVector = TermVector.WITH_POSITIONS_OFFSETS, analyzerPrefix = AnalysisConfigurer.DEFAULT, searchAnalyzerPrefix = AnalysisConfigurer.DEFAULT_SEARCH)
@I18nFullTextField(name = "fullContent", valueBridge = @ValueBridgeRef(type = InputProviderHtmlBodyTextBridge.class), highlightable = Highlightable.FAST_VECTOR, termVector = TermVector.WITH_POSITIONS_OFFSETS, analyzerPrefix = AnalysisConfigurer.DEFAULT, searchAnalyzerPrefix = AnalysisConfigurer.DEFAULT_SEARCH)
@I18nFullTextField(name = "fullContent_autocomplete", valueBridge = @ValueBridgeRef(type = InputProviderHtmlBodyTextBridge.class), analyzerPrefix = AnalysisConfigurer.AUTOCOMPLETE, searchAnalyzerPrefix = AnalysisConfigurer.DEFAULT_SEARCH)
@IndexingDependency(reindexOnUpdate = ReindexOnUpdate.NO)
public I18nData<InputProvider> htmlFullContentProvider = new I18nData<>();
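Note: switching Highlightable.UNIFIED to Highlightable.FAST_VECTOR is what allows the fast-vector highlighters configured in SearchService above; fast-vector highlighting relies on term vectors with positions and offsets, which these fields already declare. A minimal single-language sketch of such a mapping using the plain @FullTextField annotation (the repo's @I18nFullTextField is a per-language wrapper whose definition is not part of this diff; the entity name and analyzer name below are illustrative only):

import org.hibernate.search.engine.backend.types.Highlightable;
import org.hibernate.search.engine.backend.types.TermVector;
import org.hibernate.search.mapper.pojo.mapping.definition.annotation.FullTextField;

public class ExampleEntity {
    @FullTextField(
            highlightable = Highlightable.FAST_VECTOR,       // allow the fast-vector highlighter
            termVector = TermVector.WITH_POSITIONS_OFFSETS,  // required by fast-vector highlighting
            analyzer = "english")                            // illustrative analyzer name
    public String title;
}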
(changed file: analysis configurer — file header not captured in this view)
@@ -44,10 +44,6 @@ public static String autocompleteAnalyzer(Language language) {
return language.addSuffix(AUTOCOMPLETE);
}

private static String stopFilter(Language language) {
return "stop_%s".formatted(language.code);
}

private static String regularStemmerFilter(Language language) {
return "stemmer_%s".formatted(language.code);
}
@@ -99,8 +95,6 @@ void configureEnglishLikeLanguage(ElasticsearchAnalysisConfigurationContext cont
"lowercase",
// To remove possessives (trailing 's) from words.
possessiveStemmerFilter(language),
// To remove frequently used words that do not bring much meaning, e.g. a, that, and, are, as, at, with...
stopFilter(language),
// To remove suffixes like -s/-es/-ed etc
regularStemmerFilter(language),
// To convert characters into ascii ones, e.g. à to a or ę to e etc.
@@ -113,7 +107,6 @@ void configureEnglishLikeLanguage(ElasticsearchAnalysisConfigurationContext cont
.tokenFilters(
"lowercase",
possessiveStemmerFilter(language),
stopFilter(language),
regularStemmerFilter(language),
"asciifolding",
// > In general, synonym filters rewrite their inputs to the tokenizer and filters used in the preceding analysis chain
@@ -133,7 +126,6 @@ void configureEnglishLikeLanguage(ElasticsearchAnalysisConfigurationContext cont
compoundTechnicalNameFilter(language),
"lowercase",
possessiveStemmerFilter(language),
stopFilter(language),
regularStemmerFilter(language),
"asciifolding",
autocompleteEdgeNgramFilter(language))
@@ -156,7 +148,6 @@ void configureJapanese(ElasticsearchAnalysisConfigurationContext context) {
"kuromoji_part_of_speech",
possessiveStemmerFilter(language),
"ja_stop",
stopFilter(language),
"kuromoji_stemmer",
regularStemmerFilter(language),
"asciifolding")
@@ -175,7 +166,6 @@ void configureJapanese(ElasticsearchAnalysisConfigurationContext context) {
"kuromoji_part_of_speech",
possessiveStemmerFilter(language),
"ja_stop",
stopFilter(language),
"kuromoji_stemmer",
regularStemmerFilter(language),
"asciifolding",
@@ -193,7 +183,6 @@ void configureJapanese(ElasticsearchAnalysisConfigurationContext context) {
"kuromoji_part_of_speech",
possessiveStemmerFilter(language),
"ja_stop",
stopFilter(language),
"kuromoji_stemmer",
regularStemmerFilter(language),
"asciifolding",
@@ -218,7 +207,6 @@ void configureChinese(ElasticsearchAnalysisConfigurationContext context) {
"lowercase",
possessiveStemmerFilter(language),
"smartcn_stop",
stopFilter(language),
regularStemmerFilter(language),
"asciifolding")
.charFilters("html_strip");
@@ -229,7 +217,6 @@ void configureChinese(ElasticsearchAnalysisConfigurationContext context) {
.tokenFilters(
"lowercase",
possessiveStemmerFilter(language),
stopFilter(language),
regularStemmerFilter(language),
"asciifolding",
synonymsGraphFilter(language),
@@ -244,7 +231,6 @@ void configureChinese(ElasticsearchAnalysisConfigurationContext context) {
"lowercase",
possessiveStemmerFilter(language),
"smartcn_stop",
stopFilter(language),
regularStemmerFilter(language),
"asciifolding",
autocompleteEdgeNgramFilter(language))
@@ -256,10 +242,6 @@
}

private static void configureSharedFilters(ElasticsearchAnalysisConfigurationContext context, Language language) {
context.tokenFilter(stopFilter(language))
.type("stop")
.param("stopwords", "_english_")
.param("ignore_case", "true");
context.tokenFilter(regularStemmerFilter(language))
.type("stemmer")
.param("language", "english");
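Note: this hunk implements the "Do not remove stopwords from search" commit. The shared stop token filter (English stopwords, defined in the removed configureSharedFilters lines above) disappears from every indexing, search and autocomplete analyzer, while the language-specific ja_stop/smartcn_stop filters stay. The practical effect, as I read it (the PR does not spell out the motivation): words such as "is", "not" or "with" are no longer thrown away at analysis time, so they can participate in matching and highlighting. Illustrative fragment, reusing the predicate factory f and language from SearchService above with a hypothetical query string:

// Before this change, the stop filter removed "what", "is" and "a" during analysis, so only
// "bean" was indexed and matched; now all four tokens survive analysis and take part in matching.
var predicate = f.simpleQueryString()
        .field(language.addSuffix("fullContent"))
        .matching("what is a bean")
        .defaultOperator(BooleanOperator.AND)
        .toPredicate();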
(changed file: indexing service — file header not captured in this view)
@@ -238,6 +238,8 @@ private void indexAll(FailureCollector failureCollector) {
var future = searchMapping.scope(Object.class).massIndexer()
// no point in cleaning the data because of the rollover ^
.purgeAllOnStart(false)
// data is read-only after indexing -- we may as well have a single segment
.mergeSegmentsOnFinish(true)
.batchSizeToLoadObjects(indexingConfig.batchSize())
.threadsToLoadObjects(indexingConfig.parallelism().orElse(6))
.context(QuarkusIOLoadingContext.class, QuarkusIOLoadingContext.of(quarkusIO))
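Note: the index is rebuilt behind a rollover and, per the comment above, is read-only after indexing, so merging down to a single segment once at the end of the mass indexing run trades a one-off indexing cost for typically cheaper searches. A condensed sketch of the mass-indexer call after this change (the context(...) and failure-handling wiring from the surrounding code are elided):

var future = searchMapping.scope(Object.class).massIndexer()
        .purgeAllOnStart(false)              // the rollover already provides an empty index
        .mergeSegmentsOnFinish(true)         // data is read-only after indexing: one segment is enough
        .batchSizeToLoadObjects(indexingConfig.batchSize())
        .threadsToLoadObjects(indexingConfig.parallelism().orElse(6))
        .start();                            // returns a CompletionStage for the indexing run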
8 changes: 5 additions & 3 deletions src/main/resources/application.properties
@@ -70,10 +70,11 @@ quarkus.rest.path=/api
quarkus.hibernate-search-standalone.elasticsearch.version=opensearch:2.16
# Not using :latest here as a workaround until we get https://github.com/quarkusio/quarkus/pull/38896
quarkus.elasticsearch.devservices.image-name=opensearch-custom:${maven.version.opensearch}
quarkus.elasticsearch.devservices.java-opts=${PROD_OPENSEARCH_JAVA_OPTS}
# Limit parallelism of indexing, because OpenSearch can only handle so many documents in its buffers.
# This leads to at most 12*20=240 documents being indexed in parallel, which should be plenty
# This leads to at most 8*20=160 documents being indexed in parallel, which should be plenty
# given how large our documents can be.
INDEXING_QUEUE_COUNT=12
INDEXING_QUEUE_COUNT=8
INDEXING_BULK_SIZE=20
quarkus.hibernate-search-standalone.elasticsearch.indexing.queue-count=${INDEXING_QUEUE_COUNT}
quarkus.hibernate-search-standalone.elasticsearch.indexing.max-bulk-size=${INDEXING_BULK_SIZE}
@@ -236,7 +237,8 @@ quarkus.helm.values."opensearch-image".paths=(kind == StatefulSet).spec.template
quarkus.helm.values."opensearch-image".value=opensearch-custom:${maven.revision}
quarkus.helm.values."opensearch-image"[email protected]
# Resource requirements (overridden for staging, see src/main/helm)
quarkus.helm.values."@.opensearch.envs.OPENSEARCH_JAVA_OPTS".value=\ -Xms1g -Xmx1g
PROD_OPENSEARCH_JAVA_OPTS=-Xms1g -Xmx1g
quarkus.helm.values."@.opensearch.envs.OPENSEARCH_JAVA_OPTS".value=\ ${PROD_OPENSEARCH_JAVA_OPTS}
quarkus.helm.values."@.opensearch.resources.limits.cpu".value=2000m
quarkus.helm.values."@.opensearch.resources.requests.cpu".value=500m
quarkus.helm.values."@.opensearch.resources.limits.memory".value=2Gi