From d0dad5264770dfb7653bc93bd281b2fb903f522e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 26 Aug 2024 01:16:16 +0000 Subject: [PATCH 01/14] Bump the quarkus group with 3 updates Bumps the quarkus group with 3 updates: [io.quarkus:quarkus-bom](https://github.com/quarkusio/quarkus), [io.quarkus:quarkus-ide-config](https://github.com/quarkusio/quarkus) and io.quarkus:quarkus-maven-plugin. Updates `io.quarkus:quarkus-bom` from 3.13.2 to 3.14.0 - [Release notes](https://github.com/quarkusio/quarkus/releases) - [Commits](https://github.com/quarkusio/quarkus/compare/3.13.2...3.14.0) Updates `io.quarkus:quarkus-ide-config` from 3.13.2 to 3.14.0 - [Release notes](https://github.com/quarkusio/quarkus/releases) - [Commits](https://github.com/quarkusio/quarkus/compare/3.13.2...3.14.0) Updates `io.quarkus:quarkus-maven-plugin` from 3.13.2 to 3.14.0 Updates `io.quarkus:quarkus-ide-config` from 3.13.2 to 3.14.0 - [Release notes](https://github.com/quarkusio/quarkus/releases) - [Commits](https://github.com/quarkusio/quarkus/compare/3.13.2...3.14.0) Updates `io.quarkus:quarkus-maven-plugin` from 3.13.2 to 3.14.0 --- updated-dependencies: - dependency-name: io.quarkus:quarkus-bom dependency-type: direct:production update-type: version-update:semver-minor dependency-group: quarkus - dependency-name: io.quarkus:quarkus-ide-config dependency-type: direct:production update-type: version-update:semver-minor dependency-group: quarkus - dependency-name: io.quarkus:quarkus-maven-plugin dependency-type: direct:production update-type: version-update:semver-minor dependency-group: quarkus - dependency-name: io.quarkus:quarkus-ide-config dependency-type: direct:production update-type: version-update:semver-minor dependency-group: quarkus - dependency-name: io.quarkus:quarkus-maven-plugin dependency-type: direct:production update-type: version-update:semver-minor dependency-group: quarkus ... 
Signed-off-by: dependabot[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index bcb57b7..fdd0fa3 100644 --- a/pom.xml +++ b/pom.xml @@ -27,7 +27,7 @@ UTF-8 quarkus-bom io.quarkus - 3.13.2 + 3.14.0 999-SNAPSHOT true 3.4.0 From 7e1825c886c103f9424741a57ce3d5068d256692 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yoann=20Rodi=C3=A8re?= Date: Tue, 27 Aug 2024 16:14:13 +0200 Subject: [PATCH 02/14] Migrate from ValueConvert to ValueModel --- src/main/java/io/quarkus/search/app/SearchService.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/io/quarkus/search/app/SearchService.java b/src/main/java/io/quarkus/search/app/SearchService.java index 2a0502f..a626048 100644 --- a/src/main/java/io/quarkus/search/app/SearchService.java +++ b/src/main/java/io/quarkus/search/app/SearchService.java @@ -23,7 +23,7 @@ import io.quarkus.runtime.LaunchMode; import org.hibernate.search.engine.search.common.BooleanOperator; -import org.hibernate.search.engine.search.common.ValueConvert; +import org.hibernate.search.engine.search.common.ValueModel; import org.hibernate.search.engine.search.predicate.dsl.SimpleQueryFlag; import org.hibernate.search.mapper.pojo.standalone.mapping.SearchMapping; @@ -104,7 +104,7 @@ public SearchResult search(@RestQuery @DefaultValue(QuarkusVersi .defaultOperator(BooleanOperator.AND)) .should(f.match().field("origin").matching("quarkus").boost(50.0f)) .should(f.not(f.match().field(language.addSuffix("topics")) - .matching("compatibility", ValueConvert.NO)) + .matching("compatibility", ValueModel.INDEX)) .boost(50.0f))); } }) From 1772cb34a7b6ad7130fb787d22e3e631ba1d6c10 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 2 Sep 2024 01:49:09 +0000 Subject: [PATCH 03/14] Bump org.apache.maven.plugins:maven-surefire-plugin Bumps the maven-plugins group with 1 update: 
[org.apache.maven.plugins:maven-surefire-plugin](https://github.com/apache/maven-surefire). Updates `org.apache.maven.plugins:maven-surefire-plugin` from 3.4.0 to 3.5.0 - [Release notes](https://github.com/apache/maven-surefire/releases) - [Commits](https://github.com/apache/maven-surefire/compare/surefire-3.4.0...surefire-3.5.0) --- updated-dependencies: - dependency-name: org.apache.maven.plugins:maven-surefire-plugin dependency-type: direct:production update-type: version-update:semver-minor dependency-group: maven-plugins ... Signed-off-by: dependabot[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index fdd0fa3..262f90b 100644 --- a/pom.xml +++ b/pom.xml @@ -30,7 +30,7 @@ 3.14.0 999-SNAPSHOT true - 3.4.0 + 3.5.0 -Xms2g -Xmx2g 0.45.0 2.24.1 From 2c11b70b3116b2c315d11e1f0e0fc7a4f9131deb Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 2 Sep 2024 07:02:20 +0000 Subject: [PATCH 04/14] Bump the quarkus group with 3 updates Bumps the quarkus group with 3 updates: [io.quarkus:quarkus-bom](https://github.com/quarkusio/quarkus), [io.quarkus:quarkus-ide-config](https://github.com/quarkusio/quarkus) and io.quarkus:quarkus-maven-plugin. 
Updates `io.quarkus:quarkus-bom` from 3.14.0 to 3.14.1 - [Release notes](https://github.com/quarkusio/quarkus/releases) - [Commits](https://github.com/quarkusio/quarkus/compare/3.14.0...3.14.1) Updates `io.quarkus:quarkus-ide-config` from 3.14.0 to 3.14.1 - [Release notes](https://github.com/quarkusio/quarkus/releases) - [Commits](https://github.com/quarkusio/quarkus/compare/3.14.0...3.14.1) Updates `io.quarkus:quarkus-maven-plugin` from 3.14.0 to 3.14.1 Updates `io.quarkus:quarkus-ide-config` from 3.14.0 to 3.14.1 - [Release notes](https://github.com/quarkusio/quarkus/releases) - [Commits](https://github.com/quarkusio/quarkus/compare/3.14.0...3.14.1) Updates `io.quarkus:quarkus-maven-plugin` from 3.14.0 to 3.14.1 --- updated-dependencies: - dependency-name: io.quarkus:quarkus-bom dependency-type: direct:production update-type: version-update:semver-patch dependency-group: quarkus - dependency-name: io.quarkus:quarkus-ide-config dependency-type: direct:production update-type: version-update:semver-patch dependency-group: quarkus - dependency-name: io.quarkus:quarkus-maven-plugin dependency-type: direct:production update-type: version-update:semver-patch dependency-group: quarkus - dependency-name: io.quarkus:quarkus-ide-config dependency-type: direct:production update-type: version-update:semver-patch dependency-group: quarkus - dependency-name: io.quarkus:quarkus-maven-plugin dependency-type: direct:production update-type: version-update:semver-patch dependency-group: quarkus ... 
Signed-off-by: dependabot[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 262f90b..5a69f47 100644 --- a/pom.xml +++ b/pom.xml @@ -27,7 +27,7 @@ UTF-8 quarkus-bom io.quarkus - 3.14.0 + 3.14.1 999-SNAPSHOT true 3.5.0 From 6309a7c582a3d1627a5f21bfbe6421fdcaf4dc6b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yoann=20Rodi=C3=A8re?= Date: Fri, 6 Sep 2024 13:30:44 +0200 Subject: [PATCH 05/14] Use single-valued highlighting where possible --- .../java/io/quarkus/search/app/SearchService.java | 4 ++-- .../io/quarkus/search/app/dto/GuideSearchHit.java | 15 ++++++--------- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/src/main/java/io/quarkus/search/app/SearchService.java b/src/main/java/io/quarkus/search/app/SearchService.java index a626048..c1dc720 100644 --- a/src/main/java/io/quarkus/search/app/SearchService.java +++ b/src/main/java/io/quarkus/search/app/SearchService.java @@ -73,8 +73,8 @@ public SearchResult search(@RestQuery @DefaultValue(QuarkusVersi f.id(), f.field("type"), f.field("origin"), - f.highlight(language.addSuffix("title")), - f.highlight(language.addSuffix("summary")), + f.highlight(language.addSuffix("title")).single(), + f.highlight(language.addSuffix("summary")).single(), f.highlight(language.addSuffix("fullContent")).highlighter("highlighter_content")) .asList(GuideSearchHit::new)) .where((f, root) -> { diff --git a/src/main/java/io/quarkus/search/app/dto/GuideSearchHit.java b/src/main/java/io/quarkus/search/app/dto/GuideSearchHit.java index 47d1ede..90925d9 100644 --- a/src/main/java/io/quarkus/search/app/dto/GuideSearchHit.java +++ b/src/main/java/io/quarkus/search/app/dto/GuideSearchHit.java @@ -10,21 +10,18 @@ public record GuideSearchHit(URI url, String type, String origin, String title, public GuideSearchHit(URI url, String type, String origin, - List title, - List summary, - List fullContent) { - this(url, type, origin, firstOrEmpty(title), firstOrEmpty(summary), 
wrap(fullContent)); + String title, + String summary, + List content) { + this(url, type, origin, title != null ? title : "", summary != null ? summary : "", wrap(content)); } @SuppressWarnings("unchecked") public GuideSearchHit(List values) { this( (URI) values.get(0), (String) values.get(1), (String) values.get(2), - (List) values.get(3), (List) values.get(4), (List) values.get(5)); - } - - private static String firstOrEmpty(List strings) { - return strings.isEmpty() ? "" : strings.get(0); + (String) values.get(3), (String) values.get(4), + (List) values.get(5)); } private static Set wrap(List strings) { From f15a747e28cef59debf553455a62bafa36d3d4da Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 9 Sep 2024 01:11:29 +0000 Subject: [PATCH 06/14] Bump the quarkus group with 5 updates Bumps the quarkus group with 5 updates: | Package | From | To | | --- | --- | --- | | [io.quarkus:quarkus-bom](https://github.com/quarkusio/quarkus) | `3.14.1` | `3.14.2` | | [io.quarkus:quarkus-ide-config](https://github.com/quarkusio/quarkus) | `3.14.1` | `3.14.2` | | io.quarkus:quarkus-maven-plugin | `3.14.1` | `3.14.2` | | [io.quarkiverse.jgit:quarkus-jgit](https://github.com/quarkiverse/quarkus-jgit) | `3.1.2` | `3.1.3` | | [io.quarkiverse.web-bundler:quarkus-web-bundler](https://github.com/quarkiverse/quarkus-web-bundler) | `1.7.0` | `1.7.1` | Updates `io.quarkus:quarkus-bom` from 3.14.1 to 3.14.2 - [Release notes](https://github.com/quarkusio/quarkus/releases) - [Commits](https://github.com/quarkusio/quarkus/compare/3.14.1...3.14.2) Updates `io.quarkus:quarkus-ide-config` from 3.14.1 to 3.14.2 - [Release notes](https://github.com/quarkusio/quarkus/releases) - [Commits](https://github.com/quarkusio/quarkus/compare/3.14.1...3.14.2) Updates `io.quarkus:quarkus-maven-plugin` from 3.14.1 to 3.14.2 Updates `io.quarkiverse.jgit:quarkus-jgit` from 3.1.2 to 3.1.3 - [Release 
notes](https://github.com/quarkiverse/quarkus-jgit/releases) - [Commits](https://github.com/quarkiverse/quarkus-jgit/compare/3.1.2...3.1.3) Updates `io.quarkiverse.web-bundler:quarkus-web-bundler` from 1.7.0 to 1.7.1 - [Release notes](https://github.com/quarkiverse/quarkus-web-bundler/releases) - [Commits](https://github.com/quarkiverse/quarkus-web-bundler/compare/1.7.0...1.7.1) Updates `io.quarkus:quarkus-ide-config` from 3.14.1 to 3.14.2 - [Release notes](https://github.com/quarkusio/quarkus/releases) - [Commits](https://github.com/quarkusio/quarkus/compare/3.14.1...3.14.2) Updates `io.quarkus:quarkus-maven-plugin` from 3.14.1 to 3.14.2 --- updated-dependencies: - dependency-name: io.quarkus:quarkus-bom dependency-type: direct:production update-type: version-update:semver-patch dependency-group: quarkus - dependency-name: io.quarkus:quarkus-ide-config dependency-type: direct:production update-type: version-update:semver-patch dependency-group: quarkus - dependency-name: io.quarkus:quarkus-maven-plugin dependency-type: direct:production update-type: version-update:semver-patch dependency-group: quarkus - dependency-name: io.quarkiverse.jgit:quarkus-jgit dependency-type: direct:production update-type: version-update:semver-patch dependency-group: quarkus - dependency-name: io.quarkiverse.web-bundler:quarkus-web-bundler dependency-type: direct:production update-type: version-update:semver-patch dependency-group: quarkus - dependency-name: io.quarkus:quarkus-ide-config dependency-type: direct:production update-type: version-update:semver-patch dependency-group: quarkus - dependency-name: io.quarkus:quarkus-maven-plugin dependency-type: direct:production update-type: version-update:semver-patch dependency-group: quarkus ... 
Signed-off-by: dependabot[bot] --- pom.xml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pom.xml b/pom.xml index 5a69f47..ea6f718 100644 --- a/pom.xml +++ b/pom.xml @@ -27,7 +27,7 @@ UTF-8 quarkus-bom io.quarkus - 3.14.1 + 3.14.2 999-SNAPSHOT true 3.5.0 @@ -37,7 +37,7 @@ 1.11.0 2.16 - 1.7.0 + 1.7.1 @@ -58,7 +58,7 @@ io.quarkiverse.jgit quarkus-jgit - 3.1.2 + 3.1.3 io.quarkiverse.githubapi From f4b49cb0e6b62c02ea6d734e42ae4227972a2dbd Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 9 Sep 2024 06:28:58 +0000 Subject: [PATCH 07/14] Bump net.revelc.code:impsort-maven-plugin in the maven-plugins group Bumps the maven-plugins group with 1 update: net.revelc.code:impsort-maven-plugin. Updates `net.revelc.code:impsort-maven-plugin` from 1.11.0 to 1.12.0 --- updated-dependencies: - dependency-name: net.revelc.code:impsort-maven-plugin dependency-type: direct:production update-type: version-update:semver-minor dependency-group: maven-plugins ... 
Signed-off-by: dependabot[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index ea6f718..243fa42 100644 --- a/pom.xml +++ b/pom.xml @@ -34,7 +34,7 @@ -Xms2g -Xmx2g 0.45.0 2.24.1 - 1.11.0 + 1.12.0 2.16 1.7.1 From c343b358e8b38e440277e2b7ca3d6ee5f9782a55 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yoann=20Rodi=C3=A8re?= Date: Fri, 6 Sep 2024 14:58:19 +0200 Subject: [PATCH 08/14] Clarify highlighter configuration --- .../io/quarkus/search/app/SearchService.java | 40 ++++++++++--------- 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/src/main/java/io/quarkus/search/app/SearchService.java b/src/main/java/io/quarkus/search/app/SearchService.java index c1dc720..d75ebb4 100644 --- a/src/main/java/io/quarkus/search/app/SearchService.java +++ b/src/main/java/io/quarkus/search/app/SearchService.java @@ -36,7 +36,7 @@ @Path("/") public class SearchService { - private static final int NO_MATCH_SIZE = 32_600; + private static final int TITLE_OR_SUMMARY_MAX_SIZE = 32_600; private static final int PAGE_SIZE = 50; private static final long TOTAL_HIT_COUNT_THRESHOLD = 100; private static final String MAX_FOR_PERF_MESSAGE = "{jakarta.validation.constraints.Max.message} for performance reasons"; @@ -73,8 +73,8 @@ public SearchResult search(@RestQuery @DefaultValue(QuarkusVersi f.id(), f.field("type"), f.field("origin"), - f.highlight(language.addSuffix("title")).single(), - f.highlight(language.addSuffix("summary")).single(), + f.highlight(language.addSuffix("title")).highlighter("highlighter_title_or_summary").single(), + f.highlight(language.addSuffix("summary")).highlighter("highlighter_title_or_summary").single(), f.highlight(language.addSuffix("fullContent")).highlighter("highlighter_content")) .asList(GuideSearchHit::new)) .where((f, root) -> { @@ -108,21 +108,25 @@ public SearchResult search(@RestQuery @DefaultValue(QuarkusVersi .boost(50.0f))); } }) - // * Highlighters are going to use spans-with-classes so that 
we will have more control over styling the visual on the search results screen. - // * We give control to the caller on the content snippet length and the number of these fragments - // * No match size is there to make sure that we are still going to get the text even if the field didn't have a match in it. - // * The title in the Guide entity is `Length.LONG` long, so we use that as a max value for no-match size, but hopefully nobody writes a title that long... - .highlighter( - f -> f.unified().noMatchSize(NO_MATCH_SIZE).fragmentSize(0) - .orderByScore(true) - .numberOfFragments(1) - .tag("", "") - .boundaryScanner().sentence().end()) - // * If there's no match in the full content we don't want to return anything. - // * Also content is really huge, so we want to only get small parts of the sentences. We are allowing caller to pick the number of sentences and their length: - .highlighter("highlighter_content", - f -> f.unified().noMatchSize(0).numberOfFragments(contentSnippets) - .fragmentSize(contentSnippetsLength)) + .highlighter(f -> f.unified() + // Highlighters are going to use spans-with-classes so that we will have more control over styling the visual on the search results screen. + .tag("", "")) + .highlighter("highlighter_title_or_summary", f -> f.unified() + // We want the whole text of the field, regardless of whether it has a match or not. + .noMatchSize(TITLE_OR_SUMMARY_MAX_SIZE) + .fragmentSize(TITLE_OR_SUMMARY_MAX_SIZE) + // We want the whole text as a single fragment + .numberOfFragments(1)) + .highlighter("highlighter_content", f -> f.unified() + // If there's no match in the full content we don't want to return anything. + .noMatchSize(0) + // Content is really huge, so we want to only get small parts of the sentences. 
+ // We give control to the caller on the content snippet length and the number of these fragments + .numberOfFragments(contentSnippets) + .fragmentSize(contentSnippetsLength) + // The rest of fragment configuration is static + .orderByScore(true) + .boundaryScanner().sentence().end()) .sort(f -> f.score().then().field(language.addSuffix("title_sort"))) .routing(QuarkusVersionAndLanguageRoutingBinder.searchKeys(version, language)) .totalHitCountThreshold(TOTAL_HIT_COUNT_THRESHOLD + (page + 1) * PAGE_SIZE) From 09907ad96d5433670c7a4340f324af8151a45061 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yoann=20Rodi=C3=A8re?= Date: Fri, 6 Sep 2024 16:31:22 +0200 Subject: [PATCH 09/14] Do not remove stopwords from search First because it's not strictly necessary, as the BM25 scoring we use (the default) takes inverse document frequency into account, so that a stopword is much less likely to impact the score. Second, because we require all terms given by the user to match, so stopwords won't add a gazillion more matching documents. And third, because removing stopwords leads to strange behavior with the fast-vector highlighter. Note I kept the cn/ja stopwords removers, because those seem to do more than remove stopwords. Without them, I got highlighting on commas, for example... 
--- .../app/hibernate/AnalysisConfigurer.java | 18 ------------------ .../quarkus/search/app/SearchServiceTest.java | 12 ++++++------ 2 files changed, 6 insertions(+), 24 deletions(-) diff --git a/src/main/java/io/quarkus/search/app/hibernate/AnalysisConfigurer.java b/src/main/java/io/quarkus/search/app/hibernate/AnalysisConfigurer.java index 504003d..896d814 100644 --- a/src/main/java/io/quarkus/search/app/hibernate/AnalysisConfigurer.java +++ b/src/main/java/io/quarkus/search/app/hibernate/AnalysisConfigurer.java @@ -44,10 +44,6 @@ public static String autocompleteAnalyzer(Language language) { return language.addSuffix(AUTOCOMPLETE); } - private static String stopFilter(Language language) { - return "stop_%s".formatted(language.code); - } - private static String regularStemmerFilter(Language language) { return "stemmer_%s".formatted(language.code); } @@ -99,8 +95,6 @@ void configureEnglishLikeLanguage(ElasticsearchAnalysisConfigurationContext cont "lowercase", // To remove possessives (trailing 's) from words. possessiveStemmerFilter(language), - // To remove frequently used words that do not bring much meaning, e.g. a, that, and, are, as, at, with... - stopFilter(language), // To remove suffixes like -s/-es/-ed etc regularStemmerFilter(language), // To convert characters into ascii ones, e.g. à to a or ę to e etc. 
@@ -113,7 +107,6 @@ void configureEnglishLikeLanguage(ElasticsearchAnalysisConfigurationContext cont .tokenFilters( "lowercase", possessiveStemmerFilter(language), - stopFilter(language), regularStemmerFilter(language), "asciifolding", // > In general, synonym filters rewrite their inputs to the tokenizer and filters used in the preceding analysis chain @@ -133,7 +126,6 @@ void configureEnglishLikeLanguage(ElasticsearchAnalysisConfigurationContext cont compoundTechnicalNameFilter(language), "lowercase", possessiveStemmerFilter(language), - stopFilter(language), regularStemmerFilter(language), "asciifolding", autocompleteEdgeNgramFilter(language)) @@ -156,7 +148,6 @@ void configureJapanese(ElasticsearchAnalysisConfigurationContext context) { "kuromoji_part_of_speech", possessiveStemmerFilter(language), "ja_stop", - stopFilter(language), "kuromoji_stemmer", regularStemmerFilter(language), "asciifolding") @@ -175,7 +166,6 @@ void configureJapanese(ElasticsearchAnalysisConfigurationContext context) { "kuromoji_part_of_speech", possessiveStemmerFilter(language), "ja_stop", - stopFilter(language), "kuromoji_stemmer", regularStemmerFilter(language), "asciifolding", @@ -193,7 +183,6 @@ void configureJapanese(ElasticsearchAnalysisConfigurationContext context) { "kuromoji_part_of_speech", possessiveStemmerFilter(language), "ja_stop", - stopFilter(language), "kuromoji_stemmer", regularStemmerFilter(language), "asciifolding", @@ -218,7 +207,6 @@ void configureChinese(ElasticsearchAnalysisConfigurationContext context) { "lowercase", possessiveStemmerFilter(language), "smartcn_stop", - stopFilter(language), regularStemmerFilter(language), "asciifolding") .charFilters("html_strip"); @@ -229,7 +217,6 @@ void configureChinese(ElasticsearchAnalysisConfigurationContext context) { .tokenFilters( "lowercase", possessiveStemmerFilter(language), - stopFilter(language), regularStemmerFilter(language), "asciifolding", synonymsGraphFilter(language), @@ -244,7 +231,6 @@ void 
configureChinese(ElasticsearchAnalysisConfigurationContext context) { "lowercase", possessiveStemmerFilter(language), "smartcn_stop", - stopFilter(language), regularStemmerFilter(language), "asciifolding", autocompleteEdgeNgramFilter(language)) @@ -256,10 +242,6 @@ void configureChinese(ElasticsearchAnalysisConfigurationContext context) { } private static void configureSharedFilters(ElasticsearchAnalysisConfigurationContext context, Language language) { - context.tokenFilter(stopFilter(language)) - .type("stop") - .param("stopwords", "_english_") - .param("ignore_case", "true"); context.tokenFilter(regularStemmerFilter(language)) .type("stemmer") .param("language", "english"); diff --git a/src/test/java/io/quarkus/search/app/SearchServiceTest.java b/src/test/java/io/quarkus/search/app/SearchServiceTest.java index 02c29fa..dbed921 100644 --- a/src/test/java/io/quarkus/search/app/SearchServiceTest.java +++ b/src/test/java/io/quarkus/search/app/SearchServiceTest.java @@ -203,8 +203,8 @@ private static List relevance() { // TODO Shouldn't the ORM guide be before Panache? 
GuideRef.HIBERNATE_ORM_PANACHE, GuideRef.HIBERNATE_ORM, - GuideRef.HIBERNATE_ORM_PANACHE_KOTLIN, GuideRef.HIBERNATE_SEARCH_ORM_ELASTICSEARCH, + GuideRef.HIBERNATE_ORM_PANACHE_KOTLIN, GuideRef.HIBERNATE_REACTIVE_PANACHE, GuideRef.HIBERNATE_REACTIVE)), Arguments.of("reactive", GuideRef.urls( @@ -217,8 +217,8 @@ private static List relevance() { GuideRef.HIBERNATE_REACTIVE, GuideRef.HIBERNATE_REACTIVE_PANACHE, GuideRef.HIBERNATE_ORM_PANACHE, - GuideRef.HIBERNATE_ORM_PANACHE_KOTLIN, - GuideRef.HIBERNATE_ORM)), + GuideRef.HIBERNATE_ORM, + GuideRef.HIBERNATE_ORM_PANACHE_KOTLIN)), Arguments.of("jpa", GuideRef.urls( GuideRef.HIBERNATE_ORM, GuideRef.HIBERNATE_ORM_PANACHE, @@ -433,7 +433,7 @@ void quoteEmptyQuoteTitleTranslation() { .extract().body().as(SEARCH_RESULT_SEARCH_HITS); assertThat(result.hits()).extracting(GuideSearchHit::title) .contains( - "Duplicated context, context locals, asynchronous processing and propagation"); + "Duplicated context, context locals, asynchronous processing and propagation"); } @Test @@ -447,7 +447,7 @@ void searchForPhrase() { assertThat(result.hits()).extracting(GuideSearchHit::title) .contains( // unified highlighter will still "highlight" the phrase word by word: - "Duplicated context, context locals, asynchronous processing and propagation"); + "Duplicated context, context locals, asynchronous processing and propagation"); } @Test @@ -514,7 +514,7 @@ void findAllUppercase() { .extract().body().as(SEARCH_RESULT_SEARCH_HITS); assertThat(result.hits()).extracting(GuideSearchHit::title) .contains( - "Duplicated context, context locals, asynchronous processing and propagation"); + "Duplicated context, context locals, asynchronous processing and propagation"); } private static ThrowingConsumer hitsHaveCorrectWordHighlighted(AtomicInteger matches, String word, From 5af82c3537c79590c8e15fa6d5fed3a556b13984 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yoann=20Rodi=C3=A8re?= Date: Tue, 10 Sep 2024 17:58:20 +0200 Subject: [PATCH 10/14] 
Simplify/relax some tests In particular, avoid over-specific assertions that test more than what we're interested in. --- .../quarkus/search/app/SearchServiceTest.java | 4 +- .../search/app/SynonymSearchServiceTest.java | 54 +++++++++++-------- 2 files changed, 34 insertions(+), 24 deletions(-) diff --git a/src/test/java/io/quarkus/search/app/SearchServiceTest.java b/src/test/java/io/quarkus/search/app/SearchServiceTest.java index dbed921..d94307d 100644 --- a/src/test/java/io/quarkus/search/app/SearchServiceTest.java +++ b/src/test/java/io/quarkus/search/app/SearchServiceTest.java @@ -366,7 +366,9 @@ void highlight_content() { assertThat(result.hits()).extracting(GuideSearchHit::content).hasSize(9) .allSatisfy(content -> assertThat(content).hasSize(1) .allSatisfy(hitsHaveCorrectWordHighlighted(matches, "orm", "highlighted-content"))); - assertThat(matches.get()).isEqualTo(10); + assertThat(matches.get()) + .as("Number of occurrences of 'orm' in " + result.hits().stream().map(GuideSearchHit::content).toList()) + .isEqualTo(10); } @Test diff --git a/src/test/java/io/quarkus/search/app/SynonymSearchServiceTest.java b/src/test/java/io/quarkus/search/app/SynonymSearchServiceTest.java index 506eb25..f18a1d7 100644 --- a/src/test/java/io/quarkus/search/app/SynonymSearchServiceTest.java +++ b/src/test/java/io/quarkus/search/app/SynonymSearchServiceTest.java @@ -41,53 +41,61 @@ void setup() { @ParameterizedTest @MethodSource - void synonymsTitle(String query, String result) { - assertThat(searchHitSearchResult(query).hits()).extracting(GuideSearchHit::title) - .contains(result); + void synonymsTitle(String query, Set expectedTitleHighlights) { + var hits = searchHitSearchResult(query).hits(); + assertThat(expectedTitleHighlights) + .allSatisfy(expectedTitleHighlight -> { + assertThat(hits) + .extracting(GuideSearchHit::title) + .anySatisfy(hitTitle -> assertThat(hitTitle).containsIgnoringCase(expectedTitleHighlight)); + }); } private List synonymsTitle() { return List.of( 
Arguments.of("REST Development Service", - "Dev Services Overview"), + Set.of("Dev Services")), Arguments.of("rest easy", - "Writing REST Services with Quarkus REST (formerly RESTEasy Reactive)"), + Set.of("REST", "RESTEasy")), Arguments.of("vertx", - "Vert.x Reference Guide"), + Set.of("Vert.x")), Arguments.of("rest api", - "Writing REST Services with Quarkus REST (formerly RESTEasy Reactive)"), + Set.of("REST", "RESTEasy")), Arguments.of("config", - "All configuration options"), + Set.of("configuration")), Arguments.of("config option", - "All configuration options"), + Set.of("configuration options")), Arguments.of("jpa", - "Using Hibernate ORM and Jakarta Persistence")); + Set.of("Jakarta Persistence"))); } @ParameterizedTest @MethodSource - void synonymsContent(String query, Set result) { - assertThat(searchHitSearchResult(query).hits()).flatExtracting(GuideSearchHit::content) - .containsAll(result); + void synonymsContent(String query, Set expectedContentHighlights) { + var hits = searchHitSearchResult(query).hits(); + assertThat(expectedContentHighlights) + .allSatisfy(expectedContentHighlight -> { + assertThat(hits) + .flatExtracting(GuideSearchHit::content) + .anySatisfy(hitTitle -> assertThat(hitTitle).containsIgnoringCase(expectedContentHighlight)); + }); } private List synonymsContent() { return List.of( Arguments.of("Development Service", - Set.of("…also offer Dev Services.…", - "…In this case, before starting a container, Dev Services for AMQP looks for a container with the quarkus-dev-service-amqp…")), + Set.of("Dev Services", + "dev-service-amqp")), Arguments.of("dev Service", - Set.of("…also offer Dev Services.…", - "…In this case, before starting a container, Dev Services for AMQP looks for a container with the quarkus-dev-service-amqp…")), + Set.of("Dev Services", + "dev-service-amqp")), Arguments.of("rest easy", - Set.of("…Writing REST Services with Quarkus REST (formerly RESTEasy Reactive) This guide explains how to write…", - "…We recommend 
doing so at your application entry point boundaries like your REST endpoint controllers.…")), + Set.of("REST", "RESTEasy")), Arguments.of("vertx", - Set.of("…}\n\n} You can inject either the: io.vertx.core.Vertx instance exposing the bare Vert.x API io.vertx.mutiny.core.Vertx…", - "…Access the Vert.x instance To access the managed Vert.x instance, add the quarkus-vertx extension to…")), + Set.of("io.vertx.core.Vertx", + "Vert.x", "vertx")), Arguments.of("rest api", - Set.of("…Writing REST Services with Quarkus REST (formerly RESTEasy Reactive) This guide explains how to write…", - "…We recommend doing so at your application entry point boundaries like your REST endpoint controllers.…"))); + Set.of("REST", "RESTEasy"))); } private static SearchResult searchHitSearchResult(String q) { From 42ebd004b6fa5d92960b0615fb8ead6b1b6cb726 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yoann=20Rodi=C3=A8re?= Date: Fri, 6 Sep 2024 13:45:18 +0200 Subject: [PATCH 11/14] Use fast-vector highlighting for lower search latency --- .../io/quarkus/search/app/SearchService.java | 9 +++++---- .../io/quarkus/search/app/entity/Guide.java | 6 +++--- .../quarkus/search/app/SearchServiceTest.java | 12 ++++++------ .../search/app/SynonymSearchServiceTest.java | 19 +++++++++++-------- 4 files changed, 25 insertions(+), 21 deletions(-) diff --git a/src/main/java/io/quarkus/search/app/SearchService.java b/src/main/java/io/quarkus/search/app/SearchService.java index d75ebb4..1c70084 100644 --- a/src/main/java/io/quarkus/search/app/SearchService.java +++ b/src/main/java/io/quarkus/search/app/SearchService.java @@ -108,16 +108,16 @@ public SearchResult search(@RestQuery @DefaultValue(QuarkusVersi .boost(50.0f))); } }) - .highlighter(f -> f.unified() + .highlighter(f -> f.fastVector() // Highlighters are going to use spans-with-classes so that we will have more control over styling the visual on the search results screen. 
.tag("", "")) - .highlighter("highlighter_title_or_summary", f -> f.unified() + .highlighter("highlighter_title_or_summary", f -> f.fastVector() // We want the whole text of the field, regardless of whether it has a match or not. .noMatchSize(TITLE_OR_SUMMARY_MAX_SIZE) .fragmentSize(TITLE_OR_SUMMARY_MAX_SIZE) // We want the whole text as a single fragment .numberOfFragments(1)) - .highlighter("highlighter_content", f -> f.unified() + .highlighter("highlighter_content", f -> f.fastVector() // If there's no match in the full content we don't want to return anything. .noMatchSize(0) // Content is really huge, so we want to only get small parts of the sentences. @@ -126,7 +126,8 @@ public SearchResult search(@RestQuery @DefaultValue(QuarkusVersi .fragmentSize(contentSnippetsLength) // The rest of fragment configuration is static .orderByScore(true) - .boundaryScanner().sentence().end()) + // We don't use sentence boundaries because those can result in huge fragments + .boundaryScanner().chars().boundaryMaxScan(10).end()) .sort(f -> f.score().then().field(language.addSuffix("title_sort"))) .routing(QuarkusVersionAndLanguageRoutingBinder.searchKeys(version, language)) .totalHitCountThreshold(TOTAL_HIT_COUNT_THRESHOLD + (page + 1) * PAGE_SIZE) diff --git a/src/main/java/io/quarkus/search/app/entity/Guide.java b/src/main/java/io/quarkus/search/app/entity/Guide.java index 9fd34ef..4206777 100644 --- a/src/main/java/io/quarkus/search/app/entity/Guide.java +++ b/src/main/java/io/quarkus/search/app/entity/Guide.java @@ -46,12 +46,12 @@ public class Guide { @KeywordField public String origin; - @I18nFullTextField(highlightable = Highlightable.UNIFIED, termVector = TermVector.WITH_POSITIONS_OFFSETS, analyzerPrefix = AnalysisConfigurer.DEFAULT, searchAnalyzerPrefix = AnalysisConfigurer.DEFAULT_SEARCH) + @I18nFullTextField(highlightable = Highlightable.FAST_VECTOR, termVector = TermVector.WITH_POSITIONS_OFFSETS, analyzerPrefix = AnalysisConfigurer.DEFAULT, searchAnalyzerPrefix = 
AnalysisConfigurer.DEFAULT_SEARCH) @I18nFullTextField(name = "title_autocomplete", analyzerPrefix = AnalysisConfigurer.AUTOCOMPLETE, searchAnalyzerPrefix = AnalysisConfigurer.DEFAULT_SEARCH) @I18nKeywordField(name = "title_sort", normalizerPrefix = AnalysisConfigurer.SORT, searchable = Searchable.NO, sortable = Sortable.YES) public I18nData title = new I18nData<>(); - @I18nFullTextField(highlightable = Highlightable.UNIFIED, termVector = TermVector.WITH_POSITIONS_OFFSETS, analyzerPrefix = AnalysisConfigurer.DEFAULT, searchAnalyzerPrefix = AnalysisConfigurer.DEFAULT_SEARCH) + @I18nFullTextField(highlightable = Highlightable.FAST_VECTOR, termVector = TermVector.WITH_POSITIONS_OFFSETS, analyzerPrefix = AnalysisConfigurer.DEFAULT, searchAnalyzerPrefix = AnalysisConfigurer.DEFAULT_SEARCH) @I18nFullTextField(name = "summary_autocomplete", analyzerPrefix = AnalysisConfigurer.AUTOCOMPLETE, searchAnalyzerPrefix = AnalysisConfigurer.DEFAULT_SEARCH) public I18nData summary = new I18nData<>(); @@ -59,7 +59,7 @@ public class Guide { @I18nFullTextField(name = "keywords_autocomplete", analyzerPrefix = AnalysisConfigurer.AUTOCOMPLETE, searchAnalyzerPrefix = AnalysisConfigurer.DEFAULT_SEARCH) public I18nData keywords = new I18nData<>(); - @I18nFullTextField(name = "fullContent", valueBridge = @ValueBridgeRef(type = InputProviderHtmlBodyTextBridge.class), highlightable = Highlightable.UNIFIED, termVector = TermVector.WITH_POSITIONS_OFFSETS, analyzerPrefix = AnalysisConfigurer.DEFAULT, searchAnalyzerPrefix = AnalysisConfigurer.DEFAULT_SEARCH) + @I18nFullTextField(name = "fullContent", valueBridge = @ValueBridgeRef(type = InputProviderHtmlBodyTextBridge.class), highlightable = Highlightable.FAST_VECTOR, termVector = TermVector.WITH_POSITIONS_OFFSETS, analyzerPrefix = AnalysisConfigurer.DEFAULT, searchAnalyzerPrefix = AnalysisConfigurer.DEFAULT_SEARCH) @I18nFullTextField(name = "fullContent_autocomplete", valueBridge = @ValueBridgeRef(type = InputProviderHtmlBodyTextBridge.class), 
analyzerPrefix = AnalysisConfigurer.AUTOCOMPLETE, searchAnalyzerPrefix = AnalysisConfigurer.DEFAULT_SEARCH) @IndexingDependency(reindexOnUpdate = ReindexOnUpdate.NO) public I18nData htmlFullContentProvider = new I18nData<>(); diff --git a/src/test/java/io/quarkus/search/app/SearchServiceTest.java b/src/test/java/io/quarkus/search/app/SearchServiceTest.java index d94307d..f6f0dc2 100644 --- a/src/test/java/io/quarkus/search/app/SearchServiceTest.java +++ b/src/test/java/io/quarkus/search/app/SearchServiceTest.java @@ -368,7 +368,7 @@ void highlight_content() { .allSatisfy(hitsHaveCorrectWordHighlighted(matches, "orm", "highlighted-content"))); assertThat(matches.get()) .as("Number of occurrences of 'orm' in " + result.hits().stream().map(GuideSearchHit::content).toList()) - .isEqualTo(10); + .isEqualTo(14); } @Test @@ -448,8 +448,8 @@ void searchForPhrase() { .extract().body().as(SEARCH_RESULT_SEARCH_HITS); assertThat(result.hits()).extracting(GuideSearchHit::title) .contains( - // unified highlighter will still "highlight" the phrase word by word: - "Duplicated context, context locals, asynchronous processing and propagation"); + // fast-vector highlighter will highlight the phrase: + "Duplicated context, context locals, asynchronous processing and propagation"); } @Test @@ -477,7 +477,7 @@ void findConfigProperty() { .extract().body().as(SEARCH_RESULT_SEARCH_HITS); assertThat(result.hits()).extracting(GuideSearchHit::content) .containsOnly( - Set.of("…Environment variable: QUARKUS_VIRTUAL_THREADS_ENABLED Show more boolean true WebSockets Client Type Default quarkus.websocket.max-frame-size…")); + Set.of("…Default quarkus.websocket.max-frame-size The maximum amount of data that can be sent in a single frame. 
Messages…")); } @Test @@ -490,7 +490,7 @@ void findFQCN() { .extract().body().as(SEARCH_RESULT_SEARCH_HITS); assertThat(result.hits()).extracting(GuideSearchHit::content) .containsOnly(Set.of( - "…io.quarkus.deployment.builditem.nativeimage.NativeImageAllowIncompleteClasspathAggregateBuildItem Do not use directly: use instead. boolean allow No Javadoc found io.quarkus.deployment.pkg.builditem.NativeImageBuildItem…")); + "…allow No Javadoc found io.quarkus.deployment.pkg.builditem.NativeImageBuildItem No Javadoc found Path…")); } @Test @@ -503,7 +503,7 @@ void findBuildItem() { .extract().body().as(SEARCH_RESULT_SEARCH_HITS); assertThat(result.hits()).extracting(GuideSearchHit::content) .containsOnly(Set.of( - "…io.quarkus.deployment.builditem.nativeimage.NativeImageAllowIncompleteClasspathAggregateBuildItem Do not use directly: use instead. boolean allow No Javadoc found io.quarkus.deployment.pkg.builditem.NativeImageBuildItem…")); + "…allow No Javadoc found io.quarkus.deployment.pkg.builditem.NativeImageBuildItem No Javadoc found Path…")); } @Test diff --git a/src/test/java/io/quarkus/search/app/SynonymSearchServiceTest.java b/src/test/java/io/quarkus/search/app/SynonymSearchServiceTest.java index f18a1d7..1f606f5 100644 --- a/src/test/java/io/quarkus/search/app/SynonymSearchServiceTest.java +++ b/src/test/java/io/quarkus/search/app/SynonymSearchServiceTest.java @@ -54,7 +54,7 @@ void synonymsTitle(String query, Set expectedTitleHighlights) { private List synonymsTitle() { return List.of( Arguments.of("REST Development Service", - Set.of("Dev Services")), + Set.of("Dev Services")), Arguments.of("rest easy", Set.of("REST", "RESTEasy")), Arguments.of("vertx", @@ -64,9 +64,9 @@ private List synonymsTitle() { Arguments.of("config", Set.of("configuration")), Arguments.of("config option", - Set.of("configuration options")), + Set.of("configuration options")), Arguments.of("jpa", - Set.of("Jakarta Persistence"))); + Set.of("Jakarta Persistence"))); } @ParameterizedTest 
@@ -84,11 +84,11 @@ void synonymsContent(String query, Set expectedContentHighlights) { private List synonymsContent() { return List.of( Arguments.of("Development Service", - Set.of("Dev Services", - "dev-service-amqp")), + Set.of("Dev Services", + "dev-service-amqp")), Arguments.of("dev Service", - Set.of("Dev Services", - "dev-service-amqp")), + Set.of("Dev Services", + "dev-service-amqp")), Arguments.of("rest easy", Set.of("REST", "RESTEasy")), Arguments.of("vertx", @@ -101,7 +101,10 @@ private List synonymsContent() { private static SearchResult searchHitSearchResult(String q) { return given() .queryParam("q", q) - .queryParam("contentSnippets", 2) + // Bumping the number of snippets to give low-score matching terms more chance to appear in highlights. + // This is fine because these tests are not about relevance, + // just about checking that synonyms are detected correctly. + .queryParam("contentSnippets", 10) .when().get(GUIDES_SEARCH) .then() .statusCode(200) From df69bd39835fe9f40976805d68aa15a1b8778a3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yoann=20Rodi=C3=A8re?= Date: Fri, 6 Sep 2024 13:59:47 +0200 Subject: [PATCH 12/14] Align OpenSearch Dev Services Java Opts on those used in prod In practice that was already more or less the case, but this ensures things will remain aligned if we change them in prod. 
--- src/main/resources/application.properties | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/main/resources/application.properties b/src/main/resources/application.properties index 969fd3d..afc623e 100644 --- a/src/main/resources/application.properties +++ b/src/main/resources/application.properties @@ -70,6 +70,7 @@ quarkus.rest.path=/api quarkus.hibernate-search-standalone.elasticsearch.version=opensearch:2.16 # Not using :latest here as a workaround until we get https://github.com/quarkusio/quarkus/pull/38896 quarkus.elasticsearch.devservices.image-name=opensearch-custom:${maven.version.opensearch} +quarkus.elasticsearch.devservices.java-opts=${PROD_OPENSEARCH_JAVA_OPTS} # Limit parallelism of indexing, because OpenSearch can only handle so many documents in its buffers. # This leads to at most 12*20=240 documents being indexed in parallel, which should be plenty # given how large our documents can be. @@ -236,7 +237,8 @@ quarkus.helm.values."opensearch-image".paths=(kind == StatefulSet).spec.template quarkus.helm.values."opensearch-image".value=opensearch-custom:${maven.revision} quarkus.helm.values."opensearch-image".property=@.opensearch.image # Resource requirements (overridden for staging, see src/main/helm) -quarkus.helm.values."@.opensearch.envs.OPENSEARCH_JAVA_OPTS".value=\ -Xms1g -Xmx1g +PROD_OPENSEARCH_JAVA_OPTS=-Xms1g -Xmx1g +quarkus.helm.values."@.opensearch.envs.OPENSEARCH_JAVA_OPTS".value=\ ${PROD_OPENSEARCH_JAVA_OPTS} quarkus.helm.values."@.opensearch.resources.limits.cpu".value=2000m quarkus.helm.values."@.opensearch.resources.requests.cpu".value=500m quarkus.helm.values."@.opensearch.resources.limits.memory".value=2Gi From 610a53b0b4b9906d2a4a34c1b12a02e054ff0fc0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yoann=20Rodi=C3=A8re?= Date: Fri, 6 Sep 2024 14:38:36 +0200 Subject: [PATCH 13/14] Merge segments after indexing --- .../java/io/quarkus/search/app/indexing/IndexingService.java | 2 ++ 1 file changed, 2 insertions(+) 
diff --git a/src/main/java/io/quarkus/search/app/indexing/IndexingService.java b/src/main/java/io/quarkus/search/app/indexing/IndexingService.java index 18a45f1..dde72e7 100644 --- a/src/main/java/io/quarkus/search/app/indexing/IndexingService.java +++ b/src/main/java/io/quarkus/search/app/indexing/IndexingService.java @@ -238,6 +238,8 @@ private void indexAll(FailureCollector failureCollector) { var future = searchMapping.scope(Object.class).massIndexer() // no point in cleaning the data because of the rollover ^ .purgeAllOnStart(false) + // data is read-only after indexing -- we may as well have a single segment + .mergeSegmentsOnFinish(true) .batchSizeToLoadObjects(indexingConfig.batchSize()) .threadsToLoadObjects(indexingConfig.parallelism().orElse(6)) .context(QuarkusIOLoadingContext.class, QuarkusIOLoadingContext.of(quarkusIO)) From 06f3b5f4d17797b537e0ef0da095604ff290c2f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yoann=20Rodi=C3=A8re?= Date: Wed, 11 Sep 2024 10:23:37 +0200 Subject: [PATCH 14/14] Reduce the queue count and bulk size in dev/prod Because the previous settings used to trip circuit breakers in OpenSearch due to lack of memory to put all the pending documents on the heap. Note that dev and prod rely on the same settings, with the same amount of heap given to OpenSearch. 
--- src/main/helm/values.staging.yaml | 2 +- src/main/resources/application.properties | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/main/helm/values.staging.yaml b/src/main/helm/values.staging.yaml index 0bf22a7..a935ea2 100644 --- a/src/main/helm/values.staging.yaml +++ b/src/main/helm/values.staging.yaml @@ -2,8 +2,8 @@ app: envs: QUARKUS_PROFILE: 'staging' # Avoid overloading the rather resource-constrained OpenSearch instance - INDEXING_BULK_SIZE: '10' INDEXING_QUEUE_COUNT: '6' + INDEXING_BULK_SIZE: '10' resources: limits: cpu: 2000m diff --git a/src/main/resources/application.properties b/src/main/resources/application.properties index afc623e..fc7c181 100644 --- a/src/main/resources/application.properties +++ b/src/main/resources/application.properties @@ -72,9 +72,9 @@ quarkus.hibernate-search-standalone.elasticsearch.version=opensearch:2.16 quarkus.elasticsearch.devservices.image-name=opensearch-custom:${maven.version.opensearch} quarkus.elasticsearch.devservices.java-opts=${PROD_OPENSEARCH_JAVA_OPTS} # Limit parallelism of indexing, because OpenSearch can only handle so many documents in its buffers. -# This leads to at most 12*20=240 documents being indexed in parallel, which should be plenty +# This leads to at most 8*20=160 documents being indexed in parallel, which should be plenty # given how large our documents can be. -INDEXING_QUEUE_COUNT=12 +INDEXING_QUEUE_COUNT=8 INDEXING_BULK_SIZE=20 quarkus.hibernate-search-standalone.elasticsearch.indexing.queue-count=${INDEXING_QUEUE_COUNT} quarkus.hibernate-search-standalone.elasticsearch.indexing.max-bulk-size=${INDEXING_BULK_SIZE}