diff --git a/pom.xml b/pom.xml index bcb57b7a..243fa42f 100644 --- a/pom.xml +++ b/pom.xml @@ -27,17 +27,17 @@ UTF-8 quarkus-bom io.quarkus - 3.13.2 + 3.14.2 999-SNAPSHOT true - 3.4.0 + 3.5.0 -Xms2g -Xmx2g 0.45.0 2.24.1 - 1.11.0 + 1.12.0 2.16 - 1.7.0 + 1.7.1 @@ -58,7 +58,7 @@ io.quarkiverse.jgit quarkus-jgit - 3.1.2 + 3.1.3 io.quarkiverse.githubapi diff --git a/src/main/helm/values.staging.yaml b/src/main/helm/values.staging.yaml index 0bf22a7f..a935ea29 100644 --- a/src/main/helm/values.staging.yaml +++ b/src/main/helm/values.staging.yaml @@ -2,8 +2,8 @@ app: envs: QUARKUS_PROFILE: 'staging' # Avoid overloading the rather resource-constrained OpenSearch instance - INDEXING_BULK_SIZE: '10' INDEXING_QUEUE_COUNT: '6' + INDEXING_BULK_SIZE: '10' resources: limits: cpu: 2000m diff --git a/src/main/java/io/quarkus/search/app/SearchService.java b/src/main/java/io/quarkus/search/app/SearchService.java index 2a0502f3..1c70084c 100644 --- a/src/main/java/io/quarkus/search/app/SearchService.java +++ b/src/main/java/io/quarkus/search/app/SearchService.java @@ -23,7 +23,7 @@ import io.quarkus.runtime.LaunchMode; import org.hibernate.search.engine.search.common.BooleanOperator; -import org.hibernate.search.engine.search.common.ValueConvert; +import org.hibernate.search.engine.search.common.ValueModel; import org.hibernate.search.engine.search.predicate.dsl.SimpleQueryFlag; import org.hibernate.search.mapper.pojo.standalone.mapping.SearchMapping; @@ -36,7 +36,7 @@ @Path("/") public class SearchService { - private static final int NO_MATCH_SIZE = 32_600; + private static final int TITLE_OR_SUMMARY_MAX_SIZE = 32_600; private static final int PAGE_SIZE = 50; private static final long TOTAL_HIT_COUNT_THRESHOLD = 100; private static final String MAX_FOR_PERF_MESSAGE = "{jakarta.validation.constraints.Max.message} for performance reasons"; @@ -73,8 +73,8 @@ public SearchResult search(@RestQuery @DefaultValue(QuarkusVersi f.id(), f.field("type"), f.field("origin"), - f.highlight(language.addSuffix("title")), - f.highlight(language.addSuffix("summary")), + f.highlight(language.addSuffix("title")).highlighter("highlighter_title_or_summary").single(), + f.highlight(language.addSuffix("summary")).highlighter("highlighter_title_or_summary").single(), f.highlight(language.addSuffix("fullContent")).highlighter("highlighter_content")) .asList(GuideSearchHit::new)) .where((f, root) -> { @@ -104,25 +104,30 @@ public SearchResult search(@RestQuery @DefaultValue(QuarkusVersi .defaultOperator(BooleanOperator.AND)) .should(f.match().field("origin").matching("quarkus").boost(50.0f)) .should(f.not(f.match().field(language.addSuffix("topics")) - .matching("compatibility", ValueConvert.NO)) + .matching("compatibility", ValueModel.INDEX)) .boost(50.0f))); } }) - // * Highlighters are going to use spans-with-classes so that we will have more control over styling the visual on the search results screen. - // * We give control to the caller on the content snippet length and the number of these fragments - // * No match size is there to make sure that we are still going to get the text even if the field didn't have a match in it. - // * The title in the Guide entity is `Length.LONG` long, so we use that as a max value for no-match size, but hopefully nobody writes a title that long... - .highlighter( - f -> f.unified().noMatchSize(NO_MATCH_SIZE).fragmentSize(0) - .orderByScore(true) - .numberOfFragments(1) - .tag("", "") - .boundaryScanner().sentence().end()) - // * If there's no match in the full content we don't want to return anything. - // * Also content is really huge, so we want to only get small parts of the sentences. We are allowing caller to pick the number of sentences and their length: - .highlighter("highlighter_content", - f -> f.unified().noMatchSize(0).numberOfFragments(contentSnippets) - .fragmentSize(contentSnippetsLength)) + .highlighter(f -> f.fastVector() + // Highlighters are going to use spans-with-classes so that we will have more control over styling the visual on the search results screen. + .tag("", "")) + .highlighter("highlighter_title_or_summary", f -> f.fastVector() + // We want the whole text of the field, regardless of whether it has a match or not. + .noMatchSize(TITLE_OR_SUMMARY_MAX_SIZE) + .fragmentSize(TITLE_OR_SUMMARY_MAX_SIZE) + // We want the whole text as a single fragment + .numberOfFragments(1)) + .highlighter("highlighter_content", f -> f.fastVector() + // If there's no match in the full content we don't want to return anything. + .noMatchSize(0) + // Content is really huge, so we want to only get small parts of the sentences. + // We give control to the caller on the content snippet length and the number of these fragments + .numberOfFragments(contentSnippets) + .fragmentSize(contentSnippetsLength) + // The rest of fragment configuration is static + .orderByScore(true) + // We don't use sentence boundaries because those can result in huge fragments + .boundaryScanner().chars().boundaryMaxScan(10).end()) .sort(f -> f.score().then().field(language.addSuffix("title_sort"))) .routing(QuarkusVersionAndLanguageRoutingBinder.searchKeys(version, language)) .totalHitCountThreshold(TOTAL_HIT_COUNT_THRESHOLD + (page + 1) * PAGE_SIZE) diff --git a/src/main/java/io/quarkus/search/app/dto/GuideSearchHit.java b/src/main/java/io/quarkus/search/app/dto/GuideSearchHit.java index 47d1edea..90925d91 100644 --- a/src/main/java/io/quarkus/search/app/dto/GuideSearchHit.java +++ b/src/main/java/io/quarkus/search/app/dto/GuideSearchHit.java @@ -10,21 +10,18 @@ public record GuideSearchHit(URI url, String type, String origin, String title, public GuideSearchHit(URI url, String type, String origin, - List title, - List summary, - List fullContent) { - this(url, type, origin, firstOrEmpty(title), firstOrEmpty(summary), wrap(fullContent)); + String title, + String summary, + List content) { + this(url, type, origin, title != null ? title : "", summary != null ? summary : "", wrap(content)); } @SuppressWarnings("unchecked") public GuideSearchHit(List values) { this( (URI) values.get(0), (String) values.get(1), (String) values.get(2), - (List) values.get(3), (List) values.get(4), (List) values.get(5)); - } - - private static String firstOrEmpty(List strings) { - return strings.isEmpty() ? "" : strings.get(0); + (String) values.get(3), (String) values.get(4), + (List) values.get(5)); } private static Set wrap(List strings) { diff --git a/src/main/java/io/quarkus/search/app/entity/Guide.java b/src/main/java/io/quarkus/search/app/entity/Guide.java index 9fd34ef8..4206777b 100644 --- a/src/main/java/io/quarkus/search/app/entity/Guide.java +++ b/src/main/java/io/quarkus/search/app/entity/Guide.java @@ -46,12 +46,12 @@ public class Guide { @KeywordField public String origin; - @I18nFullTextField(highlightable = Highlightable.UNIFIED, termVector = TermVector.WITH_POSITIONS_OFFSETS, analyzerPrefix = AnalysisConfigurer.DEFAULT, searchAnalyzerPrefix = AnalysisConfigurer.DEFAULT_SEARCH) + @I18nFullTextField(highlightable = Highlightable.FAST_VECTOR, termVector = TermVector.WITH_POSITIONS_OFFSETS, analyzerPrefix = AnalysisConfigurer.DEFAULT, searchAnalyzerPrefix = AnalysisConfigurer.DEFAULT_SEARCH) @I18nFullTextField(name = "title_autocomplete", analyzerPrefix = AnalysisConfigurer.AUTOCOMPLETE, searchAnalyzerPrefix = AnalysisConfigurer.DEFAULT_SEARCH) @I18nKeywordField(name = "title_sort", normalizerPrefix = AnalysisConfigurer.SORT, searchable = Searchable.NO, sortable = Sortable.YES) public I18nData title = new I18nData<>(); - @I18nFullTextField(highlightable = Highlightable.UNIFIED, termVector = TermVector.WITH_POSITIONS_OFFSETS, analyzerPrefix = AnalysisConfigurer.DEFAULT, searchAnalyzerPrefix = AnalysisConfigurer.DEFAULT_SEARCH) + @I18nFullTextField(highlightable = Highlightable.FAST_VECTOR, termVector = TermVector.WITH_POSITIONS_OFFSETS, analyzerPrefix = AnalysisConfigurer.DEFAULT, searchAnalyzerPrefix = AnalysisConfigurer.DEFAULT_SEARCH) @I18nFullTextField(name = "summary_autocomplete", analyzerPrefix = AnalysisConfigurer.AUTOCOMPLETE, searchAnalyzerPrefix = AnalysisConfigurer.DEFAULT_SEARCH) public I18nData summary = new I18nData<>(); @@ -59,7 +59,7 @@ public class Guide { @I18nFullTextField(name = "keywords_autocomplete", analyzerPrefix = AnalysisConfigurer.AUTOCOMPLETE, searchAnalyzerPrefix = AnalysisConfigurer.DEFAULT_SEARCH) public I18nData keywords = new I18nData<>(); - @I18nFullTextField(name = "fullContent", valueBridge = @ValueBridgeRef(type = InputProviderHtmlBodyTextBridge.class), highlightable = Highlightable.UNIFIED, termVector = TermVector.WITH_POSITIONS_OFFSETS, analyzerPrefix = AnalysisConfigurer.DEFAULT, searchAnalyzerPrefix = AnalysisConfigurer.DEFAULT_SEARCH) + @I18nFullTextField(name = "fullContent", valueBridge = @ValueBridgeRef(type = InputProviderHtmlBodyTextBridge.class), highlightable = Highlightable.FAST_VECTOR, termVector = TermVector.WITH_POSITIONS_OFFSETS, analyzerPrefix = AnalysisConfigurer.DEFAULT, searchAnalyzerPrefix = AnalysisConfigurer.DEFAULT_SEARCH) @I18nFullTextField(name = "fullContent_autocomplete", valueBridge = @ValueBridgeRef(type = InputProviderHtmlBodyTextBridge.class), analyzerPrefix = AnalysisConfigurer.AUTOCOMPLETE, searchAnalyzerPrefix = AnalysisConfigurer.DEFAULT_SEARCH) @IndexingDependency(reindexOnUpdate = ReindexOnUpdate.NO) public I18nData htmlFullContentProvider = new I18nData<>(); diff --git a/src/main/java/io/quarkus/search/app/hibernate/AnalysisConfigurer.java b/src/main/java/io/quarkus/search/app/hibernate/AnalysisConfigurer.java index 504003de..896d8142 100644 --- a/src/main/java/io/quarkus/search/app/hibernate/AnalysisConfigurer.java +++ b/src/main/java/io/quarkus/search/app/hibernate/AnalysisConfigurer.java @@ -44,10 +44,6 @@ public static String autocompleteAnalyzer(Language language) { return language.addSuffix(AUTOCOMPLETE); } - private static String stopFilter(Language language) { - return "stop_%s".formatted(language.code); - } - private static String regularStemmerFilter(Language language) { return "stemmer_%s".formatted(language.code); } @@ -99,8 +95,6 @@ void configureEnglishLikeLanguage(ElasticsearchAnalysisConfigurationContext cont "lowercase", // To remove possessives (trailing 's) from words. possessiveStemmerFilter(language), - // To remove frequently used words that do not bring much meaning, e.g. a, that, and, are, as, at, with... - stopFilter(language), // To remove suffixes like -s/-es/-ed etc regularStemmerFilter(language), // To convert characters into ascii ones, e.g. à to a or ę to e etc. @@ -113,7 +107,6 @@ void configureEnglishLikeLanguage(ElasticsearchAnalysisConfigurationContext cont .tokenFilters( "lowercase", possessiveStemmerFilter(language), - stopFilter(language), regularStemmerFilter(language), "asciifolding", // > In general, synonym filters rewrite their inputs to the tokenizer and filters used in the preceding analysis chain @@ -133,7 +126,6 @@ void configureEnglishLikeLanguage(ElasticsearchAnalysisConfigurationContext cont compoundTechnicalNameFilter(language), "lowercase", possessiveStemmerFilter(language), - stopFilter(language), regularStemmerFilter(language), "asciifolding", autocompleteEdgeNgramFilter(language)) @@ -156,7 +148,6 @@ void configureJapanese(ElasticsearchAnalysisConfigurationContext context) { "kuromoji_part_of_speech", possessiveStemmerFilter(language), "ja_stop", - stopFilter(language), "kuromoji_stemmer", regularStemmerFilter(language), "asciifolding") @@ -175,7 +166,6 @@ void configureJapanese(ElasticsearchAnalysisConfigurationContext context) { "kuromoji_part_of_speech", possessiveStemmerFilter(language), "ja_stop", - stopFilter(language), "kuromoji_stemmer", regularStemmerFilter(language), "asciifolding", @@ -193,7 +183,6 @@ void configureJapanese(ElasticsearchAnalysisConfigurationContext context) { "kuromoji_part_of_speech", possessiveStemmerFilter(language), "ja_stop", - stopFilter(language), "kuromoji_stemmer", regularStemmerFilter(language), "asciifolding", @@ -218,7 +207,6 @@ void configureChinese(ElasticsearchAnalysisConfigurationContext context) { "lowercase", possessiveStemmerFilter(language), "smartcn_stop", - stopFilter(language), regularStemmerFilter(language), "asciifolding") .charFilters("html_strip"); @@ -229,7 +217,6 @@ void configureChinese(ElasticsearchAnalysisConfigurationContext context) { .tokenFilters( "lowercase", possessiveStemmerFilter(language), - stopFilter(language), regularStemmerFilter(language), "asciifolding", synonymsGraphFilter(language), @@ -244,7 +231,6 @@ void configureChinese(ElasticsearchAnalysisConfigurationContext context) { "lowercase", possessiveStemmerFilter(language), "smartcn_stop", - stopFilter(language), regularStemmerFilter(language), "asciifolding", autocompleteEdgeNgramFilter(language)) @@ -256,10 +242,6 @@ void configureChinese(ElasticsearchAnalysisConfigurationContext context) { } private static void configureSharedFilters(ElasticsearchAnalysisConfigurationContext context, Language language) { - context.tokenFilter(stopFilter(language)) - .type("stop") - .param("stopwords", "_english_") - .param("ignore_case", "true"); context.tokenFilter(regularStemmerFilter(language)) .type("stemmer") .param("language", "english"); diff --git a/src/main/java/io/quarkus/search/app/indexing/IndexingService.java b/src/main/java/io/quarkus/search/app/indexing/IndexingService.java index 18a45f1c..dde72e75 100644 --- a/src/main/java/io/quarkus/search/app/indexing/IndexingService.java +++ b/src/main/java/io/quarkus/search/app/indexing/IndexingService.java @@ -238,6 +238,8 @@ private void indexAll(FailureCollector failureCollector) { var future = searchMapping.scope(Object.class).massIndexer() // no point in cleaning the data because of the rollover ^ .purgeAllOnStart(false) + // data is read-only after indexing -- we may as well have a single segment + .mergeSegmentsOnFinish(true) .batchSizeToLoadObjects(indexingConfig.batchSize()) .threadsToLoadObjects(indexingConfig.parallelism().orElse(6)) .context(QuarkusIOLoadingContext.class, QuarkusIOLoadingContext.of(quarkusIO)) diff --git a/src/main/resources/application.properties b/src/main/resources/application.properties index 969fd3de..fc7c181c 100644 --- a/src/main/resources/application.properties +++ b/src/main/resources/application.properties @@ -70,10 +70,11 @@ quarkus.rest.path=/api quarkus.hibernate-search-standalone.elasticsearch.version=opensearch:2.16 # Not using :latest here as a workaround until we get https://github.com/quarkusio/quarkus/pull/38896 quarkus.elasticsearch.devservices.image-name=opensearch-custom:${maven.version.opensearch} +quarkus.elasticsearch.devservices.java-opts=${PROD_OPENSEARCH_JAVA_OPTS} # Limit parallelism of indexing, because OpenSearch can only handle so many documents in its buffers. -# This leads to at most 12*20=240 documents being indexed in parallel, which should be plenty +# This leads to at most 8*20=160 documents being indexed in parallel, which should be plenty # given how large our documents can be. -INDEXING_QUEUE_COUNT=12 +INDEXING_QUEUE_COUNT=8 INDEXING_BULK_SIZE=20 quarkus.hibernate-search-standalone.elasticsearch.indexing.queue-count=${INDEXING_QUEUE_COUNT} quarkus.hibernate-search-standalone.elasticsearch.indexing.max-bulk-size=${INDEXING_BULK_SIZE} @@ -236,7 +237,8 @@ quarkus.helm.values."opensearch-image".paths=(kind == StatefulSet).spec.template quarkus.helm.values."opensearch-image".value=opensearch-custom:${maven.revision} quarkus.helm.values."opensearch-image".property=@.opensearch.image # Resource requirements (overridden for staging, see src/main/helm) -quarkus.helm.values."@.opensearch.envs.OPENSEARCH_JAVA_OPTS".value=\ -Xms1g -Xmx1g +PROD_OPENSEARCH_JAVA_OPTS=-Xms1g -Xmx1g +quarkus.helm.values."@.opensearch.envs.OPENSEARCH_JAVA_OPTS".value=\ ${PROD_OPENSEARCH_JAVA_OPTS} quarkus.helm.values."@.opensearch.resources.limits.cpu".value=2000m quarkus.helm.values."@.opensearch.resources.requests.cpu".value=500m quarkus.helm.values."@.opensearch.resources.limits.memory".value=2Gi diff --git a/src/test/java/io/quarkus/search/app/SearchServiceTest.java b/src/test/java/io/quarkus/search/app/SearchServiceTest.java index 02c29fa0..f6f0dc2c 100644 --- a/src/test/java/io/quarkus/search/app/SearchServiceTest.java +++ b/src/test/java/io/quarkus/search/app/SearchServiceTest.java @@ -203,8 +203,8 @@ private static List relevance() { // TODO Shouldn't the ORM guide be before Panache? GuideRef.HIBERNATE_ORM_PANACHE, GuideRef.HIBERNATE_ORM, - GuideRef.HIBERNATE_ORM_PANACHE_KOTLIN, GuideRef.HIBERNATE_SEARCH_ORM_ELASTICSEARCH, + GuideRef.HIBERNATE_ORM_PANACHE_KOTLIN, GuideRef.HIBERNATE_REACTIVE_PANACHE, GuideRef.HIBERNATE_REACTIVE)), Arguments.of("reactive", GuideRef.urls( @@ -217,8 +217,8 @@ private static List relevance() { GuideRef.HIBERNATE_REACTIVE, GuideRef.HIBERNATE_REACTIVE_PANACHE, GuideRef.HIBERNATE_ORM_PANACHE, - GuideRef.HIBERNATE_ORM_PANACHE_KOTLIN, - GuideRef.HIBERNATE_ORM)), + GuideRef.HIBERNATE_ORM, + GuideRef.HIBERNATE_ORM_PANACHE_KOTLIN)), Arguments.of("jpa", GuideRef.urls( GuideRef.HIBERNATE_ORM, GuideRef.HIBERNATE_ORM_PANACHE, @@ -366,7 +366,9 @@ void highlight_content() { assertThat(result.hits()).extracting(GuideSearchHit::content).hasSize(9) .allSatisfy(content -> assertThat(content).hasSize(1) .allSatisfy(hitsHaveCorrectWordHighlighted(matches, "orm", "highlighted-content"))); - assertThat(matches.get()).isEqualTo(10); + assertThat(matches.get()) + .as("Number of occurrences of 'orm' in " + result.hits().stream().map(GuideSearchHit::content).toList()) + .isEqualTo(14); } @Test @@ -433,7 +435,7 @@ void quoteEmptyQuoteTitleTranslation() { .extract().body().as(SEARCH_RESULT_SEARCH_HITS); assertThat(result.hits()).extracting(GuideSearchHit::title) .contains( - "Duplicated context, context locals, asynchronous processing and propagation"); + "Duplicated context, context locals, asynchronous processing and propagation"); } @Test @@ -446,8 +448,8 @@ void searchForPhrase() { .extract().body().as(SEARCH_RESULT_SEARCH_HITS); assertThat(result.hits()).extracting(GuideSearchHit::title) .contains( - // unified highlighter will still "highlight" the phrase word by word: - "Duplicated context, context locals, asynchronous processing and propagation"); + // fast-vector highlighter will highlight the phrase: + "Duplicated context, context locals, asynchronous processing and propagation"); } @Test @@ -475,7 +477,7 @@ void findConfigProperty() { .extract().body().as(SEARCH_RESULT_SEARCH_HITS); assertThat(result.hits()).extracting(GuideSearchHit::content) .containsOnly( - Set.of("…Environment variable: QUARKUS_VIRTUAL_THREADS_ENABLED Show more boolean true WebSockets Client Type Default quarkus.websocket.max-frame-size…")); + Set.of("…Default quarkus.websocket.max-frame-size The maximum amount of data that can be sent in a single frame. Messages…")); } @Test @@ -488,7 +490,7 @@ void findFQCN() { .extract().body().as(SEARCH_RESULT_SEARCH_HITS); assertThat(result.hits()).extracting(GuideSearchHit::content) .containsOnly(Set.of( - "…io.quarkus.deployment.builditem.nativeimage.NativeImageAllowIncompleteClasspathAggregateBuildItem Do not use directly: use instead. boolean allow No Javadoc found io.quarkus.deployment.pkg.builditem.NativeImageBuildItem…")); + "…allow No Javadoc found io.quarkus.deployment.pkg.builditem.NativeImageBuildItem No Javadoc found Path…")); } @Test @@ -501,7 +503,7 @@ void findBuildItem() { .extract().body().as(SEARCH_RESULT_SEARCH_HITS); assertThat(result.hits()).extracting(GuideSearchHit::content) .containsOnly(Set.of( - "…io.quarkus.deployment.builditem.nativeimage.NativeImageAllowIncompleteClasspathAggregateBuildItem Do not use directly: use instead. boolean allow No Javadoc found io.quarkus.deployment.pkg.builditem.NativeImageBuildItem…")); + "…allow No Javadoc found io.quarkus.deployment.pkg.builditem.NativeImageBuildItem No Javadoc found Path…")); } @Test @@ -514,7 +516,7 @@ void findAllUppercase() { .extract().body().as(SEARCH_RESULT_SEARCH_HITS); assertThat(result.hits()).extracting(GuideSearchHit::title) .contains( - "Duplicated context, context locals, asynchronous processing and propagation"); + "Duplicated context, context locals, asynchronous processing and propagation"); } private static ThrowingConsumer hitsHaveCorrectWordHighlighted(AtomicInteger matches, String word, diff --git a/src/test/java/io/quarkus/search/app/SynonymSearchServiceTest.java b/src/test/java/io/quarkus/search/app/SynonymSearchServiceTest.java index 506eb257..1f606f5c 100644 --- a/src/test/java/io/quarkus/search/app/SynonymSearchServiceTest.java +++ b/src/test/java/io/quarkus/search/app/SynonymSearchServiceTest.java @@ -41,59 +41,70 @@ void setup() { @ParameterizedTest @MethodSource - void synonymsTitle(String query, String result) { - assertThat(searchHitSearchResult(query).hits()).extracting(GuideSearchHit::title) - .contains(result); + void synonymsTitle(String query, Set expectedTitleHighlights) { + var hits = searchHitSearchResult(query).hits(); + assertThat(expectedTitleHighlights) + .allSatisfy(expectedTitleHighlight -> { + assertThat(hits) + .extracting(GuideSearchHit::title) + .anySatisfy(hitTitle -> assertThat(hitTitle).containsIgnoringCase(expectedTitleHighlight)); + }); } private List synonymsTitle() { return List.of( Arguments.of("REST Development Service", - "Dev Services Overview"), + Set.of("Dev Services")), Arguments.of("rest easy", - "Writing REST Services with Quarkus REST (formerly RESTEasy Reactive)"), + Set.of("REST", "RESTEasy")), Arguments.of("vertx", - "Vert.x Reference Guide"), + Set.of("Vert.x")), Arguments.of("rest api", - "Writing REST Services with Quarkus REST (formerly RESTEasy Reactive)"), + Set.of("REST", "RESTEasy")), Arguments.of("config", - "All configuration options"), + Set.of("configuration")), Arguments.of("config option", - "All configuration options"), + Set.of("configuration options")), Arguments.of("jpa", - "Using Hibernate ORM and Jakarta Persistence")); + Set.of("Jakarta Persistence"))); } @ParameterizedTest @MethodSource - void synonymsContent(String query, Set result) { - assertThat(searchHitSearchResult(query).hits()).flatExtracting(GuideSearchHit::content) - .containsAll(result); + void synonymsContent(String query, Set expectedContentHighlights) { + var hits = searchHitSearchResult(query).hits(); + assertThat(expectedContentHighlights) + .allSatisfy(expectedContentHighlight -> { + assertThat(hits) + .flatExtracting(GuideSearchHit::content) + .anySatisfy(hitTitle -> assertThat(hitTitle).containsIgnoringCase(expectedContentHighlight)); + }); } private List synonymsContent() { return List.of( Arguments.of("Development Service", - Set.of("…also offer Dev Services.…", - "…In this case, before starting a container, Dev Services for AMQP looks for a container with the quarkus-dev-service-amqp…")), + Set.of("Dev Services", + "dev-service-amqp")), Arguments.of("dev Service", - Set.of("…also offer Dev Services.…", - "…In this case, before starting a container, Dev Services for AMQP looks for a container with the quarkus-dev-service-amqp…")), + Set.of("Dev Services", + "dev-service-amqp")), Arguments.of("rest easy", - Set.of("…Writing REST Services with Quarkus REST (formerly RESTEasy Reactive) This guide explains how to write…", - "…We recommend doing so at your application entry point boundaries like your REST endpoint controllers.…")), + Set.of("REST", "RESTEasy")), Arguments.of("vertx", - Set.of("…}\n\n} You can inject either the: io.vertx.core.Vertx instance exposing the bare Vert.x API io.vertx.mutiny.core.Vertx…", - "…Access the Vert.x instance To access the managed Vert.x instance, add the quarkus-vertx extension to…")), + Set.of("io.vertx.core.Vertx", + "Vert.x", "vertx")), Arguments.of("rest api", - Set.of("…Writing REST Services with Quarkus REST (formerly RESTEasy Reactive) This guide explains how to write…", - "…We recommend doing so at your application entry point boundaries like your REST endpoint controllers.…"))); + Set.of("REST", "RESTEasy"))); } private static SearchResult searchHitSearchResult(String q) { return given() .queryParam("q", q) - .queryParam("contentSnippets", 2) + // Bumping the number of snippets to give low-score matching terms more chance to appear in highlights. + // This is fine because these tests are not about relevance, + // just about checking that synonyms are detected correctly. + .queryParam("contentSnippets", 10) .when().get(GUIDES_SEARCH) .then() .statusCode(200)