diff --git a/src/main/java/io/quarkus/search/app/SearchService.java b/src/main/java/io/quarkus/search/app/SearchService.java index 3e0b719e..8199c49c 100644 --- a/src/main/java/io/quarkus/search/app/SearchService.java +++ b/src/main/java/io/quarkus/search/app/SearchService.java @@ -15,6 +15,7 @@ import org.hibernate.Length; import org.hibernate.search.engine.search.common.BooleanOperator; +import org.hibernate.search.engine.search.predicate.dsl.SimpleQueryFlag; import org.hibernate.search.mapper.orm.session.SearchSession; import org.jboss.resteasy.reactive.RestQuery; @@ -68,6 +69,9 @@ public SearchResult search(@RestQuery @DefaultValue(QuarkusVersi .field("summary_autocomplete").boost(0.5f) .field("fullContent_autocomplete").boost(0.1f) .matching(q) + // See: https://github.com/elastic/elasticsearch/issues/39905#issuecomment-471578025 + // while the issue is about stopwords the same problem is observed for synonyms on search-analyzer side: + .flags(SimpleQueryFlag.AND, SimpleQueryFlag.OR) .defaultOperator(BooleanOperator.AND)) .should(f.match().field("origin").matching("quarkus").boost(50.0f)) .should(f.not(f.match().field("topics").matching("compatibility")) diff --git a/src/main/java/io/quarkus/search/app/entity/Guide.java b/src/main/java/io/quarkus/search/app/entity/Guide.java index 3e111aa4..b7f881cf 100644 --- a/src/main/java/io/quarkus/search/app/entity/Guide.java +++ b/src/main/java/io/quarkus/search/app/entity/Guide.java @@ -45,24 +45,24 @@ public class Guide { @KeywordField public String origin; - @FullTextField(highlightable = Highlightable.UNIFIED, termVector = TermVector.WITH_POSITIONS_OFFSETS) - @FullTextField(name = "title_autocomplete", analyzer = AnalysisConfigurer.AUTOCOMPLETE, searchAnalyzer = AnalysisConfigurer.DEFAULT) + @FullTextField(highlightable = Highlightable.UNIFIED, termVector = TermVector.WITH_POSITIONS_OFFSETS, analyzer = AnalysisConfigurer.DEFAULT, searchAnalyzer = AnalysisConfigurer.DEFAULT_SEARCH) + @FullTextField(name = "title_autocomplete", analyzer = AnalysisConfigurer.AUTOCOMPLETE, searchAnalyzer = AnalysisConfigurer.DEFAULT_SEARCH) @KeywordField(name = "title_sort", normalizer = AnalysisConfigurer.SORT, searchable = Searchable.NO, sortable = Sortable.YES) @Column(length = Length.LONG) public String title; - @FullTextField(highlightable = Highlightable.UNIFIED, termVector = TermVector.WITH_POSITIONS_OFFSETS) - @FullTextField(name = "summary_autocomplete", analyzer = AnalysisConfigurer.AUTOCOMPLETE, searchAnalyzer = AnalysisConfigurer.DEFAULT) + @FullTextField(highlightable = Highlightable.UNIFIED, termVector = TermVector.WITH_POSITIONS_OFFSETS, analyzer = AnalysisConfigurer.DEFAULT, searchAnalyzer = AnalysisConfigurer.DEFAULT_SEARCH) + @FullTextField(name = "summary_autocomplete", analyzer = AnalysisConfigurer.AUTOCOMPLETE, searchAnalyzer = AnalysisConfigurer.DEFAULT_SEARCH) @Column(length = Length.LONG32) public String summary; - @FullTextField - @FullTextField(name = "keywords_autocomplete", analyzer = AnalysisConfigurer.AUTOCOMPLETE, searchAnalyzer = AnalysisConfigurer.DEFAULT) + @FullTextField(analyzer = AnalysisConfigurer.DEFAULT, searchAnalyzer = AnalysisConfigurer.DEFAULT_SEARCH) + @FullTextField(name = "keywords_autocomplete", analyzer = AnalysisConfigurer.AUTOCOMPLETE, searchAnalyzer = AnalysisConfigurer.DEFAULT_SEARCH) @Column(length = Length.LONG32) public String keywords; - @FullTextField(name = "fullContent", valueBridge = @ValueBridgeRef(type = InputProviderHtmlBodyTextBridge.class), highlightable = Highlightable.UNIFIED) - @FullTextField(name = "fullContent_autocomplete", valueBridge = @ValueBridgeRef(type = InputProviderHtmlBodyTextBridge.class), analyzer = AnalysisConfigurer.AUTOCOMPLETE, searchAnalyzer = AnalysisConfigurer.DEFAULT) + @FullTextField(name = "fullContent", valueBridge = @ValueBridgeRef(type = InputProviderHtmlBodyTextBridge.class), highlightable = Highlightable.UNIFIED, analyzer = AnalysisConfigurer.DEFAULT, searchAnalyzer = AnalysisConfigurer.DEFAULT_SEARCH) + @FullTextField(name = "fullContent_autocomplete", valueBridge = @ValueBridgeRef(type = InputProviderHtmlBodyTextBridge.class), analyzer = AnalysisConfigurer.AUTOCOMPLETE, searchAnalyzer = AnalysisConfigurer.DEFAULT_SEARCH) @Transient @IndexingDependency(reindexOnUpdate = ReindexOnUpdate.NO) public InputProvider htmlFullContentProvider; @@ -70,7 +70,7 @@ public class Guide { @KeywordField(name = "categories") public Set categories = Set.of(); - @FullTextField(name = "topics") + @FullTextField(name = "topics", analyzer = AnalysisConfigurer.DEFAULT, searchAnalyzer = AnalysisConfigurer.DEFAULT_SEARCH) @KeywordField(name = "topics_faceting", searchable = Searchable.YES, projectable = Projectable.YES, aggregable = Aggregable.YES) public Set topics = Set.of(); diff --git a/src/main/java/io/quarkus/search/app/hibernate/AnalysisConfigurer.java b/src/main/java/io/quarkus/search/app/hibernate/AnalysisConfigurer.java index 5b27f447..57f130d7 100644 --- a/src/main/java/io/quarkus/search/app/hibernate/AnalysisConfigurer.java +++ b/src/main/java/io/quarkus/search/app/hibernate/AnalysisConfigurer.java @@ -2,13 +2,21 @@ import org.hibernate.search.backend.elasticsearch.analysis.ElasticsearchAnalysisConfigurationContext; import org.hibernate.search.backend.elasticsearch.analysis.ElasticsearchAnalysisConfigurer; -import org.hibernate.search.engine.backend.analysis.AnalyzerNames; import io.quarkus.hibernate.search.orm.elasticsearch.SearchExtension; @SearchExtension public class AnalysisConfigurer implements ElasticsearchAnalysisConfigurer { - public static final String DEFAULT = AnalyzerNames.DEFAULT; + + private static final String[] SYNONYMS = new String[] { + "development, dev", + "dev service, devservice, development service", + "resteasy, rest, rest api, rest easy", + "vert.x, vertx, vertex" + }; + + public static final String DEFAULT = "basic_analyzer"; + public static final String DEFAULT_SEARCH = DEFAULT + "_search"; public static final String AUTOCOMPLETE = "autocomplete"; public static final String SORT = "sort"; @@ -18,6 +26,20 @@ public void configure(ElasticsearchAnalysisConfigurationContext context) { .tokenizer("standard") .tokenFilters("lowercase", "asciifolding", "stemmer") .charFilters("html_strip"); + + context.analyzer(DEFAULT_SEARCH).custom() + .tokenizer("standard") + // > In general, synonym filters rewrite their inputs to the tokenizer and filters used in the preceding analysis chain + // Note how the synonym filter is added in the end. According to https://www.elastic.co/blog/boosting-the-power-of-elasticsearch-with-synonyms + // preceding filters should get applied to the synonyms we passed to it, so we don't need to bother about normalizing them in some way: + .tokenFilters("lowercase", "asciifolding", "stemmer", "synonyms_graph_filter") + .charFilters("html_strip"); + context.tokenFilter("synonyms_graph_filter") + // See https://www.elastic.co/guide/en/elasticsearch/reference/8.11/analysis-synonym-graph-tokenfilter.html#analysis-synonym-graph-tokenfilter + // synonym_graph works better with multi-word synonyms + .type("synonym_graph") + .param("synonyms", SYNONYMS); + context.analyzer(AUTOCOMPLETE).custom() .tokenizer("standard") .tokenFilters("lowercase", "asciifolding", "stemmer", "autocomplete_edge_ngram") diff --git a/src/test/java/io/quarkus/search/app/SearchServiceTest.java b/src/test/java/io/quarkus/search/app/SearchServiceTest.java index 55c7e3c8..6c67593d 100644 --- a/src/test/java/io/quarkus/search/app/SearchServiceTest.java +++ b/src/test/java/io/quarkus/search/app/SearchServiceTest.java @@ -36,7 +36,7 @@ @QuarkusTest @TestHTTPEndpoint(SearchService.class) @TestInstance(TestInstance.Lifecycle.PER_CLASS) -@QuarkusIOSample.Setup +@QuarkusIOSample.Setup(filter = QuarkusIOSample.SearchServiceFilterDefinition.class) class SearchServiceTest { private static final TypeRef> SEARCH_RESULT_SEARCH_HITS = new TypeRef<>() { }; @@ -132,7 +132,7 @@ void queryMatchingTwoTerms() { void queryEmptyString() { var result = search(""); assertThat(result.hits()).extracting(GuideSearchHit::url) - .containsExactlyInAnyOrder(GuideRef.urls(GuideRef.local())); + .containsExactlyInAnyOrder(GuideRef.urls(QuarkusIOSample.SearchServiceFilterDefinition.guides())); assertThat(result.total()).isEqualTo(10); } @@ -143,7 +143,7 @@ void queryNotProvided() { .statusCode(200) .extract().body().as(SEARCH_RESULT_SEARCH_HITS); assertThat(result.hits()).extracting(GuideSearchHit::url) - .containsExactlyInAnyOrder(GuideRef.urls(GuideRef.local())); + .containsExactlyInAnyOrder(GuideRef.urls(QuarkusIOSample.SearchServiceFilterDefinition.guides())); assertThat(result.total()).isEqualTo(10); } diff --git a/src/test/java/io/quarkus/search/app/SynonymSearchServiceTest.java b/src/test/java/io/quarkus/search/app/SynonymSearchServiceTest.java new file mode 100644 index 00000000..dfb52bb1 --- /dev/null +++ b/src/test/java/io/quarkus/search/app/SynonymSearchServiceTest.java @@ -0,0 +1,107 @@ +package io.quarkus.search.app; + +import static io.restassured.RestAssured.given; +import static io.restassured.RestAssured.when; +import static org.assertj.core.api.Assertions.assertThat; + +import java.time.Duration; +import java.util.List; +import java.util.Set; + +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.TestInstance; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + +import io.quarkus.search.app.dto.GuideSearchHit; +import io.quarkus.search.app.dto.SearchResult; +import io.quarkus.search.app.testsupport.QuarkusIOSample; +import io.quarkus.test.common.http.TestHTTPEndpoint; +import io.quarkus.test.junit.QuarkusTest; +import io.restassured.RestAssured; +import io.restassured.common.mapper.TypeRef; +import io.restassured.filter.log.LogDetail; +import org.awaitility.Awaitility; + +@QuarkusTest +@TestHTTPEndpoint(SearchService.class) +@TestInstance(TestInstance.Lifecycle.PER_CLASS) +@QuarkusIOSample.Setup(filter = QuarkusIOSample.SearchServiceSynonymsFilterDefinition.class) +class SynonymSearchServiceTest { + private static final TypeRef> SEARCH_RESULT_SEARCH_HITS = new TypeRef<>() { + }; + private static final String GUIDES_SEARCH = "guides/search"; + + protected int managementPort() { + if (getClass().getName().endsWith("IT")) { + return 9000; + } else { + return 9001; + } + } + + @BeforeAll + void waitForIndexing() { + Awaitility.await().timeout(Duration.ofMinutes(1)) + .untilAsserted(() -> when().get("http://localhost:" + managementPort() + "/q/health/ready") + .then() + .statusCode(200)); + RestAssured.enableLoggingOfRequestAndResponseIfValidationFails(LogDetail.BODY); + } + + @ParameterizedTest + @MethodSource + void synonymsTitle(String query, String result) { + assertThat(searchHitSearchResult(query).hits()).extracting(GuideSearchHit::title) + .contains(result); + } + + private List synonymsTitle() { + return List.of( + Arguments.of("REST Development Service", + "Dev Services Overview"), + Arguments.of("rest easy", + "Writing REST Services with RESTEasy Reactive"), + Arguments.of("vertx", + "Vert.x Reference Guide"), + Arguments.of("rest api", + "Writing REST Services with RESTEasy Reactive")); + } + + @ParameterizedTest + @MethodSource + void synonymsContent(String query, Set result) { + assertThat(searchHitSearchResult(query).hits()).extracting(GuideSearchHit::content) + .contains(result); + } + + private List synonymsContent() { + return List.of( + Arguments.of("Development Service", + Set.of("We refer to this capability as Dev Services.", + "In this case, before starting a container, Dev Services for AMQP looks for a container with the quarkus-dev-service-amqp")), + Arguments.of("dev Service", + Set.of("We refer to this capability as Dev Services.", + "In this case, before starting a container, Dev Services for AMQP looks for a container with the quarkus-dev-service-amqp")), + Arguments.of("rest easy", + Set.of("Writing REST Services with RESTEasy Reactive This guide explains how to write REST Services with RESTEasy", + "Reactive and REST Client Reactive interactions In Quarkus, the RESTEasy Reactive extension and the REST")), + Arguments.of("vertx", + Set.of("Use codecs The https://vertx.io/docs/vertx-core/java/event_bus[Vert.x Event", + "Access the Vert.x instance To access the managed Vert.x instance, add the quarkus-vertx extension to")), + Arguments.of("rest api", + Set.of("Writing REST Services with RESTEasy Reactive This guide explains how to write REST Services with RESTEasy", + "Reactive and REST Client Reactive interactions In Quarkus, the RESTEasy Reactive extension and the REST"))); + } + + private static SearchResult searchHitSearchResult(String q) { + return given() + .queryParam("q", q) + .queryParam("contentSnippets", 2) + .when().get(GUIDES_SEARCH) + .then() + .statusCode(200) + .extract().body().as(SEARCH_RESULT_SEARCH_HITS); + } +} diff --git a/src/test/java/io/quarkus/search/app/testsupport/GuideRef.java b/src/test/java/io/quarkus/search/app/testsupport/GuideRef.java index ae4ecffc..19ad16fb 100644 --- a/src/test/java/io/quarkus/search/app/testsupport/GuideRef.java +++ b/src/test/java/io/quarkus/search/app/testsupport/GuideRef.java @@ -25,6 +25,9 @@ public record GuideRef(String name) { public static final GuideRef HIBERNATE_ORM = create("hibernate-orm"); public static final GuideRef HIBERNATE_REACTIVE = create("hibernate-reactive"); public static final GuideRef STORK_REFERENCE = create("stork-reference"); + public static final GuideRef DEV_SERVICES_REFERENCE = create("dev-services"); + public static final GuideRef RESTEASY_REACTIVE_REFERENCE = create("resteasy-reactive"); + public static final GuideRef VERTX_REFERENCE = create("vertx-reference"); public static final GuideRef QUARKIVERSE_AMAZON_S3 = createQuarkiverse( "https://quarkiverse.github.io/quarkiverse-docs/quarkus-amazon-services/dev/amazon-s3.html"); // NOTE: when adding new constants here, don't forget to run the main() method in QuarkusIOFigure diff --git a/src/test/java/io/quarkus/search/app/testsupport/QuarkusIOSample.java b/src/test/java/io/quarkus/search/app/testsupport/QuarkusIOSample.java index fac48a80..70f8f267 100644 --- a/src/test/java/io/quarkus/search/app/testsupport/QuarkusIOSample.java +++ b/src/test/java/io/quarkus/search/app/testsupport/QuarkusIOSample.java @@ -15,7 +15,6 @@ import java.util.Arrays; import java.util.Collections; import java.util.HashMap; -import java.util.HashSet; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; @@ -181,7 +180,7 @@ private static void editIfNecessary(Path quarkusIoLocalPath, Path copyRootPath, copyGit.add().addFilepattern(".").call(); copyGit.commit().setMessage(""" - Edit Quarkus metadata yaml files + Edit Quarkus metadata yaml files %s Edited:%s""" .formatted( @@ -192,9 +191,9 @@ private static void editIfNecessary(Path quarkusIoLocalPath, Path copyRootPath, .call(); } - private static void yamlQuarkusEditor(Path fileToEdit) { + private static void yamlQuarkusEditor(Path fileToEdit, GuideRef[] refs) { yamlQuarkusEditor(fileToEdit, quarkusYaml -> { - Set guideRefs = Arrays.stream(GuideRef.local()).map(GuideRef::name).collect(Collectors.toSet()); + Set guideRefs = Arrays.stream(refs).map(GuideRef::name).collect(Collectors.toSet()); Map filtered = new HashMap<>(); Map> guides = new HashMap<>(); @@ -267,17 +266,65 @@ public String toString() { public abstract void define(FilterDefinitionCollector c); } - public static class AllFilterDefinition extends FilterDefinition { + public static class AllFilterDefinition extends AbstractGuideRefSetFilterDefinition { public AllFilterDefinition() { - super("all"); + super("all", GuideRef.local()); + } + } + + public static class SearchServiceFilterDefinition extends AbstractGuideRefSetFilterDefinition { + private static final GuideRef[] GUIDES = new GuideRef[] { + GuideRef.HIBERNATE_ORM, + GuideRef.HIBERNATE_ORM_PANACHE, + GuideRef.HIBERNATE_ORM_PANACHE_KOTLIN, + GuideRef.HIBERNATE_SEARCH_ORM_ELASTICSEARCH, + GuideRef.HIBERNATE_REACTIVE, + GuideRef.HIBERNATE_REACTIVE_PANACHE, + GuideRef.SPRING_DATA_JPA, + GuideRef.DUPLICATED_CONTEXT, + GuideRef.SECURITY_OIDC_BEARER_TOKEN_AUTHENTICATION, + GuideRef.STORK_REFERENCE + }; + + public static GuideRef[] guides() { + return GUIDES; + } + + public SearchServiceFilterDefinition() { + super("search-service-subset", GUIDES); + } + } + + public static class SearchServiceSynonymsFilterDefinition extends AbstractGuideRefSetFilterDefinition { + private static final GuideRef[] GUIDES = new GuideRef[] { + GuideRef.RESTEASY_REACTIVE_REFERENCE, + GuideRef.VERTX_REFERENCE, + GuideRef.DEV_SERVICES_REFERENCE + }; + + public static GuideRef[] guides() { + return GUIDES; + } + + public SearchServiceSynonymsFilterDefinition() { + super("search-service-synonyms-subset", GUIDES); + } + } + + private static abstract class AbstractGuideRefSetFilterDefinition extends FilterDefinition { + private final GuideRef[] guides; + + protected AbstractGuideRefSetFilterDefinition(String name, GuideRef... guides) { + super(name); + this.guides = guides; } @Override public void define(FilterDefinitionCollector c) { - c.addMetadata(QuarkusVersions.LATEST); - c.addMetadata(SAMPLED_NON_LATEST_VERSION); + c.addMetadata(QuarkusVersions.LATEST, guides); + c.addMetadata(SAMPLED_NON_LATEST_VERSION, guides); c.addQuarkiverseMetadata(SAMPLED_NON_LATEST_VERSION); - for (GuideRef guideRef : GuideRef.local()) { + for (GuideRef guideRef : guides) { c.addGuide(guideRef); c.addGuide(guideRef, SAMPLED_NON_LATEST_VERSION); } @@ -303,10 +350,10 @@ public FilterDefinitionCollector addGuide(GuideRef ref, String version) { return this; } - public FilterDefinitionCollector addMetadata(String version) { + public FilterDefinitionCollector addMetadata(String version, GuideRef[] guides) { String metadataPath = QuarkusIO.yamlMetadataPath(version).toString(); addOnSourceBranch(metadataPath, metadataPath); - addMetadataToFilter(metadataPath, QuarkusIOSample::yamlQuarkusEditor); + addMetadataToFilter(metadataPath, path -> yamlQuarkusEditor(path, guides)); return this; } diff --git a/src/test/resources/quarkusio-sample.zip b/src/test/resources/quarkusio-sample.zip index a0699856..6e8c0ad3 100644 Binary files a/src/test/resources/quarkusio-sample.zip and b/src/test/resources/quarkusio-sample.zip differ