Skip to content

Commit

Permalink
Merge pull request #74 from marko-bekhta/i59-add-synonyms
Browse files Browse the repository at this point in the history
Add synonyms filter to the analysis configuration
  • Loading branch information
yrodiere authored Dec 8, 2023
2 parents ac86060 + 9428d5d commit e9b5d8d
Show file tree
Hide file tree
Showing 8 changed files with 208 additions and 25 deletions.
4 changes: 4 additions & 0 deletions src/main/java/io/quarkus/search/app/SearchService.java
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

import org.hibernate.Length;
import org.hibernate.search.engine.search.common.BooleanOperator;
import org.hibernate.search.engine.search.predicate.dsl.SimpleQueryFlag;
import org.hibernate.search.mapper.orm.session.SearchSession;

import org.jboss.resteasy.reactive.RestQuery;
Expand Down Expand Up @@ -68,6 +69,9 @@ public SearchResult<GuideSearchHit> search(@RestQuery @DefaultValue(QuarkusVersi
.field("summary_autocomplete").boost(0.5f)
.field("fullContent_autocomplete").boost(0.1f)
.matching(q)
// See: https://github.com/elastic/elasticsearch/issues/39905#issuecomment-471578025
// While that issue is about stopwords, the same problem is observed for synonyms applied on the search-analyzer side:
.flags(SimpleQueryFlag.AND, SimpleQueryFlag.OR)
.defaultOperator(BooleanOperator.AND))
.should(f.match().field("origin").matching("quarkus").boost(50.0f))
.should(f.not(f.match().field("topics").matching("compatibility"))
Expand Down
18 changes: 9 additions & 9 deletions src/main/java/io/quarkus/search/app/entity/Guide.java
Original file line number Diff line number Diff line change
Expand Up @@ -45,32 +45,32 @@ public class Guide {
@KeywordField
public String origin;

@FullTextField(highlightable = Highlightable.UNIFIED, termVector = TermVector.WITH_POSITIONS_OFFSETS)
@FullTextField(name = "title_autocomplete", analyzer = AnalysisConfigurer.AUTOCOMPLETE, searchAnalyzer = AnalysisConfigurer.DEFAULT)
@FullTextField(highlightable = Highlightable.UNIFIED, termVector = TermVector.WITH_POSITIONS_OFFSETS, analyzer = AnalysisConfigurer.DEFAULT, searchAnalyzer = AnalysisConfigurer.DEFAULT_SEARCH)
@FullTextField(name = "title_autocomplete", analyzer = AnalysisConfigurer.AUTOCOMPLETE, searchAnalyzer = AnalysisConfigurer.DEFAULT_SEARCH)
@KeywordField(name = "title_sort", normalizer = AnalysisConfigurer.SORT, searchable = Searchable.NO, sortable = Sortable.YES)
@Column(length = Length.LONG)
public String title;

@FullTextField(highlightable = Highlightable.UNIFIED, termVector = TermVector.WITH_POSITIONS_OFFSETS)
@FullTextField(name = "summary_autocomplete", analyzer = AnalysisConfigurer.AUTOCOMPLETE, searchAnalyzer = AnalysisConfigurer.DEFAULT)
@FullTextField(highlightable = Highlightable.UNIFIED, termVector = TermVector.WITH_POSITIONS_OFFSETS, analyzer = AnalysisConfigurer.DEFAULT, searchAnalyzer = AnalysisConfigurer.DEFAULT_SEARCH)
@FullTextField(name = "summary_autocomplete", analyzer = AnalysisConfigurer.AUTOCOMPLETE, searchAnalyzer = AnalysisConfigurer.DEFAULT_SEARCH)
@Column(length = Length.LONG32)
public String summary;

@FullTextField
@FullTextField(name = "keywords_autocomplete", analyzer = AnalysisConfigurer.AUTOCOMPLETE, searchAnalyzer = AnalysisConfigurer.DEFAULT)
@FullTextField(analyzer = AnalysisConfigurer.DEFAULT, searchAnalyzer = AnalysisConfigurer.DEFAULT_SEARCH)
@FullTextField(name = "keywords_autocomplete", analyzer = AnalysisConfigurer.AUTOCOMPLETE, searchAnalyzer = AnalysisConfigurer.DEFAULT_SEARCH)
@Column(length = Length.LONG32)
public String keywords;

@FullTextField(name = "fullContent", valueBridge = @ValueBridgeRef(type = InputProviderHtmlBodyTextBridge.class), highlightable = Highlightable.UNIFIED)
@FullTextField(name = "fullContent_autocomplete", valueBridge = @ValueBridgeRef(type = InputProviderHtmlBodyTextBridge.class), analyzer = AnalysisConfigurer.AUTOCOMPLETE, searchAnalyzer = AnalysisConfigurer.DEFAULT)
@FullTextField(name = "fullContent", valueBridge = @ValueBridgeRef(type = InputProviderHtmlBodyTextBridge.class), highlightable = Highlightable.UNIFIED, analyzer = AnalysisConfigurer.DEFAULT, searchAnalyzer = AnalysisConfigurer.DEFAULT_SEARCH)
@FullTextField(name = "fullContent_autocomplete", valueBridge = @ValueBridgeRef(type = InputProviderHtmlBodyTextBridge.class), analyzer = AnalysisConfigurer.AUTOCOMPLETE, searchAnalyzer = AnalysisConfigurer.DEFAULT_SEARCH)
@Transient
@IndexingDependency(reindexOnUpdate = ReindexOnUpdate.NO)
public InputProvider htmlFullContentProvider;

@KeywordField(name = "categories")
public Set<String> categories = Set.of();

@FullTextField(name = "topics")
@FullTextField(name = "topics", analyzer = AnalysisConfigurer.DEFAULT, searchAnalyzer = AnalysisConfigurer.DEFAULT_SEARCH)
@KeywordField(name = "topics_faceting", searchable = Searchable.YES, projectable = Projectable.YES, aggregable = Aggregable.YES)
public Set<String> topics = Set.of();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,21 @@

import org.hibernate.search.backend.elasticsearch.analysis.ElasticsearchAnalysisConfigurationContext;
import org.hibernate.search.backend.elasticsearch.analysis.ElasticsearchAnalysisConfigurer;
import org.hibernate.search.engine.backend.analysis.AnalyzerNames;

import io.quarkus.hibernate.search.orm.elasticsearch.SearchExtension;

@SearchExtension
public class AnalysisConfigurer implements ElasticsearchAnalysisConfigurer {
public static final String DEFAULT = AnalyzerNames.DEFAULT;

// Synonym rules passed as the "synonyms" parameter of the "synonyms_graph_filter" token filter below.
// Each entry is a comma-separated group of terms; presumably every term in a group is treated as
// equivalent to the others (Solr-style synonym syntax) -- TODO confirm against the synonym_graph docs.
// Groups may contain multi-word terms (e.g. "rest api").
private static final String[] SYNONYMS = new String[] {
"development, dev",
"dev service, devservice, development service",
"resteasy, rest, rest api, rest easy",
"vert.x, vertx, vertex"
};

public static final String DEFAULT = "basic_analyzer";
public static final String DEFAULT_SEARCH = DEFAULT + "_search";
public static final String AUTOCOMPLETE = "autocomplete";
public static final String SORT = "sort";

Expand All @@ -18,6 +26,20 @@ public void configure(ElasticsearchAnalysisConfigurationContext context) {
.tokenizer("standard")
.tokenFilters("lowercase", "asciifolding", "stemmer")
.charFilters("html_strip");

context.analyzer(DEFAULT_SEARCH).custom()
.tokenizer("standard")
// > In general, synonym filters rewrite their inputs to the tokenizer and filters used in the preceding analysis chain
// Note that the synonym filter is added at the end. According to https://www.elastic.co/blog/boosting-the-power-of-elasticsearch-with-synonyms
// the preceding filters are also applied to the synonyms we pass to it, so we don't need to normalize them ourselves:
.tokenFilters("lowercase", "asciifolding", "stemmer", "synonyms_graph_filter")
.charFilters("html_strip");
context.tokenFilter("synonyms_graph_filter")
// See https://www.elastic.co/guide/en/elasticsearch/reference/8.11/analysis-synonym-graph-tokenfilter.html#analysis-synonym-graph-tokenfilter
// synonym_graph works better with multi-word synonyms
.type("synonym_graph")
.param("synonyms", SYNONYMS);

context.analyzer(AUTOCOMPLETE).custom()
.tokenizer("standard")
.tokenFilters("lowercase", "asciifolding", "stemmer", "autocomplete_edge_ngram")
Expand Down
6 changes: 3 additions & 3 deletions src/test/java/io/quarkus/search/app/SearchServiceTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
@QuarkusTest
@TestHTTPEndpoint(SearchService.class)
@TestInstance(TestInstance.Lifecycle.PER_CLASS)
@QuarkusIOSample.Setup
@QuarkusIOSample.Setup(filter = QuarkusIOSample.SearchServiceFilterDefinition.class)
class SearchServiceTest {
private static final TypeRef<SearchResult<GuideSearchHit>> SEARCH_RESULT_SEARCH_HITS = new TypeRef<>() {
};
Expand Down Expand Up @@ -132,7 +132,7 @@ void queryMatchingTwoTerms() {
void queryEmptyString() {
var result = search("");
assertThat(result.hits()).extracting(GuideSearchHit::url)
.containsExactlyInAnyOrder(GuideRef.urls(GuideRef.local()));
.containsExactlyInAnyOrder(GuideRef.urls(QuarkusIOSample.SearchServiceFilterDefinition.guides()));
assertThat(result.total()).isEqualTo(10);
}

Expand All @@ -143,7 +143,7 @@ void queryNotProvided() {
.statusCode(200)
.extract().body().as(SEARCH_RESULT_SEARCH_HITS);
assertThat(result.hits()).extracting(GuideSearchHit::url)
.containsExactlyInAnyOrder(GuideRef.urls(GuideRef.local()));
.containsExactlyInAnyOrder(GuideRef.urls(QuarkusIOSample.SearchServiceFilterDefinition.guides()));
assertThat(result.total()).isEqualTo(10);
}

Expand Down
107 changes: 107 additions & 0 deletions src/test/java/io/quarkus/search/app/SynonymSearchServiceTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
package io.quarkus.search.app;

import static io.restassured.RestAssured.given;
import static io.restassured.RestAssured.when;
import static org.assertj.core.api.Assertions.assertThat;

import java.time.Duration;
import java.util.List;
import java.util.Set;

import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.TestInstance;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;

import io.quarkus.search.app.dto.GuideSearchHit;
import io.quarkus.search.app.dto.SearchResult;
import io.quarkus.search.app.testsupport.QuarkusIOSample;
import io.quarkus.test.common.http.TestHTTPEndpoint;
import io.quarkus.test.junit.QuarkusTest;
import io.restassured.RestAssured;
import io.restassured.common.mapper.TypeRef;
import io.restassured.filter.log.LogDetail;
import org.awaitility.Awaitility;

/**
 * Verifies that guide searches match documents through the configured synonyms.
 * <p>
 * Each test sends a query to the {@code guides/search} endpoint and checks that the
 * highlighted title or content snippets returned for some hit are the expected ones.
 * The sample data is restricted by
 * {@link QuarkusIOSample.SearchServiceSynonymsFilterDefinition}.
 */
@QuarkusTest
@TestHTTPEndpoint(SearchService.class)
@TestInstance(TestInstance.Lifecycle.PER_CLASS)
@QuarkusIOSample.Setup(filter = QuarkusIOSample.SearchServiceSynonymsFilterDefinition.class)
class SynonymSearchServiceTest {
    private static final TypeRef<SearchResult<GuideSearchHit>> SEARCH_RESULT_SEARCH_HITS = new TypeRef<>() {
    };
    private static final String GUIDES_SEARCH = "guides/search";

    // Expected highlighted titles.
    private static final String DEV_SERVICES_TITLE = "<span class=\"highlighted\">Dev</span> <span class=\"highlighted\">Services</span> Overview";
    private static final String RESTEASY_TITLE = "Writing <span class=\"highlighted\">REST</span> Services with <span class=\"highlighted\">RESTEasy</span> Reactive";
    private static final String VERTX_TITLE = "<span class=\"highlighted\">Vert.x</span> Reference Guide";

    // Expected highlighted content snippets (two snippets per hit, see "contentSnippets" below).
    private static final Set<String> DEV_SERVICES_CONTENT = Set.of(
            "We refer to this capability as <span class=\"highlighted\">Dev</span> <span class=\"highlighted\">Services</span>.",
            "In this case, before starting a container, <span class=\"highlighted\">Dev</span> <span class=\"highlighted\">Services</span> for AMQP looks for a container with the quarkus-<span class=\"highlighted\">dev</span>-<span class=\"highlighted\">service</span>-amqp");
    private static final Set<String> RESTEASY_CONTENT = Set.of(
            "Writing <span class=\"highlighted\">REST</span> Services with <span class=\"highlighted\">RESTEasy</span> Reactive This guide explains how to write <span class=\"highlighted\">REST</span> Services with <span class=\"highlighted\">RESTEasy</span>",
            "Reactive and <span class=\"highlighted\">REST</span> Client Reactive interactions In Quarkus, the <span class=\"highlighted\">RESTEasy</span> Reactive extension and the <span class=\"highlighted\">REST</span>");
    private static final Set<String> VERTX_CONTENT = Set.of(
            "Use codecs The https:&#x2F;&#x2F;vertx.io&#x2F;docs&#x2F;<span class=\"highlighted\">vertx</span>-core&#x2F;java&#x2F;event_bus[<span class=\"highlighted\">Vert.x</span> Event",
            "Access the <span class=\"highlighted\">Vert.x</span> instance To access the managed <span class=\"highlighted\">Vert.x</span> instance, add the quarkus-<span class=\"highlighted\">vertx</span> extension to");

    /**
     * Returns the port of the management interface used for the readiness check.
     *
     * @return 9000 when the runtime class name ends with "IT" (presumably integration tests --
     *         TODO confirm), 9001 otherwise
     */
    protected int managementPort() {
        return getClass().getName().endsWith("IT") ? 9000 : 9001;
    }

    /**
     * Blocks, for up to one minute, until the readiness health endpoint answers with HTTP 200,
     * then enables request/response body logging for failed validations.
     */
    @BeforeAll
    void waitForIndexing() {
        Awaitility.await()
                .timeout(Duration.ofMinutes(1))
                .untilAsserted(() -> when()
                        .get("http://localhost:" + managementPort() + "/q/health/ready")
                        .then()
                        .statusCode(200));
        RestAssured.enableLoggingOfRequestAndResponseIfValidationFails(LogDetail.BODY);
    }

    /**
     * Checks that at least one hit for {@code query} has exactly the expected highlighted title.
     */
    @ParameterizedTest
    @MethodSource
    void synonymsTitle(String query, String result) {
        var hits = searchHitSearchResult(query).hits();
        assertThat(hits)
                .extracting(GuideSearchHit::title)
                .contains(result);
    }

    /** Supplies (query, expected highlighted title) pairs for {@link #synonymsTitle(String, String)}. */
    private List<? extends Arguments> synonymsTitle() {
        return List.of(
                Arguments.of("REST Development Service", DEV_SERVICES_TITLE),
                Arguments.of("rest easy", RESTEASY_TITLE),
                Arguments.of("vertx", VERTX_TITLE),
                Arguments.of("rest api", RESTEASY_TITLE));
    }

    /**
     * Checks that at least one hit for {@code query} has exactly the expected set of highlighted
     * content snippets.
     */
    @ParameterizedTest
    @MethodSource
    void synonymsContent(String query, Set<String> result) {
        var hits = searchHitSearchResult(query).hits();
        assertThat(hits)
                .extracting(GuideSearchHit::content)
                .contains(result);
    }

    /** Supplies (query, expected highlighted snippets) pairs for {@link #synonymsContent(String, Set)}. */
    private List<? extends Arguments> synonymsContent() {
        return List.of(
                Arguments.of("Development Service", DEV_SERVICES_CONTENT),
                Arguments.of("dev Service", DEV_SERVICES_CONTENT),
                Arguments.of("rest easy", RESTEASY_CONTENT),
                Arguments.of("vertx", VERTX_CONTENT),
                Arguments.of("rest api", RESTEASY_CONTENT));
    }

    /**
     * Runs a guide search for {@code q}, requesting two content snippets per hit.
     *
     * @param q the search query string
     * @return the deserialized search result; the request is asserted to return HTTP 200
     */
    private static SearchResult<GuideSearchHit> searchHitSearchResult(String q) {
        var response = given()
                .queryParam("q", q)
                .queryParam("contentSnippets", 2)
                .when()
                .get(GUIDES_SEARCH);
        return response.then()
                .statusCode(200)
                .extract()
                .body()
                .as(SEARCH_RESULT_SEARCH_HITS);
    }
}
3 changes: 3 additions & 0 deletions src/test/java/io/quarkus/search/app/testsupport/GuideRef.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ public record GuideRef(String name) {
public static final GuideRef HIBERNATE_ORM = create("hibernate-orm");
public static final GuideRef HIBERNATE_REACTIVE = create("hibernate-reactive");
public static final GuideRef STORK_REFERENCE = create("stork-reference");
public static final GuideRef DEV_SERVICES_REFERENCE = create("dev-services");
public static final GuideRef RESTEASY_REACTIVE_REFERENCE = create("resteasy-reactive");
public static final GuideRef VERTX_REFERENCE = create("vertx-reference");
public static final GuideRef QUARKIVERSE_AMAZON_S3 = createQuarkiverse(
"https://quarkiverse.github.io/quarkiverse-docs/quarkus-amazon-services/dev/amazon-s3.html");
// NOTE: when adding new constants here, don't forget to run the main() method in QuarkusIOFigure
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
Expand Down Expand Up @@ -181,7 +180,7 @@ private static void editIfNecessary(Path quarkusIoLocalPath, Path copyRootPath,

copyGit.add().addFilepattern(".").call();
copyGit.commit().setMessage("""
Edit Quarkus metadata yaml files
Edit Quarkus metadata yaml files %s
Edited:%s"""
.formatted(
Expand All @@ -192,9 +191,9 @@ private static void editIfNecessary(Path quarkusIoLocalPath, Path copyRootPath,
.call();
}

private static void yamlQuarkusEditor(Path fileToEdit) {
private static void yamlQuarkusEditor(Path fileToEdit, GuideRef[] refs) {
yamlQuarkusEditor(fileToEdit, quarkusYaml -> {
Set<String> guideRefs = Arrays.stream(GuideRef.local()).map(GuideRef::name).collect(Collectors.toSet());
Set<String> guideRefs = Arrays.stream(refs).map(GuideRef::name).collect(Collectors.toSet());

Map<String, Object> filtered = new HashMap<>();
Map<String, List<Object>> guides = new HashMap<>();
Expand Down Expand Up @@ -267,17 +266,65 @@ public String toString() {
public abstract void define(FilterDefinitionCollector c);
}

public static class AllFilterDefinition extends FilterDefinition {
public static class AllFilterDefinition extends AbstractGuideRefSetFilterDefinition {
public AllFilterDefinition() {
super("all");
super("all", GuideRef.local());
}
}

/**
 * Filter definition named "search-service-subset": restricts the sample to a fixed
 * set of ten guides.
 */
public static class SearchServiceFilterDefinition extends AbstractGuideRefSetFilterDefinition {
    private static final GuideRef[] GUIDES = new GuideRef[] {
            GuideRef.HIBERNATE_ORM,
            GuideRef.HIBERNATE_ORM_PANACHE,
            GuideRef.HIBERNATE_ORM_PANACHE_KOTLIN,
            GuideRef.HIBERNATE_SEARCH_ORM_ELASTICSEARCH,
            GuideRef.HIBERNATE_REACTIVE,
            GuideRef.HIBERNATE_REACTIVE_PANACHE,
            GuideRef.SPRING_DATA_JPA,
            GuideRef.DUPLICATED_CONTEXT,
            GuideRef.SECURITY_OIDC_BEARER_TOKEN_AUTHENTICATION,
            GuideRef.STORK_REFERENCE
    };

    /**
     * Returns the guides included by this filter definition.
     *
     * @return a fresh copy of the guide array, so that callers cannot alter the
     *         shared definition by modifying the returned array
     */
    public static GuideRef[] guides() {
        // Defensive copy: GUIDES is shared static state and arrays are always mutable.
        return GUIDES.clone();
    }

    public SearchServiceFilterDefinition() {
        super("search-service-subset", GUIDES);
    }
}

/**
 * Filter definition named "search-service-synonyms-subset": restricts the sample to
 * the three guides referenced by the synonym search tests.
 */
public static class SearchServiceSynonymsFilterDefinition extends AbstractGuideRefSetFilterDefinition {
    private static final GuideRef[] GUIDES = new GuideRef[] {
            GuideRef.RESTEASY_REACTIVE_REFERENCE,
            GuideRef.VERTX_REFERENCE,
            GuideRef.DEV_SERVICES_REFERENCE
    };

    /**
     * Returns the guides included by this filter definition.
     *
     * @return a fresh copy of the guide array, so that callers cannot alter the
     *         shared definition by modifying the returned array
     */
    public static GuideRef[] guides() {
        // Defensive copy: GUIDES is shared static state and arrays are always mutable.
        return GUIDES.clone();
    }

    public SearchServiceSynonymsFilterDefinition() {
        super("search-service-synonyms-subset", GUIDES);
    }
}

private static abstract class AbstractGuideRefSetFilterDefinition extends FilterDefinition {
private final GuideRef[] guides;

protected AbstractGuideRefSetFilterDefinition(String name, GuideRef... guides) {
super(name);
this.guides = guides;
}

@Override
public void define(FilterDefinitionCollector c) {
c.addMetadata(QuarkusVersions.LATEST);
c.addMetadata(SAMPLED_NON_LATEST_VERSION);
c.addMetadata(QuarkusVersions.LATEST, guides);
c.addMetadata(SAMPLED_NON_LATEST_VERSION, guides);
c.addQuarkiverseMetadata(SAMPLED_NON_LATEST_VERSION);
for (GuideRef guideRef : GuideRef.local()) {
for (GuideRef guideRef : guides) {
c.addGuide(guideRef);
c.addGuide(guideRef, SAMPLED_NON_LATEST_VERSION);
}
Expand All @@ -303,10 +350,10 @@ public FilterDefinitionCollector addGuide(GuideRef ref, String version) {
return this;
}

public FilterDefinitionCollector addMetadata(String version) {
public FilterDefinitionCollector addMetadata(String version, GuideRef[] guides) {
String metadataPath = QuarkusIO.yamlMetadataPath(version).toString();
addOnSourceBranch(metadataPath, metadataPath);
addMetadataToFilter(metadataPath, QuarkusIOSample::yamlQuarkusEditor);
addMetadataToFilter(metadataPath, path -> yamlQuarkusEditor(path, guides));
return this;
}

Expand Down
Binary file modified src/test/resources/quarkusio-sample.zip
Binary file not shown.

0 comments on commit e9b5d8d

Please sign in to comment.