Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add synonyms filter to the analysis configuration #74

Merged
merged 1 commit into from
Dec 8, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/main/java/io/quarkus/search/app/SearchService.java
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

import org.hibernate.Length;
import org.hibernate.search.engine.search.common.BooleanOperator;
import org.hibernate.search.engine.search.predicate.dsl.SimpleQueryFlag;
import org.hibernate.search.mapper.orm.session.SearchSession;

import org.jboss.resteasy.reactive.RestQuery;
Expand Down Expand Up @@ -68,6 +69,9 @@ public SearchResult<GuideSearchHit> search(@RestQuery @DefaultValue(QuarkusVersi
.field("summary_autocomplete").boost(0.5f)
.field("fullContent_autocomplete").boost(0.1f)
.matching(q)
// See https://github.com/elastic/elasticsearch/issues/39905#issuecomment-471578025
// While that issue is about stopwords, the same problem is observed for synonyms on the search-analyzer side:
.flags(SimpleQueryFlag.AND, SimpleQueryFlag.OR)
yrodiere marked this conversation as resolved.
Show resolved Hide resolved
.defaultOperator(BooleanOperator.AND))
.should(f.match().field("origin").matching("quarkus").boost(50.0f))
.should(f.not(f.match().field("topics").matching("compatibility"))
Expand Down
18 changes: 9 additions & 9 deletions src/main/java/io/quarkus/search/app/entity/Guide.java
Original file line number Diff line number Diff line change
Expand Up @@ -45,32 +45,32 @@ public class Guide {
@KeywordField
public String origin;

@FullTextField(highlightable = Highlightable.UNIFIED, termVector = TermVector.WITH_POSITIONS_OFFSETS)
@FullTextField(name = "title_autocomplete", analyzer = AnalysisConfigurer.AUTOCOMPLETE, searchAnalyzer = AnalysisConfigurer.DEFAULT)
@FullTextField(highlightable = Highlightable.UNIFIED, termVector = TermVector.WITH_POSITIONS_OFFSETS, analyzer = AnalysisConfigurer.DEFAULT, searchAnalyzer = AnalysisConfigurer.DEFAULT_SEARCH)
@FullTextField(name = "title_autocomplete", analyzer = AnalysisConfigurer.AUTOCOMPLETE, searchAnalyzer = AnalysisConfigurer.DEFAULT_SEARCH)
@KeywordField(name = "title_sort", normalizer = AnalysisConfigurer.SORT, searchable = Searchable.NO, sortable = Sortable.YES)
@Column(length = Length.LONG)
public String title;

@FullTextField(highlightable = Highlightable.UNIFIED, termVector = TermVector.WITH_POSITIONS_OFFSETS)
@FullTextField(name = "summary_autocomplete", analyzer = AnalysisConfigurer.AUTOCOMPLETE, searchAnalyzer = AnalysisConfigurer.DEFAULT)
@FullTextField(highlightable = Highlightable.UNIFIED, termVector = TermVector.WITH_POSITIONS_OFFSETS, analyzer = AnalysisConfigurer.DEFAULT, searchAnalyzer = AnalysisConfigurer.DEFAULT_SEARCH)
@FullTextField(name = "summary_autocomplete", analyzer = AnalysisConfigurer.AUTOCOMPLETE, searchAnalyzer = AnalysisConfigurer.DEFAULT_SEARCH)
@Column(length = Length.LONG32)
public String summary;

@FullTextField
@FullTextField(name = "keywords_autocomplete", analyzer = AnalysisConfigurer.AUTOCOMPLETE, searchAnalyzer = AnalysisConfigurer.DEFAULT)
@FullTextField(analyzer = AnalysisConfigurer.DEFAULT, searchAnalyzer = AnalysisConfigurer.DEFAULT_SEARCH)
@FullTextField(name = "keywords_autocomplete", analyzer = AnalysisConfigurer.AUTOCOMPLETE, searchAnalyzer = AnalysisConfigurer.DEFAULT_SEARCH)
@Column(length = Length.LONG32)
public String keywords;

@FullTextField(name = "fullContent", valueBridge = @ValueBridgeRef(type = InputProviderHtmlBodyTextBridge.class), highlightable = Highlightable.UNIFIED)
@FullTextField(name = "fullContent_autocomplete", valueBridge = @ValueBridgeRef(type = InputProviderHtmlBodyTextBridge.class), analyzer = AnalysisConfigurer.AUTOCOMPLETE, searchAnalyzer = AnalysisConfigurer.DEFAULT)
@FullTextField(name = "fullContent", valueBridge = @ValueBridgeRef(type = InputProviderHtmlBodyTextBridge.class), highlightable = Highlightable.UNIFIED, analyzer = AnalysisConfigurer.DEFAULT, searchAnalyzer = AnalysisConfigurer.DEFAULT_SEARCH)
@FullTextField(name = "fullContent_autocomplete", valueBridge = @ValueBridgeRef(type = InputProviderHtmlBodyTextBridge.class), analyzer = AnalysisConfigurer.AUTOCOMPLETE, searchAnalyzer = AnalysisConfigurer.DEFAULT_SEARCH)
@Transient
@IndexingDependency(reindexOnUpdate = ReindexOnUpdate.NO)
public InputProvider htmlFullContentProvider;

@KeywordField(name = "categories")
public Set<String> categories = Set.of();

@FullTextField(name = "topics")
@FullTextField(name = "topics", analyzer = AnalysisConfigurer.DEFAULT, searchAnalyzer = AnalysisConfigurer.DEFAULT_SEARCH)
@KeywordField(name = "topics_faceting", searchable = Searchable.YES, projectable = Projectable.YES, aggregable = Aggregable.YES)
public Set<String> topics = Set.of();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,21 @@

import org.hibernate.search.backend.elasticsearch.analysis.ElasticsearchAnalysisConfigurationContext;
import org.hibernate.search.backend.elasticsearch.analysis.ElasticsearchAnalysisConfigurer;
import org.hibernate.search.engine.backend.analysis.AnalyzerNames;

import io.quarkus.hibernate.search.orm.elasticsearch.SearchExtension;

@SearchExtension
public class AnalysisConfigurer implements ElasticsearchAnalysisConfigurer {
public static final String DEFAULT = AnalyzerNames.DEFAULT;

private static final String[] SYNONYMS = new String[] {
"development, dev",
"dev service, devservice, development service",
"resteasy, rest, rest api, rest easy",
"vert.x, vertx, vertex"
};

public static final String DEFAULT = "basic_analyzer";
public static final String DEFAULT_SEARCH = DEFAULT + "_search";
public static final String AUTOCOMPLETE = "autocomplete";
public static final String SORT = "sort";

Expand All @@ -18,6 +26,20 @@ public void configure(ElasticsearchAnalysisConfigurationContext context) {
.tokenizer("standard")
.tokenFilters("lowercase", "asciifolding", "stemmer")
.charFilters("html_strip");

context.analyzer(DEFAULT_SEARCH).custom()
.tokenizer("standard")
// > In general, synonym filters rewrite their inputs to the tokenizer and filters used in the preceding analysis chain
// (quoted from https://www.elastic.co/blog/boosting-the-power-of-elasticsearch-with-synonyms).
// Note that the synonym filter is added last: the preceding filters get applied to the synonyms we pass to it, so we don't need to normalize them ourselves:
.tokenFilters("lowercase", "asciifolding", "stemmer", "synonyms_graph_filter")
yrodiere marked this conversation as resolved.
Show resolved Hide resolved
.charFilters("html_strip");
context.tokenFilter("synonyms_graph_filter")
// See https://www.elastic.co/guide/en/elasticsearch/reference/8.11/analysis-synonym-graph-tokenfilter.html#analysis-synonym-graph-tokenfilter
// synonym_graph works better with multi-word synonyms
.type("synonym_graph")
.param("synonyms", SYNONYMS);

context.analyzer(AUTOCOMPLETE).custom()
.tokenizer("standard")
.tokenFilters("lowercase", "asciifolding", "stemmer", "autocomplete_edge_ngram")
Expand Down
6 changes: 3 additions & 3 deletions src/test/java/io/quarkus/search/app/SearchServiceTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
@QuarkusTest
@TestHTTPEndpoint(SearchService.class)
@TestInstance(TestInstance.Lifecycle.PER_CLASS)
@QuarkusIOSample.Setup
@QuarkusIOSample.Setup(filter = QuarkusIOSample.SearchServiceFilterDefinition.class)
class SearchServiceTest {
private static final TypeRef<SearchResult<GuideSearchHit>> SEARCH_RESULT_SEARCH_HITS = new TypeRef<>() {
};
Expand Down Expand Up @@ -132,7 +132,7 @@ void queryMatchingTwoTerms() {
void queryEmptyString() {
var result = search("");
assertThat(result.hits()).extracting(GuideSearchHit::url)
.containsExactlyInAnyOrder(GuideRef.urls(GuideRef.local()));
.containsExactlyInAnyOrder(GuideRef.urls(QuarkusIOSample.SearchServiceFilterDefinition.guides()));
assertThat(result.total()).isEqualTo(10);
}

Expand All @@ -143,7 +143,7 @@ void queryNotProvided() {
.statusCode(200)
.extract().body().as(SEARCH_RESULT_SEARCH_HITS);
assertThat(result.hits()).extracting(GuideSearchHit::url)
.containsExactlyInAnyOrder(GuideRef.urls(GuideRef.local()));
.containsExactlyInAnyOrder(GuideRef.urls(QuarkusIOSample.SearchServiceFilterDefinition.guides()));
assertThat(result.total()).isEqualTo(10);
}

Expand Down
107 changes: 107 additions & 0 deletions src/test/java/io/quarkus/search/app/SynonymSearchServiceTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
package io.quarkus.search.app;

import static io.restassured.RestAssured.given;
import static io.restassured.RestAssured.when;
import static org.assertj.core.api.Assertions.assertThat;

import java.time.Duration;
import java.util.List;
import java.util.Set;

import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.TestInstance;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;

import io.quarkus.search.app.dto.GuideSearchHit;
import io.quarkus.search.app.dto.SearchResult;
import io.quarkus.search.app.testsupport.QuarkusIOSample;
import io.quarkus.test.common.http.TestHTTPEndpoint;
import io.quarkus.test.junit.QuarkusTest;
import io.restassured.RestAssured;
import io.restassured.common.mapper.TypeRef;
import io.restassured.filter.log.LogDetail;
import org.awaitility.Awaitility;

/**
 * Exercises the guide search endpoint with queries that only match through the configured synonyms,
 * checking both the highlighted titles and the highlighted content snippets of the returned hits.
 */
@QuarkusTest
@TestHTTPEndpoint(SearchService.class)
@TestInstance(TestInstance.Lifecycle.PER_CLASS)
@QuarkusIOSample.Setup(filter = QuarkusIOSample.SearchServiceSynonymsFilterDefinition.class)
class SynonymSearchServiceTest {
    private static final TypeRef<SearchResult<GuideSearchHit>> SEARCH_RESULT_SEARCH_HITS = new TypeRef<>() {
    };
    private static final String GUIDES_SEARCH = "guides/search";

    /**
     * Picks the management port based on the runtime class name.
     *
     * @return 9000 when the class name ends with "IT", 9001 otherwise
     */
    protected int managementPort() {
        boolean nameEndsWithIt = getClass().getName().endsWith("IT");
        return nameEndsWithIt ? 9000 : 9001;
    }

    /**
     * Polls the readiness endpoint on the management port for up to one minute until it answers with 200,
     * then turns on request/response body logging for failed validations.
     */
    @BeforeAll
    void waitForIndexing() {
        String readinessUrl = "http://localhost:" + managementPort() + "/q/health/ready";
        Awaitility.await()
                .timeout(Duration.ofMinutes(1))
                .untilAsserted(() -> when().get(readinessUrl).then().statusCode(200));
        RestAssured.enableLoggingOfRequestAndResponseIfValidationFails(LogDetail.BODY);
    }

    /**
     * Asserts that at least one hit returned for the query carries the expected highlighted title.
     */
    @ParameterizedTest
    @MethodSource
    void synonymsTitle(String query, String result) {
        var hits = searchHitSearchResult(query).hits();
        assertThat(hits)
                .extracting(GuideSearchHit::title)
                .contains(result);
    }

    /**
     * Supplies (query, expected highlighted title) pairs for {@link #synonymsTitle(String, String)}.
     */
    private List<? extends Arguments> synonymsTitle() {
        // Both "rest easy" and "rest api" are expected to find the same guide title.
        String restEasyTitle = "Writing <span class=\"highlighted\">REST</span> Services with <span class=\"highlighted\">RESTEasy</span> Reactive";

        return List.of(
                Arguments.of("REST Development Service",
                        "<span class=\"highlighted\">Dev</span> <span class=\"highlighted\">Services</span> Overview"),
                Arguments.of("rest easy", restEasyTitle),
                Arguments.of("vertx",
                        "<span class=\"highlighted\">Vert.x</span> Reference Guide"),
                Arguments.of("rest api", restEasyTitle));
    }

    /**
     * Asserts that at least one hit returned for the query carries exactly the expected set of content snippets.
     */
    @ParameterizedTest
    @MethodSource
    void synonymsContent(String query, Set<String> result) {
        var hits = searchHitSearchResult(query).hits();
        assertThat(hits)
                .extracting(GuideSearchHit::content)
                .contains(result);
    }

    /**
     * Supplies (query, expected highlighted content snippets) pairs for {@link #synonymsContent(String, Set)}.
     */
    private List<? extends Arguments> synonymsContent() {
        // Expected for both the "Development Service" and "dev Service" queries.
        Set<String> devServicesSnippets = Set.of(
                "We refer to this capability as <span class=\"highlighted\">Dev</span> <span class=\"highlighted\">Services</span>.",
                "In this case, before starting a container, <span class=\"highlighted\">Dev</span> <span class=\"highlighted\">Services</span> for AMQP looks for a container with the quarkus-<span class=\"highlighted\">dev</span>-<span class=\"highlighted\">service</span>-amqp");

        // Expected for both the "rest easy" and "rest api" queries.
        Set<String> restEasySnippets = Set.of(
                "Writing <span class=\"highlighted\">REST</span> Services with <span class=\"highlighted\">RESTEasy</span> Reactive This guide explains how to write <span class=\"highlighted\">REST</span> Services with <span class=\"highlighted\">RESTEasy</span>",
                "Reactive and <span class=\"highlighted\">REST</span> Client Reactive interactions In Quarkus, the <span class=\"highlighted\">RESTEasy</span> Reactive extension and the <span class=\"highlighted\">REST</span>");

        Set<String> vertxSnippets = Set.of(
                "Use codecs The https:&#x2F;&#x2F;vertx.io&#x2F;docs&#x2F;<span class=\"highlighted\">vertx</span>-core&#x2F;java&#x2F;event_bus[<span class=\"highlighted\">Vert.x</span> Event",
                "Access the <span class=\"highlighted\">Vert.x</span> instance To access the managed <span class=\"highlighted\">Vert.x</span> instance, add the quarkus-<span class=\"highlighted\">vertx</span> extension to");

        return List.of(
                Arguments.of("Development Service", devServicesSnippets),
                Arguments.of("dev Service", devServicesSnippets),
                Arguments.of("rest easy", restEasySnippets),
                Arguments.of("vertx", vertxSnippets),
                Arguments.of("rest api", restEasySnippets));
    }

    /**
     * Calls the guide search endpoint with the given query, requesting two content snippets per hit,
     * and deserializes the 200 response.
     *
     * @param q the value sent as the {@code q} query parameter
     * @return the deserialized search result
     */
    private static SearchResult<GuideSearchHit> searchHitSearchResult(String q) {
        var request = given()
                .queryParam("q", q)
                .queryParam("contentSnippets", 2);
        var response = request.when().get(GUIDES_SEARCH);
        return response.then()
                .statusCode(200)
                .extract().body().as(SEARCH_RESULT_SEARCH_HITS);
    }
}
3 changes: 3 additions & 0 deletions src/test/java/io/quarkus/search/app/testsupport/GuideRef.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ public record GuideRef(String name) {
public static final GuideRef HIBERNATE_ORM = create("hibernate-orm");
public static final GuideRef HIBERNATE_REACTIVE = create("hibernate-reactive");
public static final GuideRef STORK_REFERENCE = create("stork-reference");
public static final GuideRef DEV_SERVICES_REFERENCE = create("dev-services");
public static final GuideRef RESTEASY_REACTIVE_REFERENCE = create("resteasy-reactive");
public static final GuideRef VERTX_REFERENCE = create("vertx-reference");
public static final GuideRef QUARKIVERSE_AMAZON_S3 = createQuarkiverse(
"https://quarkiverse.github.io/quarkiverse-docs/quarkus-amazon-services/dev/amazon-s3.html");
// NOTE: when adding new constants here, don't forget to run the main() method in QuarkusIOFigure
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
Expand Down Expand Up @@ -181,7 +180,7 @@ private static void editIfNecessary(Path quarkusIoLocalPath, Path copyRootPath,

copyGit.add().addFilepattern(".").call();
copyGit.commit().setMessage("""
Edit Quarkus metadata yaml files
Edit Quarkus metadata yaml files %s

Edited:%s"""
.formatted(
Expand All @@ -192,9 +191,9 @@ private static void editIfNecessary(Path quarkusIoLocalPath, Path copyRootPath,
.call();
}

private static void yamlQuarkusEditor(Path fileToEdit) {
private static void yamlQuarkusEditor(Path fileToEdit, GuideRef[] refs) {
yamlQuarkusEditor(fileToEdit, quarkusYaml -> {
Set<String> guideRefs = Arrays.stream(GuideRef.local()).map(GuideRef::name).collect(Collectors.toSet());
Set<String> guideRefs = Arrays.stream(refs).map(GuideRef::name).collect(Collectors.toSet());

Map<String, Object> filtered = new HashMap<>();
Map<String, List<Object>> guides = new HashMap<>();
Expand Down Expand Up @@ -267,17 +266,65 @@ public String toString() {
public abstract void define(FilterDefinitionCollector c);
}

public static class AllFilterDefinition extends FilterDefinition {
public static class AllFilterDefinition extends AbstractGuideRefSetFilterDefinition {
public AllFilterDefinition() {
super("all");
super("all", GuideRef.local());
}
}

/**
 * Filter definition named "search-service-subset", limited to a fixed list of guides.
 */
public static class SearchServiceFilterDefinition extends AbstractGuideRefSetFilterDefinition {
    private static final GuideRef[] GUIDES = new GuideRef[] {
            GuideRef.HIBERNATE_ORM,
            GuideRef.HIBERNATE_ORM_PANACHE,
            GuideRef.HIBERNATE_ORM_PANACHE_KOTLIN,
            GuideRef.HIBERNATE_SEARCH_ORM_ELASTICSEARCH,
            GuideRef.HIBERNATE_REACTIVE,
            GuideRef.HIBERNATE_REACTIVE_PANACHE,
            GuideRef.SPRING_DATA_JPA,
            GuideRef.DUPLICATED_CONTEXT,
            GuideRef.SECURITY_OIDC_BEARER_TOKEN_AUTHENTICATION,
            GuideRef.STORK_REFERENCE
    };

    /**
     * Returns the guides covered by this filter definition.
     *
     * @return a new array with the same elements as the internal constant; handing out a copy keeps
     *         callers from modifying the array that is also passed to the superclass constructor
     */
    public static GuideRef[] guides() {
        return GUIDES.clone();
    }

    public SearchServiceFilterDefinition() {
        super("search-service-subset", GUIDES);
    }
}

/**
 * Filter definition named "search-service-synonyms-subset", limited to a fixed list of guides.
 */
public static class SearchServiceSynonymsFilterDefinition extends AbstractGuideRefSetFilterDefinition {
    private static final GuideRef[] GUIDES = new GuideRef[] {
            GuideRef.RESTEASY_REACTIVE_REFERENCE,
            GuideRef.VERTX_REFERENCE,
            GuideRef.DEV_SERVICES_REFERENCE
    };

    /**
     * Returns the guides covered by this filter definition.
     *
     * @return a new array with the same elements as the internal constant; handing out a copy keeps
     *         callers from modifying the array that is also passed to the superclass constructor
     */
    public static GuideRef[] guides() {
        return GUIDES.clone();
    }

    public SearchServiceSynonymsFilterDefinition() {
        super("search-service-synonyms-subset", GUIDES);
    }
}

private static abstract class AbstractGuideRefSetFilterDefinition extends FilterDefinition {
private final GuideRef[] guides;

protected AbstractGuideRefSetFilterDefinition(String name, GuideRef... guides) {
super(name);
this.guides = guides;
}

@Override
public void define(FilterDefinitionCollector c) {
c.addMetadata(QuarkusVersions.LATEST);
c.addMetadata(SAMPLED_NON_LATEST_VERSION);
c.addMetadata(QuarkusVersions.LATEST, guides);
c.addMetadata(SAMPLED_NON_LATEST_VERSION, guides);
c.addQuarkiverseMetadata(SAMPLED_NON_LATEST_VERSION);
for (GuideRef guideRef : GuideRef.local()) {
for (GuideRef guideRef : guides) {
c.addGuide(guideRef);
c.addGuide(guideRef, SAMPLED_NON_LATEST_VERSION);
}
Expand All @@ -303,10 +350,10 @@ public FilterDefinitionCollector addGuide(GuideRef ref, String version) {
return this;
}

public FilterDefinitionCollector addMetadata(String version) {
public FilterDefinitionCollector addMetadata(String version, GuideRef[] guides) {
String metadataPath = QuarkusIO.yamlMetadataPath(version).toString();
addOnSourceBranch(metadataPath, metadataPath);
addMetadataToFilter(metadataPath, QuarkusIOSample::yamlQuarkusEditor);
addMetadataToFilter(metadataPath, path -> yamlQuarkusEditor(path, guides));
return this;
}

Expand Down
Binary file modified src/test/resources/quarkusio-sample.zip
Binary file not shown.
Loading