Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Avoid indexing duplicate documents for Quarkiverse guides that are not translated #157

Merged
merged 2 commits into from
Jan 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/main/java/io/quarkus/search/app/SearchService.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
import io.quarkus.search.app.dto.SearchResult;
import io.quarkus.search.app.entity.Guide;
import io.quarkus.search.app.entity.Language;
import io.quarkus.search.app.entity.VersionAndLanguageRoutingBinder;
import io.quarkus.search.app.entity.QuarkusVersionAndLanguageRoutingBinder;

import org.hibernate.Length;
import org.hibernate.search.engine.search.common.BooleanOperator;
Expand Down Expand Up @@ -105,7 +105,7 @@ public SearchResult<GuideSearchHit> search(@RestQuery @DefaultValue(QuarkusVersi
.highlighter("highlighter_content",
f -> f.unified().noMatchSize(0).numberOfFragments(contentSnippets).fragmentSize(contentSnippetsLength))
.sort(f -> f.score().then().field("title_sort"))
.routing(VersionAndLanguageRoutingBinder.key(version, language))
.routing(QuarkusVersionAndLanguageRoutingBinder.searchKeys(version, language))
.totalHitCountThreshold(TOTAL_HIT_COUNT_THRESHOLD + (page + 1) * PAGE_SIZE)
.fetch(page * PAGE_SIZE, PAGE_SIZE);
return new SearchResult<>(result);
Expand Down
15 changes: 3 additions & 12 deletions src/main/java/io/quarkus/search/app/entity/Guide.java
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
package io.quarkus.search.app.entity;

import static io.quarkus.search.app.quarkusio.QuarkusIO.QUARKUS_ORIGIN;

import java.net.URI;
import java.util.Objects;
import java.util.Set;
Expand Down Expand Up @@ -36,7 +34,7 @@
import org.hibernate.search.mapper.pojo.mapping.definition.annotation.KeywordField;

@Entity
@Indexed(routingBinder = @RoutingBinderRef(type = VersionAndLanguageRoutingBinder.class))
@Indexed(routingBinder = @RoutingBinderRef(type = QuarkusVersionAndLanguageRoutingBinder.class))
public class Guide {
@Id
@JavaType(URIType.class)
Expand All @@ -46,7 +44,7 @@ public class Guide {
@Enumerated(EnumType.STRING)
public Language language;

public String version;
public String quarkusVersion;

@KeywordField
public String type;
Expand Down Expand Up @@ -86,13 +84,6 @@ public class Guide {
@KeywordField(name = "extensions_faceting", searchable = Searchable.YES, projectable = Projectable.YES, aggregable = Aggregable.YES)
public Set<String> extensions = Set.of();

/**
* @return {@code true} if the guide is a Quarkus guide, {@code false} if this guide is a Quarkiverse guide.
*/
public boolean quarkusGuide() {
return QUARKUS_ORIGIN.equals(origin);
}

@Override
public boolean equals(Object o) {
if (this == o) {
Expand All @@ -112,7 +103,7 @@ public int hashCode() {

@Override
public String toString() {
return "Guide{" +
return getClass().getSimpleName() + "{" +
"url=<" + url + '>' +
'}';
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,20 +1,29 @@
package io.quarkus.search.app.entity;

import java.util.List;

import org.hibernate.search.mapper.pojo.bridge.RoutingBridge;
import org.hibernate.search.mapper.pojo.bridge.binding.RoutingBindingContext;
import org.hibernate.search.mapper.pojo.bridge.mapping.programmatic.RoutingBinder;
import org.hibernate.search.mapper.pojo.bridge.runtime.RoutingBridgeRouteContext;
import org.hibernate.search.mapper.pojo.route.DocumentRoutes;

public class VersionAndLanguageRoutingBinder implements RoutingBinder {
public static String key(String version, Language language) {
public class QuarkusVersionAndLanguageRoutingBinder implements RoutingBinder {
/**
 * Builds the routing key for a Quarkus version and an optional language.
 *
 * @param version the Quarkus version, used as the key itself or as its prefix
 * @param language the language whose {@code code} is appended after a {@code /}; may be {@code null}
 * @return {@code version} alone when {@code language} is {@code null},
 *         otherwise {@code version + "/" + language.code}
 */
private static String key(String version, Language language) {
    return language == null ? version : version + "/" + language.code;
}

/**
 * Returns the routing keys to target when searching for a given version and language.
 * <p>
 * Two keys are returned: the one built from the version and the language, and the one
 * built from the version alone, which is the key produced for entities whose language is {@code null}.
 * When {@code language} is {@code null}, both entries are the same key.
 *
 * @param version the Quarkus version to search in
 * @param language the language to search in; may be {@code null}
 * @return an unmodifiable list holding the language-specific key followed by the version-only key
 */
public static List<String> searchKeys(String version, Language language) {
    String languageSpecificKey = key(version, language);
    String versionOnlyKey = key(version, null);
    return List.of(languageSpecificKey, versionOnlyKey);
}

@Override
public void bind(RoutingBindingContext context) {
context.dependencies()
.use("version")
.use("quarkusVersion")
.use("language");

context.bridge(Guide.class, new GuideRoutingBridge());
Expand All @@ -25,7 +34,7 @@ public static class GuideRoutingBridge implements RoutingBridge<Guide> {
@Override
public void route(DocumentRoutes routes, Object entityIdentifier, Guide entity,
RoutingBridgeRouteContext context) {
routes.addRoute().routingKey(key(entity.version, entity.language));
routes.addRoute().routingKey(key(entity.quarkusVersion, entity.language));
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -102,8 +102,16 @@ private Bridge(ValueBridge<S, String> bridge, EnumMap<Language, IndexFieldRefere
}

@Override
public void write(DocumentElement target, Language discriminator, S bridgedElement) {
target.addValue(fields.get(discriminator), bridge.toIndexedValue(bridgedElement, null));
public void write(DocumentElement target, Language language, S bridgedElement) {
    // Writes the bridged value to the index field of the given language,
    // or to every language field when no language is given.
    //
    // The indexed value does not depend on the field it is written to,
    // so convert it once instead of once per language field.
    String indexedValue = bridge.toIndexedValue(bridgedElement, null);
    if (language != null) {
        target.addValue(fields.get(language), indexedValue);
        return;
    }
    // No language: this happens for Quarkiverse guides in particular.
    // Just populate all language fields with the same value.
    for (IndexFieldReference<String> field : fields.values()) {
        target.addValue(field, indexedValue);
    }
}
}
}
Expand Down
134 changes: 68 additions & 66 deletions src/main/java/io/quarkus/search/app/quarkusio/QuarkusIO.java
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ public static Path yamlQuarkiverseMetadataPath(String version) {
private final GitCloneDirectory mainRepository;
private final Map<Language, GitCloneDirectory> localizedSites;
private final Map<Language, URI> localizedSiteUris;
private final CloseableDirectory prefetchedQuarkiverseGuides = CloseableDirectory.temp("quarkiverse-guides-");
private final CloseableDirectory prefetchedGuides = CloseableDirectory.temp("quarkiverse-guides-");
private final FailureCollector failureCollector;

public QuarkusIO(QuarkusIOConfig config, GitCloneDirectory mainRepository,
Expand All @@ -100,7 +100,7 @@ public QuarkusIO(QuarkusIOConfig config, GitCloneDirectory mainRepository,
@Override
public void close() throws Exception {
try (var closer = new Closer<Exception>()) {
closer.push(CloseableDirectory::close, prefetchedQuarkiverseGuides);
closer.push(CloseableDirectory::close, prefetchedGuides);
closer.push(GitCloneDirectory::close, mainRepository);
closer.pushAll(GitCloneDirectory::close, localizedSites.values());
}
Expand All @@ -116,14 +116,14 @@ public Stream<Guide> guides() throws IOException {
private Stream<Guide> versionedGuides() throws IOException {
return Files.list(mainRepository.resolve("_data").resolve("versioned"))
.flatMap(p -> {
var version = p.getFileName().toString().replace('-', '.');
var quarkusVersion = p.getFileName().toString().replace('-', '.');
Path quarkiverse = p.resolve("index").resolve("quarkiverse.yaml");
Path quarkus = p.resolve("index").resolve("quarkus.yaml");
Map<Language, Catalog> translations = translations(
(directory, language) -> resolveTranslationPath(p.getFileName().toString(),
quarkus.getFileName().toString(), directory, language));

Stream<Guide> quarkusGuides = parseYamlMetadata(webUri, quarkus, version)
Stream<Guide> quarkusGuides = parseYamlMetadata(webUri, quarkus, quarkusVersion)
.flatMap(guide -> translateGuide(guide, translations));
if (Files.exists(quarkiverse)) {
// the full content won't be translated, but the title/summary may be, so we want to get that info out if available
Expand All @@ -134,8 +134,7 @@ private Stream<Guide> versionedGuides() throws IOException {
quarkiverse.getFileName().toString(), directory, language));
return Stream.concat(
quarkusGuides,
parseYamlQuarkiverseMetadata(webUri, quarkiverse, version)
.flatMap(guide -> translateGuide(guide, quarkiverseTranslations)));
parseYamlQuarkiverseMetadata(quarkiverse, quarkusVersion));
} else {
return quarkusGuides;
}
Expand Down Expand Up @@ -170,15 +169,15 @@ private static Path resolveLegacyTranslationPath(String filename, GitCloneDirect
}

@SuppressWarnings("unchecked")
private Stream<Guide> parseYamlMetadata(URI webUri, Path quarkusYamlPath, String version) {
private Stream<Guide> parseYamlMetadata(URI webUri, Path quarkusYamlPath, String quarkusVersion) {
return parse(quarkusYamlPath, quarkusYaml -> {
Set<Guide> parsed = new HashSet<>();
for (Map<String, Object> parsedGuide : ((Map<String, List<Object>>) quarkusYaml.get("types")).entrySet()
.stream()
.flatMap(e -> e.getValue().stream())
.map(e -> (Map<String, Object>) e).toList()) {

Guide guide = createGuide(webUri, version, toString(parsedGuide.get("type")), parsedGuide, "summary");
Guide guide = createGuide(webUri, quarkusVersion, toString(parsedGuide.get("type")), parsedGuide, "summary");
guide.categories = toSet(parsedGuide.get("categories"));
guide.keywords = toString(parsedGuide.get("keywords"));
guide.topics = toSet(parsedGuide.get("topics"));
Expand All @@ -192,13 +191,13 @@ private Stream<Guide> parseYamlMetadata(URI webUri, Path quarkusYamlPath, String
}

@SuppressWarnings("unchecked")
private Stream<Guide> parseYamlQuarkiverseMetadata(URI webUri, Path quarkusYamlPath, String version) {
private Stream<Guide> parseYamlQuarkiverseMetadata(Path quarkusYamlPath, String quarkusVersion) {
return parse(quarkusYamlPath, quarkusYaml -> {
Set<Guide> parsed = new HashSet<>();
for (Map.Entry<String, List<Map<String, Object>>> type : ((Map<String, List<Map<String, Object>>>) quarkusYaml
.get("types")).entrySet()) {
for (Map<String, Object> parsedGuide : type.getValue()) {
Guide guide = createGuide(webUri, version, type.getKey(), parsedGuide, "summary");
Guide guide = createQuarkiverseGuide(quarkusVersion, type.getKey(), parsedGuide, "summary");
guide.categories = toSet(parsedGuide.get("categories"));
parsed.add(guide);
}
Expand Down Expand Up @@ -252,41 +251,38 @@ private Map<Language, Catalog> translations(BiFunction<GitCloneDirectory, Langua
return map;
}

private Stream<? extends Guide> translateGuide(Guide guide, Map<Language, Catalog> transaltions) {
private Stream<Guide> translateGuide(Guide guide, Map<Language, Catalog> translations) {
if (guide.language == null) {
// Quarkiverse guides are not translated; we use a single instance for all languages.
return Stream.of(guide);
}
return Stream.concat(
Stream.of(guide),
localizedSites.entrySet().stream().map(entry -> {
Language language = entry.getKey();
GitCloneDirectory repository = entry.getValue();
Catalog messages = transaltions.get(language);
Catalog messages = translations.get(language);

Guide translated = new Guide();
translated.url = localizedUrl(language, guide);
translated.language = language;
translated.type = guide.type;
translated.version = guide.version;
translated.quarkusVersion = guide.quarkusVersion;
translated.origin = guide.origin;
translated.title = translate(messages, guide.title);
translated.summary = translate(messages, guide.summary);
// If it is a quarkiverse guide, it means that it is an external url, we can't do much about it
// and we just use the same provider/file that we've already used for the original guide in English;
// otherwise we try to find a corresponding translated HTML:
if (guide.quarkusGuide()) {
GitInputProvider gitInputProvider = new GitInputProvider(
repository.git(), repository.pagesTree(),
localizedHtmlPath(guide.version, guide.url.getPath()));
if (!gitInputProvider.isFileAvailable()) {
// if a file is not present we do not want to add such guide. Since if the html is not there
// it means that users won't be able to open it on the site, and returning it in the search results make it pointless.
failureCollector.warning(FailureCollector.Stage.TRANSLATION,
"Guide " + translated
+ " is ignored since we were not able to find an HTML content file for it.");
return null;
}
translated.htmlFullContentProvider = gitInputProvider;
} else {
translated.htmlFullContentProvider = guide.htmlFullContentProvider;
GitInputProvider gitInputProvider = new GitInputProvider(
repository.git(), repository.pagesTree(),
localizedHtmlPath(guide.quarkusVersion, guide.url.getPath()));
if (!gitInputProvider.isFileAvailable()) {
// If the file is not present, we do not want to add this guide: if the HTML is not there,
// users won't be able to open it on the site, so returning it in the search results would be pointless.
failureCollector.warning(FailureCollector.Stage.TRANSLATION,
"Guide " + translated
+ " is ignored since we were not able to find an HTML content file for it.");
return null;
}
translated.htmlFullContentProvider = gitInputProvider;
translated.categories = guide.categories;
translated.extensions = guide.extensions;
translated.keywords = translate(messages, guide.keywords);
Expand All @@ -312,21 +308,10 @@ private static String translate(Catalog messages, String key) {
private URI localizedUrl(Language language, Guide guide) {
URI url = guide.url;
try {
// if we have a Quarkus "local" guide then we have to replace the "host" part to use the localized one
// that we store in the web uris:
if (guide.quarkusGuide()) {
URI localized = localizedSiteUris.get(language);
return new URI(
localized.getScheme(), localized.getAuthority(), url.getPath(),
url.getQuery(), url.getFragment());
} else {
// otherwise since the link for Quarkiverse (external) guides is exactly the same for all the languages/versions
// and we've already added a version parameter to the query part of the url, we just append the language to it
// to make it unique:
return new URI(
url.getScheme(), url.getAuthority(), url.getPath(),
url.getQuery() + "&language=" + language.code, url.getFragment());
}
URI localized = localizedSiteUris.get(language);
return new URI(
localized.getScheme(), localized.getAuthority(), url.getPath(),
url.getQuery(), url.getFragment());
} catch (URISyntaxException e) {
throw new IllegalArgumentException(
"Cannot create a localized version of the URL (%s). It is expected to have a correctly formatted URL at this point to a Quarkiverse guide (i.e. http://smth.smth/smth) : %s"
Expand Down Expand Up @@ -360,35 +345,52 @@ private static Stream<Guide> parse(Path quarkusYamlPath,
return parser.apply(quarkusYaml);
}

private Guide createGuide(URI webUri, String version, String type, Map<String, Object> parsedGuide,
/**
 * Creates a guide from a parsed YAML entry, dispatching on the form of its {@code url} value.
 *
 * @param urlBase base URI handed to the core-guide factory; unused for Quarkiverse guides
 * @param quarkusVersion the Quarkus version the guide belongs to
 * @param type the guide type
 * @param parsedGuide the parsed YAML attributes of the guide; its {@code url} entry selects the factory
 * @param summaryKey the key in {@code parsedGuide} under which the summary is stored
 * @return a Quarkiverse guide when the URL starts with {@code http}, a core guide otherwise
 */
private Guide createGuide(URI urlBase, String quarkusVersion, String type, Map<String, Object> parsedGuide,
        String summaryKey) {
    // A URL starting with "http" means we are looking at a quarkiverse guide.
    boolean quarkiverse = toString(parsedGuide.get("url")).startsWith("http");
    return quarkiverse
            ? createQuarkiverseGuide(quarkusVersion, type, parsedGuide, summaryKey)
            : createCoreGuide(urlBase, quarkusVersion, type, parsedGuide, summaryKey);
}

private Guide createCoreGuide(URI urlBase, String quarkusVersion, String type, Map<String, Object> parsedGuide,
String summaryKey) {
Guide guide = new Guide();
guide.quarkusVersion = quarkusVersion;
guide.language = Language.ENGLISH;
guide.origin = toString(parsedGuide.get("origin"));
if (guide.origin == null) {
guide.origin = QUARKUS_ORIGIN;
}
guide.type = type;
guide.title = renderMarkdown(toString(parsedGuide.get("title")));
guide.origin = toString(parsedGuide.get("origin"));
guide.version = version;
guide.summary = renderMarkdown(toString(parsedGuide.get(summaryKey)));
String parsedUrl = toString(parsedGuide.get("url"));
URI uri;
if (parsedUrl.startsWith("http")) {
// we are looking at a quarkiverse guide:
uri = httpUrl(version, parsedUrl);
guide.htmlFullContentProvider = new UrlInputProvider(prefetchedQuarkiverseGuides, uri, failureCollector);

if (guide.origin == null) {
guide.origin = QUARKIVERSE_ORIGIN;
}
} else {
uri = httpUrl(webUri, version, parsedUrl);
guide.htmlFullContentProvider = new GitInputProvider(mainRepository.git(), mainRepository.pagesTree(),
htmlPath(version, parsedUrl));
guide.url = httpUrl(urlBase, quarkusVersion, parsedUrl);
guide.htmlFullContentProvider = new GitInputProvider(mainRepository.git(), mainRepository.pagesTree(),
htmlPath(quarkusVersion, parsedUrl));
return guide;
}

if (guide.origin == null) {
guide.origin = QUARKUS_ORIGIN;
}
private Guide createQuarkiverseGuide(String quarkusVersion, String type, Map<String, Object> parsedGuide,
String summaryKey) {
Guide guide = new Guide();
guide.quarkusVersion = quarkusVersion;
// This is on purpose and will lead to the same guide instance being used for all languages
guide.language = null;
guide.origin = toString(parsedGuide.get("origin"));
if (guide.origin == null) {
guide.origin = QUARKIVERSE_ORIGIN;
}
guide.url = uri;
guide.type = type;
guide.title = renderMarkdown(toString(parsedGuide.get("title")));
guide.summary = renderMarkdown(toString(parsedGuide.get(summaryKey)));
String parsedUrl = toString(parsedGuide.get("url"));
guide.url = httpUrl(quarkusVersion, parsedUrl);
guide.htmlFullContentProvider = new UrlInputProvider(prefetchedGuides, guide.url, failureCollector);
return guide;
}

Expand Down
Loading