Skip to content

Commit

Permalink
Merge pull request #125 from marko-bekhta/i85-Keep-git-clones-on-disk…
Browse files Browse the repository at this point in the history
…-and-update-them

Keep git clones on disk and update them instead of re-cloning on each reindexing
  • Loading branch information
yrodiere authored Jan 11, 2024
2 parents 8accfb3 + fddfc2e commit db6b341
Show file tree
Hide file tree
Showing 6 changed files with 414 additions and 96 deletions.
113 changes: 61 additions & 52 deletions src/main/java/io/quarkus/search/app/fetching/FetchingService.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,13 @@
import java.net.URI;
import java.nio.file.Path;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ConcurrentHashMap;
import java.util.stream.Collectors;

import jakarta.annotation.PreDestroy;
import jakarta.enterprise.context.ApplicationScoped;
import jakarta.inject.Inject;

Expand All @@ -24,38 +26,34 @@
import io.quarkus.logging.Log;
import io.quarkus.runtime.LaunchMode;

import org.hibernate.search.util.common.impl.Closer;
import org.hibernate.search.util.common.impl.SuppressingCloser;

import org.apache.commons.io.function.IOBiFunction;
import org.eclipse.jgit.api.Git;
import org.eclipse.jgit.api.errors.GitAPIException;
import org.jboss.logging.Logger;
import io.vertx.core.impl.ConcurrentHashSet;

@ApplicationScoped
public class FetchingService {
// JBoss logger for this class.
private static final Logger log = Logger.getLogger(FetchingService.class);

// Injected fetching settings; fetchQuarkusIo() reads parallelism() and timeout() from it.
@Inject
FetchingConfig fetchingConfig;

// Injected quarkus.io settings; fetchQuarkusIo() reads gitUri() and localized() from it.
@Inject
QuarkusIOConfig quarkusIOConfig;

// Details of clones fetched so far, keyed by git URI.
// fetchQuarkusIoSite() looks here first and, on a hit, re-opens and updates that clone
// instead of fetching it again.
private final Map<URI, GitCloneDirectory.Details> detailsCache = new ConcurrentHashMap<>();
// Temporary directories created while fetching; they are closed in cleanupTemporaryFolders().
private final Set<CloseableDirectory> tempDirectories = new ConcurrentHashSet<>();

public QuarkusIO fetchQuarkusIo() {
CompletableFuture<GitCloneDirectory> main = null;
Map<Language, CompletableFuture<GitCloneDirectory>> localized = new LinkedHashMap<>();
try (CloseableDirectory unzipDir = LaunchMode.DEVELOPMENT.equals(LaunchMode.current())
? CloseableDirectory.temp("quarkus.io-unzipped")
: null;
SimpleExecutor executor = new SimpleExecutor(fetchingConfig.parallelism())) {
main = executor.submit(() -> fetchQuarkusIoSite("quarkus.io", quarkusIOConfig.gitUri(),
QuarkusIO.SOURCE_BRANCH, QuarkusIO.PAGES_BRANCH, unzipDir));
try (SimpleExecutor executor = new SimpleExecutor(fetchingConfig.parallelism())) {
main = executor.submit(() -> fetchQuarkusIoSite("quarkus.io", quarkusIOConfig.gitUri(), QuarkusIO.MAIN_BRANCHES));
for (Map.Entry<Language, QuarkusIOConfig.SiteConfig> entry : sortMap(quarkusIOConfig.localized()).entrySet()) {
var language = entry.getKey();
var config = entry.getValue();
localized.put(language,
executor.submit(() -> fetchQuarkusIoSite(language.code + ".quarkus.io", config.gitUri(),
QuarkusIO.LOCALIZED_SOURCE_BRANCH, QuarkusIO.LOCALIZED_PAGES_BRANCH, unzipDir)));
QuarkusIO.LOCALIZED_BRANCHES)));
}
executor.waitForSuccessOrThrow(fetchingConfig.timeout());
// If we get here, all tasks succeeded.
Expand All @@ -69,23 +67,47 @@ public QuarkusIO fetchQuarkusIo() {
}
}

private GitCloneDirectory fetchQuarkusIoSite(String siteName, URI gitUri, String sourceBranch, String pagesBranch,
CloseableDirectory unzipDir) {
private GitCloneDirectory fetchQuarkusIoSite(String siteName, URI gitUri, GitCloneDirectory.Branches branches) {
CloseableDirectory tempDir = null;
GitCloneDirectory cloneDir = null;
try {
if (LaunchMode.DEVELOPMENT.equals(LaunchMode.current()) && isZip(gitUri)) {
Log.warnf("Unzipping '%s': this application is most likely indexing only a sample of %s."
+ " See README to index the full website.",
gitUri, siteName);
Path unzippedPath = unzipDir.path().resolve(siteName);
unzip(Path.of(gitUri), unzippedPath);
gitUri = unzippedPath.toUri();
// Fall-through and clone the directory.
// While technically unnecessary (we could use the unzipped directory directly),
// this cloning ensures we run the same code in dev mode as in prod.
GitCloneDirectory.Details details = detailsCache.get(gitUri);
if (details != null) {
return details.openAndUpdate();
}

if (LaunchMode.DEVELOPMENT.equals(LaunchMode.current())) {
if (isZip(gitUri)) {
Log.warnf("Unzipping '%s': this application is most likely indexing only a sample of %s."
+ " See README to index the full website.",
gitUri, siteName);
tempDir = CloseableDirectory.temp(siteName);
tempDirectories.add(tempDir);
unzip(Path.of(gitUri), tempDir.path());
cloneDir = GitCloneDirectory.openAndUpdate(tempDir.path(), branches);
} else if (isFile(gitUri)) {
Log.infof("Using the git repository '%s' as-is without cloning to speed up indexing of %s.",
gitUri, siteName);
// In dev mode, we want to skip cloning when possible, to make things quicker.
cloneDir = GitCloneDirectory.openAndUpdate(Path.of(gitUri), branches);
}
}
return gitClone(siteName, gitUri, List.of(sourceBranch, pagesBranch),
(git, directory) -> new GitCloneDirectory(git, directory, pagesBranch));

if (cloneDir == null) {
// We always end up here in prod and tests.
// That's fine, because prod will always use remote (http/git) git URIs anyway,
// never local ones (file).
// We may skip it in dev mode though.
tempDir = CloseableDirectory.temp(siteName);
tempDirectories.add(tempDir);
cloneDir = GitCloneDirectory.clone(gitUri, tempDir.path(), branches);
}

detailsCache.put(gitUri, cloneDir.details());

return cloneDir;
} catch (RuntimeException | IOException e) {
new SuppressingCloser(e).push(tempDir).push(cloneDir);
throw new IllegalStateException("Failed to fetch '%s': %s".formatted(siteName, e.getMessage()), e);
}
}
Expand All @@ -98,35 +120,22 @@ private Map<Language, QuarkusIOConfig.SiteConfig> sortMap(Map<String, QuarkusIOC
return map;
}

/**
 * Tells whether the given URI points to the local file system.
 * <p>
 * The scheme is compared case-insensitively, since URI schemes are case-insensitive
 * and {@code Path.of(URI)} accepts {@code FILE:} as well as {@code file:}.
 *
 * @param uri the URI to inspect
 * @return {@code true} if the URI has the {@code file} scheme;
 *         {@code false} otherwise, including when the URI has no scheme at all
 */
private static boolean isFile(URI uri) {
    // Literal on the left: getScheme() returns null for relative URIs.
    return "file".equalsIgnoreCase(uri.getScheme());
}

/**
 * Tells whether the given URI points to a local zip file.
 *
 * @param uri the URI to inspect
 * @return {@code true} if the URI is a local file URI whose path ends with {@code .zip}
 */
private static boolean isZip(URI uri) {
    // getPath() is null for opaque URIs (e.g. "file:sample.zip"), hence the null check.
    String path = uri.getPath();
    return isFile(uri) && path != null && path.endsWith(".zip");
}

private <T> T gitClone(String name, URI gitUri, List<String> branches,
IOBiFunction<Git, CloseableDirectory, T> function) {
Log.infof("Cloning '%s' from '%s'.", name, gitUri);
CloseableDirectory directory = null;
Git git = null;
try {
directory = CloseableDirectory.temp(name);
git = Git.cloneRepository()
.setURI(gitUri.toString())
.setDirectory(directory.path().toFile())
.setDepth(1)
.setNoTags()
.setBranch(branches.get(0))
.setBranchesToClone(branches.stream().map(b -> "refs/heads/" + b).toList())
.setProgressMonitor(LoggerProgressMonitor.create(log, "Cloning " + name + ": "))
// Unfortunately sparse checkouts are not supported: https://www.eclipse.org/forums/index.php/t/1094825/
.call();
return function.apply(git, directory);
} catch (RuntimeException | IOException | GitAPIException e) {
new SuppressingCloser(e)
.push(git)
.push(directory);
@PreDestroy
public void cleanupTemporaryFolders() {
try (Closer<IOException> closer = new Closer<>()) {
closer.pushAll(CloseableDirectory::close, tempDirectories);
} catch (Exception e) {
throw new IllegalStateException(
"Failed to clone git repository '%s' from '%s': %s".formatted(name, gitUri, e.getMessage()), e);
"Failed to close directories '%s': %s".formatted(tempDirectories, e.getMessage()), e);
}
}

}
20 changes: 10 additions & 10 deletions src/main/java/io/quarkus/search/app/quarkusio/QuarkusIO.java
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,10 @@ public class QuarkusIO implements AutoCloseable {

// Origin identifiers.
// NOTE(review): presumably they distinguish guides coming from Quarkus itself
// from guides coming from Quarkiverse -- confirm against the usages.
public static final String QUARKUS_ORIGIN = "quarkus";
private static final String QUARKIVERSE_ORIGIN = "quarkiverse";
// Branch names of the main quarkus.io repository.
public static final String SOURCE_BRANCH = "develop";
public static final String PAGES_BRANCH = "master";
// Branch names of the localized site repositories.
public static final String LOCALIZED_SOURCE_BRANCH = "main";
public static final String LOCALIZED_PAGES_BRANCH = "docs";
// Branches of the main quarkus.io repository, passed to FetchingService#fetchQuarkusIoSite.
// NOTE(review): the arguments look like (sources branch, pages branch), mirroring
// SOURCE_BRANCH / PAGES_BRANCH above -- confirm against GitCloneDirectory.Branches.
public static final GitCloneDirectory.Branches MAIN_BRANCHES = new GitCloneDirectory.Branches(
"develop", "master");
// Branches of the localized site repositories; same argument order as MAIN_BRANCHES.
public static final GitCloneDirectory.Branches LOCALIZED_BRANCHES = new GitCloneDirectory.Branches(
"main", "docs");

public static URI httpUrl(URI urlBase, String version, String name) {
return urlBase.resolve(httpPath(version, name));
Expand Down Expand Up @@ -99,8 +99,8 @@ public QuarkusIO(QuarkusIOConfig config, GitCloneDirectory mainRepository,
/**
 * Releases everything held by this instance: the prefetched Quarkiverse guides directory,
 * the main repository and every localized site repository.
 * <p>
 * Each resource is registered with the closer exactly once.
 *
 * @throws Exception if closing any of the resources fails
 */
@Override
public void close() throws Exception {
    try (var closer = new Closer<Exception>()) {
        closer.push(CloseableDirectory::close, prefetchedQuarkiverseGuides);
        closer.push(GitCloneDirectory::close, mainRepository);
        closer.pushAll(GitCloneDirectory::close, localizedSites.values());
    }
}
Expand All @@ -113,7 +113,7 @@ public Stream<Guide> guides() throws IOException {
// guides based on the info from the _data/versioned/[version]/index/
// may contain quarkus.yaml as well as quarkiverse.yml
private Stream<Guide> versionedGuides() throws IOException {
return Files.list(mainRepository.directory().path().resolve("_data").resolve("versioned"))
return Files.list(mainRepository.resolve("_data").resolve("versioned"))
.flatMap(p -> {
var version = p.getFileName().toString().replace('-', '.');
Path quarkiverse = p.resolve("index").resolve("quarkiverse.yaml");
Expand Down Expand Up @@ -143,13 +143,13 @@ private Stream<Guide> versionedGuides() throws IOException {

private static Path resolveTranslationPath(String version, String filename, GitCloneDirectory directory,
Language language) {
return directory.directory().path().resolve(
return directory.resolve(
Path.of("l10n", "po", language.locale, "_data", "versioned", version, "index", filename + ".po"));
}

// older version guides like guides-2-7.yaml or guides-2-13.yaml
private Stream<Guide> legacyGuides() throws IOException {
return Files.list(mainRepository.directory().path().resolve("_data"))
return Files.list(mainRepository.resolve("_data"))
.filter(p -> !Files.isDirectory(p) && p.getFileName().toString().startsWith("guides-"))
.flatMap(p -> {
var version = p.getFileName().toString().replaceAll("guides-|\\.yaml", "").replace('-', '.');
Expand All @@ -164,7 +164,7 @@ private Stream<Guide> legacyGuides() throws IOException {
}

private static Path resolveLegacyTranslationPath(String filename, GitCloneDirectory directory, Language language) {
return directory.directory().path().resolve(
return directory.resolve(
Path.of("l10n", "po", language.locale, "_data", filename + ".po"));
}

Expand Down Expand Up @@ -275,7 +275,7 @@ private Stream<? extends Guide> translateGuide(Guide guide, Map<Language, Catalo
if (!gitInputProvider.isFileAvailable()) {
// if a file is not present we do not want to add such guide. Since if the html is not there
// it means that users won't be able to open it on the site, and returning it in the search results make it pointless.
Log.warn("Guide " + guide
Log.warn("Guide " + translated
+ " is ignored since we were not able to find an HTML content file for it.");
return null;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,4 +36,11 @@ public Path path() {
return path;
}

/**
 * Renders this directory as {@code CloseableDirectory{path=..., shouldDelete=...}}.
 */
@Override
public String toString() {
    StringBuilder text = new StringBuilder("CloseableDirectory{");
    text.append("path=").append(path);
    text.append(", shouldDelete=").append(shouldDelete);
    text.append('}');
    return text.toString();
}
}
Loading

0 comments on commit db6b341

Please sign in to comment.