Skip to content

Commit

Permalink
[MINDEXER-195] RAO support for RR backend (#326)
Browse files Browse the repository at this point in the history
RAO is powered by Nexus 2 (Nx2), which emits differently structured HTML than Maven Central, hence the HTML parsing is slightly different.

The parsing bits were "exploded" into separate classes so that different extractors can be provided; RR now supports both Maven Central and Nx2 remote repositories.

---

https://issues.apache.org/jira/browse/MINDEXER-195
  • Loading branch information
cstamas authored Jul 24, 2023
1 parent 2455839 commit 87f7f27
Show file tree
Hide file tree
Showing 9 changed files with 456 additions and 181 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.maven.search.backend.remoterepository.internal;
package org.apache.maven.search.backend.remoterepository;

import java.util.HashMap;
import java.util.Map;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.maven.search.backend.remoterepository;

import java.util.HashMap;
import java.util.Map;

import org.apache.maven.search.MAVEN;
import org.apache.maven.search.Record;
import org.apache.maven.search.request.Field;

import static java.util.Objects.requireNonNull;

/**
* Helper class that creates record instances for provided backend.
*/
public final class RecordFactory {

    private final RemoteRepositorySearchBackend backend;

    /**
     * Binds this factory to the backend whose IDs are stamped onto every created record.
     *
     * @param backend the owning backend, must not be {@code null}
     */
    public RecordFactory(RemoteRepositorySearchBackend backend) {
        this.backend = requireNonNull(backend);
    }

    /**
     * Builds a {@link Record} carrying the backend ID and repository ID of the bound backend.
     * <p>
     * The {@code groupId} must not be {@code null}; every other coordinate may be {@code null}. Values that are
     * {@code null} or blank strings are left out of the record's field map.
     *
     * @param groupId the group ID, must not be {@code null}
     * @param artifactId the artifact ID, may be {@code null}
     * @param version the version, may be {@code null}
     * @param classifier the classifier, may be {@code null}
     * @param fileExtension the file extension, may be {@code null}
     * @return the newly created record
     */
    public Record create(String groupId, String artifactId, String version, String classifier, String fileExtension) {
        requireNonNull(groupId);
        Map<Field, Object> fields = new HashMap<>();
        putIfMeaningful(fields, MAVEN.GROUP_ID, groupId);
        putIfMeaningful(fields, MAVEN.ARTIFACT_ID, artifactId);
        putIfMeaningful(fields, MAVEN.VERSION, version);
        putIfMeaningful(fields, MAVEN.CLASSIFIER, classifier);
        putIfMeaningful(fields, MAVEN.FILE_EXTENSION, fileExtension);
        return new Record(backend.getBackendId(), backend.getRepositoryId(), null, null, fields);
    }

    /**
     * Stores {@code value} under {@code key} unless the value is {@code null} or a blank string.
     */
    private static void putIfMeaningful(Map<Field, Object> target, Field key, Object value) {
        boolean blankString = value instanceof String && ((String) value).isBlank();
        if (value != null && !blankString) {
            target.put(key, value);
        }
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,35 +18,58 @@
*/
package org.apache.maven.search.backend.remoterepository;

import org.apache.maven.search.backend.remoterepository.extractor.MavenCentralResponseExtractor;
import org.apache.maven.search.backend.remoterepository.extractor.Nx2ResponseExtractor;
import org.apache.maven.search.backend.remoterepository.internal.Java11HttpClientRemoteRepositorySearchTransport;
import org.apache.maven.search.backend.remoterepository.internal.RemoteRepositorySearchBackendImpl;

/**
* The remote repository search backend factory.
*/
public class RemoteRepositorySearchBackendFactory {
public static final String DEFAULT_BACKEND_ID = "central-rr";
public static final String BACKEND_ID = "search-rr";

public static final String DEFAULT_REPOSITORY_ID = "central";
public static final String CENTRAL_REPOSITORY_ID = "central";

public static final String DEFAULT_URI = "https://repo.maven.apache.org/maven2/";
public static final String CENTRAL_URI = "https://repo.maven.apache.org/maven2/";

/**
* Repository ID used for the repository.apache.org (RAO) releases repository.
*/
public static final String RAO_RELEASES_REPOSITORY_ID = "apache.releases.https";

/**
* Base URI of the repository.apache.org (RAO) releases repository.
*/
public static final String RAO_RELEASES_URI = "https://repository.apache.org/content/repositories/releases/";

/**
* Creates "default" RR search backend against Maven Central suitable for most use cases.
*/
public static RemoteRepositorySearchBackend createDefault() {
public static RemoteRepositorySearchBackend createDefaultMavenCentral() {
return create(
BACKEND_ID,
CENTRAL_REPOSITORY_ID,
CENTRAL_URI,
new Java11HttpClientRemoteRepositorySearchTransport(),
new MavenCentralResponseExtractor());
}

/**
* Creates "default" RR search backend against repository.apache.org releases repository suitable for most use cases.
*/
public static RemoteRepositorySearchBackend createDefaultRAOReleases() {
return create(
DEFAULT_BACKEND_ID,
DEFAULT_REPOSITORY_ID,
DEFAULT_URI,
new Java11HttpClientRemoteRepositorySearchTransport());
BACKEND_ID,
RAO_RELEASES_REPOSITORY_ID,
RAO_RELEASES_URI,
new Java11HttpClientRemoteRepositorySearchTransport(),
new Nx2ResponseExtractor());
}

/**
* Creates RR search backend using provided parameters. All parameters are handed over as-is to
* {@link RemoteRepositorySearchBackendImpl}.
*
* @param backendId the ID of the backend to create
* @param repositoryId the ID of the remote repository the backend works against
* @param baseUri the base URI of the remote repository
* @param transport the transport the backend uses
* @param responseExtractor the extractor matching the HTML structure emitted by the remote repository
* @return the new backend instance
*/
public static RemoteRepositorySearchBackend create(
String backendId, String repositoryId, String baseUri, RemoteRepositorySearchTransport transport) {
return new RemoteRepositorySearchBackendImpl(backendId, repositoryId, baseUri, transport);
String backendId,
String repositoryId,
String baseUri,
RemoteRepositorySearchTransport transport,
ResponseExtractor responseExtractor) {
return new RemoteRepositorySearchBackendImpl(backendId, repositoryId, baseUri, transport, responseExtractor);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.maven.search.backend.remoterepository;

import java.util.List;

import org.apache.maven.search.Record;
import org.jsoup.nodes.Document;

/**
* A component extracting data from response, that is aware of any remote specifics (like HTML structure).
*/
public interface ResponseExtractor {
/**
* Method parsing document out of HTML page like this one:
* <a href="https://repo.maven.apache.org/maven2/org/apache/maven/indexer/">https://repo.maven.apache.org/maven2/org/apache/maven/indexer/</a>
* <p>
* Note: this method is "best effort" and may enlist non-existent As (think nested Gs).
*
* @param context the context; the implementations visible alongside this interface read the groupId from it
* @param document the parsed HTML page
* @param recordFactory the factory used to create the records
* @param page the list that created records are added to
* @return NOTE(review): the implementations visible alongside this interface return {@code page.size()} after
* populating the page; confirm that this is the intended contract
*/
int populateG(Context context, Document document, RecordFactory recordFactory, List<Record> page);

/**
* Method parsing document out of XML Maven Metadata like this one:
* <a href="https://repo.maven.apache.org/maven2/org/apache/maven/indexer/search-api/maven-metadata.xml">https://repo.maven.apache.org/maven2/org/apache/maven/indexer/search-api/maven-metadata.xml</a>
*
* @param context the context
* @param document the parsed Maven Metadata XML
* @param recordFactory the factory used to create the records
* @param page the list that created records are added to
* @return NOTE(review): presumably the same kind of count as the sibling methods return; verify against the
* implementation
*/
int populateGA(Context context, Document document, RecordFactory recordFactory, List<Record> page);

/**
* Method parsing document out of HTML page like this one:
* <a href="https://repo.maven.apache.org/maven2/org/apache/maven/indexer/search-api/7.0.3/">https://repo.maven.apache.org/maven2/org/apache/maven/indexer/search-api/7.0.3/</a>
* <p>
* Note: this method is "best effort" and may enlist fake artifacts.
*
* @param context the context
* @param document the parsed HTML page
* @param recordFactory the factory used to create the records
* @param page the list that created records are added to
* @return NOTE(review): the implementations visible alongside this interface return {@code page.size()} after
* populating the page; confirm that this is the intended contract
*/
int populateGAV(Context context, Document document, RecordFactory recordFactory, List<Record> page);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.maven.search.backend.remoterepository.extractor;

import java.util.List;

import org.apache.maven.search.Record;
import org.apache.maven.search.backend.remoterepository.Context;
import org.apache.maven.search.backend.remoterepository.RecordFactory;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;

/**
* Extractor for Maven Central.
*/
public class MavenCentralResponseExtractor extends ResponseExtractorSupport {
    /**
     * Lists the entries of a group directory page and adds one record per accepted entry, using the entry
     * name as artifactId.
     */
    @Override
    public int populateG(Context context, Document document, RecordFactory recordFactory, List<Record> page) {
        // Example of the page being processed:
        // https://repo.maven.apache.org/maven2/org/apache/maven/indexer/
        Element listing = document.getElementById("contents");
        if (listing == null) {
            return page.size();
        }
        for (Element anchor : listing.getElementsByTag("a")) {
            String candidate = nameInHref(anchor);
            if (!accept(candidate)) {
                continue;
            }
            page.add(recordFactory.create(context.getGroupId(), candidate, null, null, null));
        }
        return page.size();
    }

    /**
     * Lists the files of a version directory page and hands every file name that has an extension over to
     * {@code populateGAVName}.
     */
    @Override
    public int populateGAV(Context context, Document document, RecordFactory recordFactory, List<Record> page) {
        // Example of the page being processed:
        // https://repo.maven.apache.org/maven2/org/apache/maven/indexer/search-api/7.0.3/
        Element listing = document.getElementById("contents");
        if (listing == null) {
            return page.size();
        }
        for (Element anchor : listing.getElementsByTag("a")) {
            String href = anchor.attr("href");
            boolean directory = href.endsWith("/");
            boolean hasExtension = href.contains(".");
            // only plain files with an extension are of interest
            if (!directory && hasExtension) {
                populateGAVName(context, nameInHref(anchor), recordFactory, page);
            }
        }
        return page.size();
    }

    /**
     * Returns the "name" found in the {@code href} attribute of given element. Maven Central emits the name
     * in relative form, and as {@code "name/"} when it denotes a directory; a single trailing slash, if present,
     * is dropped.
     */
    private String nameInHref(Element element) {
        String href = element.attr("href");
        return href.endsWith("/") ? href.substring(0, href.length() - 1) : href;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.maven.search.backend.remoterepository.extractor;

import java.util.List;

import org.apache.maven.search.Record;
import org.apache.maven.search.backend.remoterepository.Context;
import org.apache.maven.search.backend.remoterepository.RecordFactory;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
* Extractor for Sonatype Nexus2.
*/
public class Nx2ResponseExtractor extends ResponseExtractorSupport {
    /**
     * Rejects the "Parent Directory" navigation entry and defers every other name to the inherited check.
     */
    @Override
    protected boolean accept(String name) {
        return !"Parent Directory".equals(name) && super.accept(name);
    }

    /**
     * Extracts the "name" from the text of given element (not from its {@code href} attribute). The trailing
     * slash is removed by this method, if any.
     */
    private String name(Element element) {
        String name = element.text();
        if (name.endsWith("/")) {
            name = name.substring(0, name.length() - 1);
        }
        return name;
    }

    /**
     * Walks all anchors of the page and adds one record per accepted anchor text, using the text as artifactId.
     *
     * @return the size of {@code page} after population
     */
    @Override
    public int populateG(Context context, Document document, RecordFactory recordFactory, List<Record> page) {
        // Index HTML page like this one:
        // https://repository.apache.org/content/repositories/releases/org/apache/maven/indexer/
        Elements elements = document.getElementsByTag("a");
        for (Element element : elements) {
            String name = name(element);
            if (accept(name)) {
                page.add(recordFactory.create(context.getGroupId(), name, null, null, null));
            }
        }
        return page.size();
    }

    /**
     * Walks all anchors of the page and hands every file name that has an extension over to
     * {@code populateGAVName}.
     *
     * @return the size of {@code page} after population
     */
    @Override
    public int populateGAV(Context context, Document document, RecordFactory recordFactory, List<Record> page) {
        // Index HTML page like this one:
        // https://repository.apache.org/content/repositories/releases/org/apache/maven/indexer/search-api/7.0.3/
        Elements elements = document.getElementsByTag("a");
        for (Element element : elements) {
            // skip possible subdirectories and files without extensions
            String href = element.attr("href");
            if (href.endsWith("/") || !href.contains(".")) {
                continue;
            }
            // The name handed over comes from the anchor text, so the extension check must hold for it as well:
            // when href is an absolute URL the dot in its host name satisfies the check above for every entry.
            // NOTE(review): Nx2 listings appear to emit absolute hrefs - confirm against a live page.
            String name = name(element);
            if (!name.contains(".")) {
                continue;
            }
            populateGAVName(context, name, recordFactory, page);
        }
        return page.size();
    }
}
Loading

0 comments on commit 87f7f27

Please sign in to comment.