Skip to content

Commit

Permalink
Upgrade Lucene Codec to Lucene99 (opensearch-project#800)
Browse files Browse the repository at this point in the history
Signed-off-by: Joanne Wang <[email protected]>
Co-authored-by: Andriy Redko <[email protected]>
  • Loading branch information
jowg-amazon and reta authored Jan 13, 2024
1 parent 1dd6c2d commit 988377a
Show file tree
Hide file tree
Showing 13 changed files with 112 additions and 37 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/multi-node-test-workflow.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ jobs:
needs: Get-CI-Image-Tag
strategy:
matrix:
java: [ 11, 17 ]
java: [ 11, 17, 21 ]
# Job name
name: Build and test Security Analytics on linux
# This job runs on Linux
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/security-test-workflow.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ jobs:
build:
strategy:
matrix:
java: [ 11, 17 ]
java: [ 11, 17, 21 ]
# Job name
name: Build and test SecurityAnalytics
# This job runs on Linux
Expand Down
3 changes: 1 addition & 2 deletions DEVELOPER_GUIDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,8 @@ When launching a cluster using one of the above commands, logs are placed in `bu

#### Building from the IDE

Currently, the only IDE we support is IntelliJ IDEA. It's free, it's open source, it works. The gradle tasks above can also be launched from IntelliJ's Gradle toolbar and the extra parameters can be passed in via the Launch Configurations VM arguments.
Currently, the only IDE we support is IntelliJ IDEA. It's free, it's open source, it works. The gradle tasks above can also be launched from IntelliJ's Gradle toolbar and the extra parameters can be passed in via the Launch Configurations VM arguments.

### Backport

- [Link to backport documentation](https://github.com/opensearch-project/opensearch-plugins/blob/main/BACKPORT.md)

11 changes: 9 additions & 2 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@ buildscript {

plugins {
id 'java'
id 'com.diffplug.spotless' version '6.20.0'
id "com.netflix.nebula.ospackage" version "11.3.0"
id 'com.diffplug.spotless' version '6.22.0'
id "com.netflix.nebula.ospackage" version "11.5.0"
id 'java-library'
}

Expand Down Expand Up @@ -143,6 +143,13 @@ repositories {
sourceSets.main.java.srcDirs = ['src/main/generated','src/main/java']
configurations {
zipArchive

all {
resolutionStrategy {
// for spotless transitive dependency CVE
force "org.eclipse.platform:org.eclipse.core.runtime:3.29.0"
}
}
}

dependencies {
Expand Down
Binary file modified gradle/wrapper/gradle-wrapper.jar
Binary file not shown.
4 changes: 2 additions & 2 deletions gradle/wrapper/gradle-wrapper.properties
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-8.2.1-bin.zip
distributionUrl=https\://services.gradle.org/distributions/gradle-8.5-bin.zip
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists
distributionSha256Sum=03ec176d388f2aa99defcadc3ac6adf8dd2bce5145a129659537c0874dea5ad1
distributionSha256Sum=9d926787066a081739e8200858338b4a69e837c3a821a33aca9db09dd4a41026
18 changes: 10 additions & 8 deletions gradlew
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,8 @@ done
# This is normally unused
# shellcheck disable=SC2034
APP_BASE_NAME=${0##*/}
APP_HOME=$( cd "${APP_HOME:-./}" && pwd -P ) || exit
# Discard cd standard output in case $CDPATH is set (https://github.com/gradle/gradle/issues/25036)
APP_HOME=$( cd "${APP_HOME:-./}" > /dev/null && pwd -P ) || exit

# Use the maximum available, or set MAX_FD != -1 to use that value.
MAX_FD=maximum
Expand Down Expand Up @@ -133,6 +134,7 @@ else
if ! command -v java >/dev/null 2>&1
then
die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi
Expand All @@ -143,15 +145,15 @@ if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then
case $MAX_FD in #(
max*)
# In POSIX sh, ulimit -H is undefined. That's why the result is checked to see if it worked.
# shellcheck disable=SC3045
# shellcheck disable=SC2039,SC3045
MAX_FD=$( ulimit -H -n ) ||
warn "Could not query maximum file descriptor limit"
esac
case $MAX_FD in #(
'' | soft) :;; #(
*)
# In POSIX sh, ulimit -n is undefined. That's why the result is checked to see if it worked.
# shellcheck disable=SC3045
# shellcheck disable=SC2039,SC3045
ulimit -n "$MAX_FD" ||
warn "Could not set maximum file descriptor limit to $MAX_FD"
esac
Expand Down Expand Up @@ -200,11 +202,11 @@ fi
# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'

# Collect all arguments for the java command;
# * $DEFAULT_JVM_OPTS, $JAVA_OPTS, and $GRADLE_OPTS can contain fragments of
# shell script including quotes and variable substitutions, so put them in
# double quotes to make sure that they get re-expanded; and
# * put everything else in single quotes, so that it's not re-expanded.
# Collect all arguments for the java command:
# * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS are not allowed to contain shell fragments,
# and any embedded shellness will be escaped.
# * For example: A user cannot expect ${Hostname} to be expanded, as it is an environment variable and will be
# treated as '${Hostname}' itself on the command line.

set -- \
"-Dorg.gradle.appname=$APP_BASE_NAME" \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,39 +5,50 @@
package org.opensearch.securityanalytics.correlation.index.codec;

import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.lucene95.Lucene95Codec;
import org.apache.lucene.codecs.lucene99.Lucene99Codec;
import org.apache.lucene.backward_codecs.lucene95.Lucene95Codec;
import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
import org.opensearch.index.mapper.MapperService;
import org.opensearch.securityanalytics.correlation.index.codec.correlation950.CorrelationCodec;
import org.opensearch.securityanalytics.correlation.index.codec.correlation950.PerFieldCorrelationVectorsFormat;
import org.opensearch.securityanalytics.correlation.index.codec.correlation950.CorrelationCodec950;
import org.opensearch.securityanalytics.correlation.index.codec.correlation990.CorrelationCodec990;
import org.opensearch.securityanalytics.correlation.index.codec.correlation990.PerFieldCorrelationVectorsFormat990;
import org.opensearch.securityanalytics.correlation.index.codec.correlation950.PerFieldCorrelationVectorsFormat950;

import java.util.Optional;
import java.util.function.BiFunction;
import java.util.function.Supplier;

public enum CorrelationCodecVersion {
V_9_5_0(
"CorrelationCodec",
"CorrelationCodec950",
new Lucene95Codec(),
new PerFieldCorrelationVectorsFormat(Optional.empty()),
(userCodec, mapperService) -> new CorrelationCodec(userCodec, new PerFieldCorrelationVectorsFormat(Optional.of(mapperService))),
CorrelationCodec::new
new PerFieldCorrelationVectorsFormat950(Optional.empty()),
(userCodec, mapperService) -> new CorrelationCodec950(userCodec, new PerFieldCorrelationVectorsFormat950(Optional.of(mapperService))),
CorrelationCodec950::new
),
V_9_9_0(
"CorrelationCodec990",
new Lucene99Codec(),
new PerFieldCorrelationVectorsFormat990(Optional.empty()),
(userCodec, mapperService) -> new CorrelationCodec990(userCodec, new PerFieldCorrelationVectorsFormat990(Optional.of(mapperService))),
CorrelationCodec990::new
);

private static final CorrelationCodecVersion CURRENT = V_9_5_0;
private static final CorrelationCodecVersion CURRENT = V_9_9_0;
private final String codecName;
private final Codec defaultCodecDelegate;
private final PerFieldCorrelationVectorsFormat perFieldCorrelationVectorsFormat;
private final PerFieldKnnVectorsFormat perFieldKnnVectorsFormat;
private final BiFunction<Codec, MapperService, Codec> correlationCodecSupplier;
private final Supplier<Codec> defaultCorrelationCodecSupplier;

CorrelationCodecVersion(String codecName,
Codec defaultCodecDelegate,
PerFieldCorrelationVectorsFormat perFieldCorrelationVectorsFormat,
PerFieldKnnVectorsFormat perFieldKnnVectorsFormat,
BiFunction<Codec, MapperService, Codec> correlationCodecSupplier,
Supplier<Codec> defaultCorrelationCodecSupplier) {
this.codecName = codecName;
this.defaultCodecDelegate = defaultCodecDelegate;
this.perFieldCorrelationVectorsFormat = perFieldCorrelationVectorsFormat;
this.perFieldKnnVectorsFormat = perFieldKnnVectorsFormat;
this.correlationCodecSupplier = correlationCodecSupplier;
this.defaultCorrelationCodecSupplier = defaultCorrelationCodecSupplier;
}
Expand All @@ -50,8 +61,8 @@ public Codec getDefaultCodecDelegate() {
return defaultCodecDelegate;
}

public PerFieldCorrelationVectorsFormat getPerFieldCorrelationVectorsFormat() {
return perFieldCorrelationVectorsFormat;
public PerFieldKnnVectorsFormat getPerFieldCorrelationVectorsFormat() {
return perFieldKnnVectorsFormat;
}

public BiFunction<Codec, MapperService, Codec> getCorrelationCodecSupplier() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,18 @@
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
import org.opensearch.securityanalytics.correlation.index.codec.CorrelationCodecVersion;

public class CorrelationCodec extends FilterCodec {
public class CorrelationCodec950 extends FilterCodec {
private static final CorrelationCodecVersion VERSION = CorrelationCodecVersion.V_9_5_0;
private final PerFieldCorrelationVectorsFormat perFieldCorrelationVectorsFormat;
private final PerFieldKnnVectorsFormat perFieldCorrelationVectorsFormat;

public CorrelationCodec() {
public CorrelationCodec950() {
this(VERSION.getDefaultCodecDelegate(), VERSION.getPerFieldCorrelationVectorsFormat());
}

public CorrelationCodec(Codec delegate, PerFieldCorrelationVectorsFormat perFieldCorrelationVectorsFormat) {
public CorrelationCodec950(Codec delegate, PerFieldKnnVectorsFormat perFieldCorrelationVectorsFormat) {
super(VERSION.getCodecName(), delegate);
this.perFieldCorrelationVectorsFormat = perFieldCorrelationVectorsFormat;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,15 @@
*/
package org.opensearch.securityanalytics.correlation.index.codec.correlation950;

import org.apache.lucene.codecs.lucene95.Lucene95HnswVectorsFormat;
import org.apache.lucene.backward_codecs.lucene95.Lucene95HnswVectorsFormat;
import org.opensearch.index.mapper.MapperService;
import org.opensearch.securityanalytics.correlation.index.codec.BasePerFieldCorrelationVectorsFormat;

import java.util.Optional;

public class PerFieldCorrelationVectorsFormat extends BasePerFieldCorrelationVectorsFormat {
public class PerFieldCorrelationVectorsFormat950 extends BasePerFieldCorrelationVectorsFormat {

public PerFieldCorrelationVectorsFormat(final Optional<MapperService> mapperService) {
public PerFieldCorrelationVectorsFormat950(final Optional<MapperService> mapperService) {
super(
mapperService,
Lucene95HnswVectorsFormat.DEFAULT_MAX_CONN,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/
package org.opensearch.securityanalytics.correlation.index.codec.correlation990;

import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
import org.opensearch.securityanalytics.correlation.index.codec.CorrelationCodecVersion;

/**
 * Correlation codec bound to {@link CorrelationCodecVersion#V_9_9_0}.
 *
 * <p>Extends {@link FilterCodec}, handing the version's codec name and a delegate codec to the
 * superclass, and overrides {@link #knnVectorsFormat()} so that the per-field KNN vectors format
 * supplied at construction time is returned.
 */
public class CorrelationCodec990 extends FilterCodec {

    /** Codec version that provides this codec's name and its default collaborators. */
    private static final CorrelationCodecVersion VERSION = CorrelationCodecVersion.V_9_9_0;

    /** Vectors format handed back by {@link #knnVectorsFormat()}. */
    private final PerFieldKnnVectorsFormat vectorsFormat;

    /**
     * Creates the codec from the defaults exposed by {@link #VERSION}: its default codec delegate
     * and its per-field correlation vectors format.
     */
    public CorrelationCodec990() {
        this(VERSION.getDefaultCodecDelegate(), VERSION.getPerFieldCorrelationVectorsFormat());
    }

    /**
     * Creates the codec around an explicit delegate and vectors format.
     *
     * @param delegate codec passed to {@link FilterCodec} as the wrapped codec
     * @param perFieldCorrelationVectorsFormat format returned by {@link #knnVectorsFormat()}
     */
    public CorrelationCodec990(Codec delegate, PerFieldKnnVectorsFormat perFieldCorrelationVectorsFormat) {
        super(VERSION.getCodecName(), delegate);
        vectorsFormat = perFieldCorrelationVectorsFormat;
    }

    /**
     * @return the per-field KNN vectors format this codec was constructed with
     */
    @Override
    public KnnVectorsFormat knnVectorsFormat() {
        return vectorsFormat;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/
package org.opensearch.securityanalytics.correlation.index.codec.correlation990;

import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat;
import org.opensearch.index.mapper.MapperService;
import org.opensearch.securityanalytics.correlation.index.codec.BasePerFieldCorrelationVectorsFormat;

import java.util.Optional;

/**
 * Per-field correlation vectors format backed by {@link Lucene99HnswVectorsFormat}.
 *
 * <p>All work is done by {@link BasePerFieldCorrelationVectorsFormat}; this subclass only supplies
 * the Lucene 9.9 HNSW defaults and the two factories used to create the underlying format.
 */
public class PerFieldCorrelationVectorsFormat990 extends BasePerFieldCorrelationVectorsFormat {

    /**
     * Wires the Lucene 9.9 HNSW format into the base class.
     *
     * @param mapperService optional mapper service, forwarded unchanged to the base class
     */
    public PerFieldCorrelationVectorsFormat990(final Optional<MapperService> mapperService) {
        super(
            mapperService,
            // Defaults taken directly from the Lucene 9.9 HNSW format.
            Lucene99HnswVectorsFormat.DEFAULT_MAX_CONN,
            Lucene99HnswVectorsFormat.DEFAULT_BEAM_WIDTH,
            // Factory for a format built with Lucene's own defaults.
            () -> new Lucene99HnswVectorsFormat(),
            // Factory for a format built with explicit HNSW parameters.
            (connections, width) -> new Lucene99HnswVectorsFormat(connections, width)
        );
    }
}
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
org.opensearch.securityanalytics.correlation.index.codec.correlation950.CorrelationCodec
org.opensearch.securityanalytics.correlation.index.codec.correlation950.CorrelationCodec950
org.opensearch.securityanalytics.correlation.index.codec.correlation990.CorrelationCodec990

0 comments on commit 988377a

Please sign in to comment.