getAllCaCerts();
- /**
- * Return the pem encoded CA certificate list.
- *
- * If initialized return list of pem encoded CA certificates, else return
- * null.
- *
- * @return list of pem encoded CA certificates.
- */
- List<String> getCAList();
-
- /**
- * Update and returns the pem encoded CA certificate list.
- * @return list of pem encoded CA certificates.
- * @throws IOException
- */
- List<String> updateCAList() throws IOException;
-
/**
* Verifies a digital Signature, given the signature and the certificate of
* the signer.
@@ -176,10 +160,32 @@ default void assertValidKeysAndCertificate() throws OzoneSecurityException {
}
}
+ /**
+ * Gets a KeyManager containing this CertificateClient's key material and trust chain.
+ * During certificate rotation this KeyManager is automatically updated with the new keys/certificates.
+ *
+ * @return A KeyManager containing keys and the trust chain for this CertificateClient.
+ * @throws CertificateException
+ */
ReloadingX509KeyManager getKeyManager() throws CertificateException;
+ /**
+ * Gets a TrustManager containing the trusted certificates of this CertificateClient.
+ * During certificate rotation this TrustManager is automatically updated with the new certificates.
+ *
+ * @return A TrustManager containing trusted certificates for this CertificateClient.
+ * @throws CertificateException
+ */
ReloadingX509TrustManager getTrustManager() throws CertificateException;
+ /**
+ * Creates a ClientTrustManager instance using the trusted certificates of this certificate client.
+ *
+ * @return The new ClientTrustManager instance.
+ * @throws IOException
+ */
+ ClientTrustManager createClientTrustManager() throws IOException;
+
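A hedged usage sketch (not part of the patch): the reloading managers can be wired into a standard JSSE SSLContext, which then stays valid across certificate rotation because the managers update themselves in place. The CertificateClient import path is assumed from the interface shown above.

import java.security.SecureRandom;
import javax.net.ssl.KeyManager;
import javax.net.ssl.SSLContext;
import javax.net.ssl.TrustManager;
// assumed package of the CertificateClient interface edited above
import org.apache.hadoop.hdds.security.x509.certificate.client.CertificateClient;

public final class ReloadingSslContextSketch {
  public static SSLContext build(CertificateClient certClient) throws Exception {
    SSLContext ctx = SSLContext.getInstance("TLS");
    // ReloadingX509KeyManager / ReloadingX509TrustManager are standard key/trust managers,
    // so they can be passed directly; rotation replaces their contents, not the SSLContext.
    ctx.init(new KeyManager[] {certClient.getKeyManager()},
        new TrustManager[] {certClient.getTrustManager()},
        new SecureRandom());
    return ctx;
  }
}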
/**
* Register a receiver that will be called after the certificate renewed.
*
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/utils/CertificateSignRequest.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/utils/CertificateSignRequest.java
index 1f04e868a85..553b1dc812e 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/utils/CertificateSignRequest.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/utils/CertificateSignRequest.java
@@ -27,13 +27,12 @@
import java.util.List;
import java.util.Optional;
+import com.google.common.base.Preconditions;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.validator.routines.DomainValidator;
import org.apache.hadoop.hdds.security.SecurityConfig;
import org.apache.hadoop.hdds.security.exception.SCMSecurityException;
import org.apache.hadoop.hdds.security.x509.exception.CertificateException;
-
-import com.google.common.base.Preconditions;
import org.apache.hadoop.ozone.OzoneSecurityUtil;
import org.bouncycastle.asn1.ASN1EncodableVector;
import org.bouncycastle.asn1.ASN1Object;
@@ -390,7 +389,7 @@ private Optional getSubjectAltNameExtension() throws
if (altNames != null) {
return Optional.of(new Extension(Extension.subjectAlternativeName,
false, new DEROctetString(new GeneralNames(
- altNames.toArray(new GeneralName[altNames.size()])))));
+ altNames.toArray(new GeneralName[0])))));
}
return Optional.empty();
}
@@ -414,12 +413,10 @@ private Extensions createExtensions() throws IOException {
// Add subject alternate name extension
Optional san = getSubjectAltNameExtension();
- if (san.isPresent()) {
- extensions.add(san.get());
- }
+ san.ifPresent(extensions::add);
return new Extensions(
- extensions.toArray(new Extension[extensions.size()]));
+ extensions.toArray(new Extension[0]));
}
public CertificateSignRequest build() throws SCMSecurityException {
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/server/ServerUtils.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/server/ServerUtils.java
index 31aaca568e4..66685b4bbbd 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/server/ServerUtils.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/server/ServerUtils.java
@@ -131,7 +131,7 @@ public static InetSocketAddress updateListenAddress(OzoneConfiguration conf,
* Fall back to OZONE_METADATA_DIRS if not defined.
*
* @param conf
- * @return
+ * @return the directory to be used for the SCM database.
*/
public static File getScmDbDir(ConfigurationSource conf) {
File metadataDir = getDirectoryFromConfig(conf,
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/HddsVersionInfo.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/HddsVersionInfo.java
index b76a316c90b..386b1358b97 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/HddsVersionInfo.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/HddsVersionInfo.java
@@ -44,8 +44,6 @@ public static void main(String[] args) {
System.out.println(
"Source code repository " + HDDS_VERSION_INFO.getUrl() + " -r " +
HDDS_VERSION_INFO.getRevision());
- System.out.println("Compiled by " + HDDS_VERSION_INFO.getUser() + " on "
- + HDDS_VERSION_INFO.getDate());
System.out.println(
"Compiled with protoc " + HDDS_VERSION_INFO.getHadoopProtoc2Version() +
", " + HDDS_VERSION_INFO.getGrpcProtocVersion() +
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/LeakDetector.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/LeakDetector.java
index 477a291f928..9579d4e73bf 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/LeakDetector.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/LeakDetector.java
@@ -31,7 +31,6 @@
/**
* Simple general resource leak detector using {@link ReferenceQueue} and {@link java.lang.ref.WeakReference} to
* observe resource object life-cycle and assert proper resource closure before they are GCed.
- *
*
* Example usage:
*
@@ -43,16 +42,18 @@
* // report leaks, don't refer to the original object (MyResource) here.
* System.out.println("MyResource is not closed before being discarded.");
* });
- *
- * @Override
+ * }
+ * }
+ *
+ *
+ * {@code @Override
* public void close() {
* // proper resources cleanup...
* // inform tracker that this object is closed properly.
* leakTracker.close();
* }
- * }
- *
- * }
+ * }
+ *
*/
public class LeakDetector {
private static final Logger LOG = LoggerFactory.getLogger(LeakDetector.class);
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/VersionInfo.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/VersionInfo.java
index 349c0a86206..d3de20cd476 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/VersionInfo.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/VersionInfo.java
@@ -65,18 +65,6 @@ public String getRevision() {
return info.getProperty("revision", "Unknown");
}
- public String getBranch() {
- return info.getProperty("branch", "Unknown");
- }
-
- public String getDate() {
- return info.getProperty("date", "Unknown");
- }
-
- public String getUser() {
- return info.getProperty("user", "Unknown");
- }
-
public String getUrl() {
return info.getProperty("url", "Unknown");
}
@@ -108,7 +96,6 @@ public String getCompilePlatform() {
public String getBuildVersion() {
return getVersion() +
" from " + getRevision() +
- " by " + getUser() +
" source checksum " + getSrcChecksum();
}
}
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/BooleanCodec.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/BooleanCodec.java
index 2ec396c0ffa..6d416ea2ef3 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/BooleanCodec.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/BooleanCodec.java
@@ -36,6 +36,11 @@ private BooleanCodec() {
// singleton
}
+ @Override
+ public Class<Boolean> getTypeClass() {
+ return Boolean.class;
+ }
+
@Override
public boolean supportCodecBuffer() {
return true;
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/Codec.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/Codec.java
index 46779648e67..54bbf42c468 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/Codec.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/Codec.java
@@ -30,6 +30,9 @@
public interface Codec<T> {
byte[] EMPTY_BYTE_ARRAY = {};
+ /** @return the class of the {@link T}. */
+ Class<T> getTypeClass();
+
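A minimal, self-contained illustration of the getTypeClass() contract introduced here, using a simplified stand-in interface rather than the real org.apache.hadoop.hdds.utils.db.Codec (so the other Codec methods are omitted):

import java.nio.charset.StandardCharsets;

interface MiniCodec<T> {
  /** @return the runtime class of T, e.g. for codec registries keyed by type. */
  Class<T> getTypeClass();
  byte[] encode(T value);
  T decode(byte[] bytes);
}

final class MiniStringCodec implements MiniCodec<String> {
  @Override
  public Class<String> getTypeClass() {
    return String.class;
  }
  @Override
  public byte[] encode(String value) {
    return value.getBytes(StandardCharsets.UTF_8);
  }
  @Override
  public String decode(byte[] bytes) {
    return new String(bytes, StandardCharsets.UTF_8);
  }
}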
/**
* Does this {@link Codec} support the {@link CodecBuffer} methods?
* If this method returns true, this class must implement both
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/CodecBuffer.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/CodecBuffer.java
index 1ac293b301b..87be912bb53 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/CodecBuffer.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/CodecBuffer.java
@@ -58,9 +58,9 @@ public class CodecBuffer implements UncheckedAutoCloseable {
private static class Factory {
private static volatile BiFunction constructor
= CodecBuffer::new;
- static void set(BiFunction f) {
+ static void set(BiFunction f, String name) {
constructor = f;
- LOG.info("Successfully set constructor to " + f);
+ LOG.info("Successfully set constructor to {}: {}", name, f);
}
static CodecBuffer newCodecBuffer(ByteBuf buf) {
@@ -89,7 +89,7 @@ protected void finalize() {
* Note that there is a severe performance penalty for leak detection.
*/
public static void enableLeakDetection() {
- Factory.set(LeakDetector::newCodecBuffer);
+ Factory.set(LeakDetector::newCodecBuffer, "LeakDetector::newCodecBuffer");
}
/** The size of a buffer. */
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/DelegatedCodec.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/DelegatedCodec.java
index dff0b015ed5..2ed92e66d2e 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/DelegatedCodec.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/DelegatedCodec.java
@@ -23,9 +23,9 @@
import java.io.IOException;
/**
- * A {@link Codec} to serialize/deserialize objects by delegation.
+ * A {@link org.apache.hadoop.hdds.utils.db.Codec} to serialize/deserialize objects by delegation.
*
- * @param <T> The object type of this {@link Codec}.
+ * @param <T> The object type of this {@link org.apache.hadoop.hdds.utils.db.Codec}.
* @param <DELEGATE> The object type of the {@link #delegate}.
*/
public class DelegatedCodec<T, DELEGATE> implements Codec<T> {
@@ -47,31 +47,39 @@ public enum CopyType {
private final Codec delegate;
private final CheckedFunction forward;
private final CheckedFunction backward;
+ private final Class<T> clazz;
private final CopyType copyType;
/**
* Construct a {@link Codec} using the given delegate.
*
* @param delegate the delegate {@link Codec}
- * @param forward a function to convert {@link DELEGATE} to {@link T}.
- * @param backward a function to convert {@link T} back to {@link DELEGATE}.
+ * @param forward a function to convert {@code DELEGATE} to {@code T}.
+ * @param backward a function to convert {@code T} back to {@code DELEGATE}.
* @param copyType How to {@link #copyObject(Object)}?
*/
public DelegatedCodec(Codec delegate,
CheckedFunction forward,
CheckedFunction backward,
- CopyType copyType) {
+ Class<T> clazz, CopyType copyType) {
this.delegate = delegate;
this.forward = forward;
this.backward = backward;
+ this.clazz = clazz;
this.copyType = copyType;
}
/** The same as new DelegatedCodec(delegate, forward, backward, DEEP). */
public DelegatedCodec(Codec delegate,
CheckedFunction forward,
- CheckedFunction backward) {
- this(delegate, forward, backward, CopyType.DEEP);
+ CheckedFunction backward,
+ Class<T> clazz) {
+ this(delegate, forward, backward, clazz, CopyType.DEEP);
+ }
+
+ @Override
+ public Class<T> getTypeClass() {
+ return clazz;
}
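For the call-site shape, a hedged sketch of constructing a DelegatedCodec with the new class argument; it mirrors the BlockData codec updated later in this same patch, and only assumes those Ozone classes are on the classpath:

import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos;
import org.apache.hadoop.hdds.utils.db.Codec;
import org.apache.hadoop.hdds.utils.db.DelegatedCodec;
import org.apache.hadoop.hdds.utils.db.Proto3Codec;
import org.apache.hadoop.ozone.container.common.helpers.BlockData;

final class BlockDataCodecHolder {
  static final Codec<BlockData> CODEC = new DelegatedCodec<>(
      Proto3Codec.get(ContainerProtos.BlockData.getDefaultInstance()),
      BlockData::getFromProtoBuf,    // DELEGATE -> T
      BlockData::getProtoBufMessage, // T -> DELEGATE
      BlockData.class);              // new argument: the type class backing getTypeClass()
}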
@Override
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/IntegerCodec.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/IntegerCodec.java
index 50488053159..d31be6fe976 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/IntegerCodec.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/IntegerCodec.java
@@ -36,6 +36,11 @@ private IntegerCodec() {
// singleton
}
+ @Override
+ public Class<Integer> getTypeClass() {
+ return Integer.class;
+ }
+
@Override
public boolean supportCodecBuffer() {
return true;
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/LongCodec.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/LongCodec.java
index 9e776cc18f7..cf481980008 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/LongCodec.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/LongCodec.java
@@ -33,6 +33,11 @@ public static LongCodec get() {
private LongCodec() { }
+ @Override
+ public Class<Long> getTypeClass() {
+ return Long.class;
+ }
+
@Override
public boolean supportCodecBuffer() {
return true;
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/Proto2Codec.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/Proto2Codec.java
index 96d12d1ebe5..8eb4a307215 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/Proto2Codec.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/Proto2Codec.java
@@ -47,12 +47,19 @@ public static Codec get(T t) {
return (Codec) codec;
}
+ private final Class<M> clazz;
private final Parser parser;
private Proto2Codec(M m) {
+ this.clazz = (Class<M>) m.getClass();
this.parser = (Parser) m.getParserForType();
}
+ @Override
+ public Class<M> getTypeClass() {
+ return clazz;
+ }
+
@Override
public boolean supportCodecBuffer() {
return true;
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/Proto3Codec.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/Proto3Codec.java
index 30245e033e0..c1eb693a007 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/Proto3Codec.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/Proto3Codec.java
@@ -47,12 +47,19 @@ public static Codec get(T t) {
return (Codec) codec;
}
+ private final Class<M> clazz;
private final Parser parser;
private Proto3Codec(M m) {
+ this.clazz = (Class<M>) m.getClass();
this.parser = (Parser) m.getParserForType();
}
+ @Override
+ public Class<M> getTypeClass() {
+ return clazz;
+ }
+
@Override
public boolean supportCodecBuffer() {
return true;
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/ShortCodec.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/ShortCodec.java
index f6482e5712c..beb296a29d1 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/ShortCodec.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/ShortCodec.java
@@ -37,6 +37,11 @@ private ShortCodec() {
// singleton
}
+ @Override
+ public Class<Short> getTypeClass() {
+ return Short.class;
+ }
+
@Override
public boolean supportCodecBuffer() {
return true;
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/StringCodecBase.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/StringCodecBase.java
index 1df55237937..e35be632dc4 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/StringCodecBase.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/StringCodecBase.java
@@ -59,6 +59,11 @@ abstract class StringCodecBase implements Codec {
this.fixedLength = max == encoder.averageBytesPerChar();
}
+ @Override
+ public final Class<String> getTypeClass() {
+ return String.class;
+ }
+
CharsetEncoder newEncoder() {
return charset.newEncoder()
.onMalformedInput(CodingErrorAction.REPORT)
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/UuidCodec.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/UuidCodec.java
index dfccaa0ab75..d05b748b52a 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/UuidCodec.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/UuidCodec.java
@@ -40,6 +40,11 @@ public static int getSerializedSize() {
private UuidCodec() { }
+ @Override
+ public Class<UUID> getTypeClass() {
+ return UUID.class;
+ }
+
@Override
public boolean supportCodecBuffer() {
return true;
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/ClientVersion.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/ClientVersion.java
index f3bd1a96b66..cc6695dc7d6 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/ClientVersion.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/ClientVersion.java
@@ -42,10 +42,6 @@ public enum ClientVersion implements ComponentVersion {
"This client version has support for Object Store and File " +
"System Optimized Bucket Layouts."),
- EC_REPLICA_INDEX_REQUIRED_IN_BLOCK_REQUEST(4,
- "This client version enforces replica index is set for fixing read corruption that could occur when " +
- "replicaIndex parameter is not validated before EC block reads."),
-
FUTURE_VERSION(-1, "Used internally when the server side is older and an"
+ " unknown client version has arrived from the client.");
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java
index c61502ff4a8..4c0df91e1a6 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java
@@ -120,6 +120,14 @@ public final class OzoneConfigKeys {
public static final String OZONE_FS_DATASTREAM_AUTO_THRESHOLD_DEFAULT
= "4MB";
+ /**
+ * Flag to allow server-side HBase-related features and enhancements to be enabled.
+ */
+ public static final String OZONE_HBASE_ENHANCEMENTS_ALLOWED
+ = "ozone.hbase.enhancements.allowed";
+ public static final boolean OZONE_HBASE_ENHANCEMENTS_ALLOWED_DEFAULT
+ = false;
+
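A small sketch of enabling the new flag programmatically; OzoneConfiguration and its package are assumed from the hdds config module, and the key name is the one added above:

import org.apache.hadoop.hdds.conf.OzoneConfiguration;
import org.apache.hadoop.ozone.OzoneConfigKeys;

public final class HBaseEnhancementsConfigSketch {
  public static OzoneConfiguration newConf() {
    OzoneConfiguration conf = new OzoneConfiguration();
    // Default is false; server-side HBase-related enhancements stay disabled unless allowed.
    conf.setBoolean(OzoneConfigKeys.OZONE_HBASE_ENHANCEMENTS_ALLOWED, true);
    return conf;
  }
}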
/**
* Flag to enable hsync/hflush.
*/
@@ -193,9 +201,6 @@ public final class OzoneConfigKeys {
"ozone.client.ec.grpc.write.timeout";
public static final String OZONE_CLIENT_EC_GRPC_WRITE_TIMEOUT_DEFAULT = "30s";
- public static final String OZONE_EC_GRPC_ZERO_COPY_ENABLED =
- "ozone.ec.grpc.zerocopy.enabled";
- public static final boolean OZONE_EC_GRPC_ZERO_COPY_ENABLED_DEFAULT = true;
/**
* Ozone administrator users delimited by comma.
@@ -535,10 +540,6 @@ public final class OzoneConfigKeys {
public static final int OZONE_MANAGER_STRIPED_LOCK_SIZE_DEFAULT = 512;
- public static final String OZONE_CLIENT_LIST_TRASH_KEYS_MAX =
- "ozone.client.list.trash.keys.max";
- public static final int OZONE_CLIENT_LIST_TRASH_KEYS_MAX_DEFAULT = 1000;
-
public static final String OZONE_HTTP_BASEDIR = "ozone.http.basedir";
public static final String OZONE_HTTP_POLICY_KEY =
@@ -567,11 +568,6 @@ public final class OzoneConfigKeys {
"ozone.https.client.need-auth";
public static final boolean OZONE_CLIENT_HTTPS_NEED_AUTH_DEFAULT = false;
- public static final String OZONE_OM_KEYNAME_CHARACTER_CHECK_ENABLED_KEY =
- "ozone.om.keyname.character.check.enabled";
- public static final boolean OZONE_OM_KEYNAME_CHARACTER_CHECK_ENABLED_DEFAULT =
- false;
-
public static final int OZONE_INIT_DEFAULT_LAYOUT_VERSION_DEFAULT = -1;
public static final String OZONE_CLIENT_KEY_PROVIDER_CACHE_EXPIRY =
"ozone.client.key.provider.cache.expiry";
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConsts.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConsts.java
index b34a5d8387b..49bfa1eae21 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConsts.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConsts.java
@@ -40,7 +40,6 @@ public final class OzoneConsts {
public static final String SCM_CERT_SERIAL_ID = "scmCertSerialId";
public static final String PRIMARY_SCM_NODE_ID = "primaryScmNodeId";
- public static final String OZONE_SIMPLE_ROOT_USER = "root";
public static final String OZONE_SIMPLE_HDFS_USER = "hdfs";
public static final String STORAGE_ID = "storageID";
@@ -76,12 +75,6 @@ public final class OzoneConsts {
"EEE, dd MMM yyyy HH:mm:ss zzz";
public static final String OZONE_TIME_ZONE = "GMT";
- public static final String OZONE_COMPONENT = "component";
- public static final String OZONE_FUNCTION = "function";
- public static final String OZONE_RESOURCE = "resource";
- public static final String OZONE_USER = "user";
- public static final String OZONE_REQUEST = "request";
-
// OM Http server endpoints
public static final String OZONE_OM_SERVICE_LIST_HTTP_ENDPOINT =
"/serviceList";
@@ -101,14 +94,9 @@ public final class OzoneConsts {
public static final String CONTAINER_EXTENSION = ".container";
- public static final String CONTAINER_META = ".meta";
-
- // Refer to {@link ContainerReader} for container storage layout on disk.
- public static final String CONTAINER_PREFIX = "containers";
public static final String CONTAINER_META_PATH = "metadata";
public static final String CONTAINER_TEMPORARY_CHUNK_PREFIX = "tmp";
public static final String CONTAINER_CHUNK_NAME_DELIMITER = ".";
- public static final String CONTAINER_ROOT_PREFIX = "repository";
public static final String FILE_HASH = "SHA-256";
public static final String MD5_HASH = "MD5";
@@ -128,13 +116,13 @@ public final class OzoneConsts {
* level DB names used by SCM and data nodes.
*/
public static final String CONTAINER_DB_SUFFIX = "container.db";
- public static final String PIPELINE_DB_SUFFIX = "pipeline.db";
public static final String DN_CONTAINER_DB = "-dn-" + CONTAINER_DB_SUFFIX;
public static final String OM_DB_NAME = "om.db";
public static final String SCM_DB_NAME = "scm.db";
public static final String OM_DB_BACKUP_PREFIX = "om.db.backup.";
public static final String SCM_DB_BACKUP_PREFIX = "scm.db.backup.";
public static final String CONTAINER_DB_NAME = "container.db";
+ public static final String WITNESSED_CONTAINER_DB_NAME = "witnessed_container.db";
public static final String STORAGE_DIR_CHUNKS = "chunks";
public static final String OZONE_DB_CHECKPOINT_REQUEST_FLUSH =
@@ -187,10 +175,8 @@ public final class OzoneConsts {
public static final String OM_USER_PREFIX = "$";
public static final String OM_S3_PREFIX = "S3:";
public static final String OM_S3_CALLER_CONTEXT_PREFIX = "S3Auth:S3G|";
- public static final String OM_S3_VOLUME_PREFIX = "s3";
public static final String OM_S3_SECRET = "S3Secret:";
public static final String OM_PREFIX = "Prefix:";
- public static final String OM_TENANT = "Tenant:";
/**
* Max chunk size limit.
@@ -198,11 +184,6 @@ public final class OzoneConsts {
public static final int OZONE_SCM_CHUNK_MAX_SIZE = 32 * 1024 * 1024;
- /**
- * Max OM Quota size of Long.MAX_VALUE.
- */
- public static final long MAX_QUOTA_IN_BYTES = Long.MAX_VALUE;
-
/**
* Quota RESET default is -1, which means quota is not set.
*/
@@ -214,36 +195,20 @@ public final class OzoneConsts {
*/
public enum Units { TB, GB, MB, KB, B }
- /**
- * Max number of keys returned per list buckets operation.
- */
- public static final int MAX_LISTBUCKETS_SIZE = 1024;
-
- /**
- * Max number of keys returned per list keys operation.
- */
- public static final int MAX_LISTKEYS_SIZE = 1024;
-
- /**
- * Max number of volumes returned per list volumes operation.
- */
- public static final int MAX_LISTVOLUMES_SIZE = 1024;
-
- public static final int INVALID_PORT = -1;
-
/**
* Object ID to identify reclaimable uncommitted blocks.
*/
public static final long OBJECT_ID_RECLAIM_BLOCKS = 0L;
-
/**
* Default SCM Datanode ID file name.
*/
public static final String OZONE_SCM_DATANODE_ID_FILE_DEFAULT = "datanode.id";
- // The ServiceListJSONServlet context attribute where OzoneManager
- // instance gets stored.
+ /**
+ * The ServiceListJSONServlet context attribute where OzoneManager
+ * instance gets stored.
+ */
public static final String OM_CONTEXT_ATTRIBUTE = "ozone.om";
public static final String SCM_CONTEXT_ATTRIBUTE = "ozone.scm";
@@ -308,12 +273,8 @@ private OzoneConsts() {
public static final String KEY_PREFIX = "keyPrefix";
public static final String ACL = "acl";
public static final String ACLS = "acls";
- public static final String USER_ACL = "userAcl";
- public static final String ADD_ACLS = "addAcls";
- public static final String REMOVE_ACLS = "removeAcls";
public static final String MAX_NUM_OF_BUCKETS = "maxNumOfBuckets";
public static final String HAS_SNAPSHOT = "hasSnapshot";
- public static final String TO_KEY_NAME = "toKeyName";
public static final String STORAGE_TYPE = "storageType";
public static final String RESOURCE_TYPE = "resourceType";
public static final String IS_VERSION_ENABLED = "isVersionEnabled";
@@ -323,7 +284,6 @@ private OzoneConsts() {
public static final String REPLICATION_TYPE = "replicationType";
public static final String REPLICATION_FACTOR = "replicationFactor";
public static final String REPLICATION_CONFIG = "replicationConfig";
- public static final String KEY_LOCATION_INFO = "keyLocationInfo";
public static final String MULTIPART_LIST = "multipartList";
public static final String UPLOAD_ID = "uploadID";
public static final String PART_NUMBER_MARKER = "partNumberMarker";
@@ -378,10 +338,6 @@ private OzoneConsts() {
public static final String JAVA_TMP_DIR = "java.io.tmpdir";
public static final String LOCALHOST = "localhost";
-
- public static final int S3_BUCKET_MIN_LENGTH = 3;
- public static final int S3_BUCKET_MAX_LENGTH = 64;
-
public static final int S3_SECRET_KEY_MIN_LENGTH = 8;
public static final int S3_REQUEST_HEADER_METADATA_SIZE_LIMIT_KB = 2;
@@ -398,7 +354,6 @@ private OzoneConsts() {
public static final String GDPR_ALGORITHM_NAME = "AES";
public static final int GDPR_DEFAULT_RANDOM_SECRET_LENGTH = 16;
public static final Charset GDPR_CHARSET = StandardCharsets.UTF_8;
- public static final String GDPR_LENGTH = "length";
public static final String GDPR_SECRET = "secret";
public static final String GDPR_ALGORITHM = "algorithm";
@@ -409,7 +364,7 @@ private OzoneConsts() {
* contains illegal characters when creating/renaming key.
*
* Avoid the following characters in a key name:
- * "\", "{", "}", "<", ">", "^", "%", "~", "#", "|", "`", "[", "]", Quotation
+ * {@literal "\", "{", "}", "<", ">", "^", "%", "~", "#", "|", "`", "[", "]"}, Quotation
* marks and Non-printable ASCII characters (128–255 decimal characters).
* https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-keys.html
*/
@@ -426,13 +381,6 @@ private OzoneConsts() {
public static final String CONTAINER_DB_TYPE_ROCKSDB = "RocksDB";
- // SCM HA
- public static final String SCM_SERVICE_ID_DEFAULT = "scmServiceIdDefault";
-
- // SCM Ratis snapshot file to store the last applied index
- public static final String SCM_RATIS_SNAPSHOT_INDEX = "scmRatisSnapshotIndex";
-
- public static final String SCM_RATIS_SNAPSHOT_TERM = "scmRatisSnapshotTerm";
// An on-disk transient marker file used when replacing DB with checkpoint
public static final String DB_TRANSIENT_MARKER = "dbInconsistentMarker";
@@ -440,10 +388,7 @@ private OzoneConsts() {
// should remain prepared even after a restart.
public static final String PREPARE_MARKER = "prepareMarker";
- // TODO : rename this to OZONE_RATIS_SNAPSHOT_DIR and use it in both
- // SCM and OM
- public static final String OM_RATIS_SNAPSHOT_DIR = "snapshot";
- public static final String SCM_RATIS_SNAPSHOT_DIR = "snapshot";
+ public static final String OZONE_RATIS_SNAPSHOT_DIR = "snapshot";
public static final long DEFAULT_OM_UPDATE_ID = -1L;
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneManagerVersion.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneManagerVersion.java
index eec2ceeb5e8..2d0b2bb56fd 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneManagerVersion.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneManagerVersion.java
@@ -44,6 +44,11 @@ public enum OzoneManagerVersion implements ComponentVersion {
ATOMIC_REWRITE_KEY(6, "OzoneManager version that supports rewriting key as atomic operation"),
HBASE_SUPPORT(7, "OzoneManager version that supports HBase integration"),
+ LIGHTWEIGHT_LIST_STATUS(8, "OzoneManager version that supports lightweight"
+ + " listStatus API."),
+
+ S3_OBJECT_TAGGING_API(9, "OzoneManager version that supports S3 object tagging APIs, such as " +
+ "PutObjectTagging, GetObjectTagging, and DeleteObjectTagging"),
FUTURE_VERSION(-1, "Used internally in the client when the server side is "
+ " newer and an unknown server version has arrived to the client.");
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/common/Checksum.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/common/Checksum.java
index f8b3febfeca..03771915be4 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/common/Checksum.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/common/Checksum.java
@@ -33,6 +33,8 @@
import org.apache.hadoop.ozone.common.utils.BufferUtils;
import org.apache.ratis.thirdparty.com.google.protobuf.ByteString;
import org.apache.ratis.thirdparty.com.google.protobuf.UnsafeByteOperations;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
/**
* Class to compute and verify checksums for chunks.
@@ -40,6 +42,8 @@
* This class is not thread safe.
*/
public class Checksum {
+ public static final Logger LOG = LoggerFactory.getLogger(Checksum.class);
+
private static Function newMessageDigestFunction(
String algorithm) {
final MessageDigest md;
@@ -63,7 +67,7 @@ public static ByteString int2ByteString(int n) {
private static Function newChecksumByteBufferFunction(
Supplier constructor) {
final ChecksumByteBuffer algorithm = constructor.get();
- return data -> {
+ return data -> {
algorithm.reset();
algorithm.update(data);
return int2ByteString((int)algorithm.getValue());
@@ -97,6 +101,23 @@ Function newChecksumFunction() {
private final ChecksumType checksumType;
private final int bytesPerChecksum;
+ /**
+ * Caches computeChecksum() result when requested.
+ * This must be manually cleared when a new block chunk has been started.
+ */
+ private final ChecksumCache checksumCache;
+
+ /**
+ * BlockOutputStream needs to call this method to clear the checksum cache
+ * whenever a new block chunk has been started.
+ */
+ public boolean clearChecksumCache() {
+ if (checksumCache != null) {
+ checksumCache.clear();
+ return true;
+ }
+ return false;
+ }
/**
* Constructs a Checksum object.
@@ -106,6 +127,24 @@ Function newChecksumFunction() {
public Checksum(ChecksumType type, int bytesPerChecksum) {
this.checksumType = type;
this.bytesPerChecksum = bytesPerChecksum;
+ this.checksumCache = null;
+ }
+
+ /**
+ * Constructs a Checksum object.
+ * @param type type of Checksum
+ * @param bytesPerChecksum number of bytes of data per checksum
+ * @param allowChecksumCache true to enable checksum cache
+ */
+ public Checksum(ChecksumType type, int bytesPerChecksum, boolean allowChecksumCache) {
+ this.checksumType = type;
+ this.bytesPerChecksum = bytesPerChecksum;
+ LOG.debug("allowChecksumCache = {}", allowChecksumCache);
+ if (allowChecksumCache) {
+ this.checksumCache = new ChecksumCache(bytesPerChecksum);
+ } else {
+ this.checksumCache = null;
+ }
}
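A hedged usage sketch of the cached path added here, modeled on how a writer such as BlockOutputStream might drive it; the ChecksumType import and the chunk handling are assumptions, only the Checksum API calls introduced in this patch are relied on:

import java.nio.ByteBuffer;
import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ChecksumType;
import org.apache.hadoop.ozone.common.Checksum;
import org.apache.hadoop.ozone.common.ChecksumData;
import org.apache.hadoop.ozone.common.OzoneChecksumException;

public final class CachedChecksumSketch {
  // One Checksum (and thus one cache) per output stream; 16 KB per checksum, cache enabled.
  private final Checksum checksum = new Checksum(ChecksumType.CRC32, 16 * 1024, true);

  /** Called repeatedly while the same block chunk keeps growing. */
  ChecksumData onChunkDataUpdated(ByteBuffer chunkSoFar) throws OzoneChecksumException {
    // useChecksumCache=true: previously computed checksums are reused,
    // only the trailing (partial) checksum is recomputed.
    return checksum.computeChecksum(chunkSoFar, true);
  }

  /** Called when a new block chunk starts. */
  void onNewChunk() {
    checksum.clearChecksumCache();
  }
}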
/**
@@ -128,13 +167,25 @@ public ChecksumData computeChecksum(byte[] data)
return computeChecksum(ByteBuffer.wrap(data));
}
+ /**
+ * The default implementation of computeChecksum(ByteBuffer) that does not use cache, even if cache is initialized.
+ * This is a stop-gap solution before the protocol change.
+ * @param data ByteBuffer
+ * @return ChecksumData
+ * @throws OzoneChecksumException
+ */
+ public ChecksumData computeChecksum(ByteBuffer data)
+ throws OzoneChecksumException {
+ return computeChecksum(data, false);
+ }
+
/**
* Computes checksum for give data.
* @param data input data.
* @return ChecksumData computed for input data.
* @throws OzoneChecksumException thrown when ChecksumType is not recognized
*/
- public ChecksumData computeChecksum(ByteBuffer data)
+ public ChecksumData computeChecksum(ByteBuffer data, boolean useChecksumCache)
throws OzoneChecksumException {
// If type is set to NONE, we do not need to compute the checksums. We also
// need to avoid unnecessary conversions.
@@ -144,7 +195,7 @@ public ChecksumData computeChecksum(ByteBuffer data)
if (!data.isReadOnly()) {
data = data.asReadOnlyBuffer();
}
- return computeChecksum(ChunkBuffer.wrap(data));
+ return computeChecksum(ChunkBuffer.wrap(data), useChecksumCache);
}
public ChecksumData computeChecksum(List byteStrings)
@@ -154,8 +205,20 @@ public ChecksumData computeChecksum(List byteStrings)
return computeChecksum(ChunkBuffer.wrap(buffers));
}
+ /**
+ * The default implementation of computeChecksum(ChunkBuffer) that does not use cache, even if cache is initialized.
+ * This is a stop-gap solution before the protocol change.
+ * @param data ChunkBuffer
+ * @return ChecksumData
+ * @throws OzoneChecksumException
+ */
public ChecksumData computeChecksum(ChunkBuffer data)
throws OzoneChecksumException {
+ return computeChecksum(data, false);
+ }
+
+ public ChecksumData computeChecksum(ChunkBuffer data, boolean useCache)
+ throws OzoneChecksumException {
if (checksumType == ChecksumType.NONE) {
// Since type is set to NONE, we do not need to compute the checksums
return new ChecksumData(checksumType, bytesPerChecksum);
@@ -168,12 +231,20 @@ public ChecksumData computeChecksum(ChunkBuffer data)
throw new OzoneChecksumException(checksumType);
}
- // Checksum is computed for each bytesPerChecksum number of bytes of data
- // starting at offset 0. The last checksum might be computed for the
- // remaining data with length less than bytesPerChecksum.
- final List<ByteString> checksumList = new ArrayList<>();
- for (ByteBuffer b : data.iterate(bytesPerChecksum)) {
- checksumList.add(computeChecksum(b, function, bytesPerChecksum));
+ final List<ByteString> checksumList;
+ if (checksumCache == null || !useCache) {
+ // When checksumCache is not enabled:
+ // Checksum is computed for each bytesPerChecksum number of bytes of data
+ // starting at offset 0. The last checksum might be computed for the
+ // remaining data with length less than bytesPerChecksum.
+ checksumList = new ArrayList<>();
+ for (ByteBuffer b : data.iterate(bytesPerChecksum)) {
+ checksumList.add(computeChecksum(b, function, bytesPerChecksum)); // merge this?
+ }
+ } else {
+ // When checksumCache is enabled:
+ // We only need to update the last checksum in the cache, then pass it along.
+ checksumList = checksumCache.computeChecksum(data, function);
}
return new ChecksumData(checksumType, bytesPerChecksum, checksumList);
}
@@ -185,7 +256,7 @@ public ChecksumData computeChecksum(ChunkBuffer data)
* @param maxLength the max length of data
* @return computed checksum ByteString
*/
- private static ByteString computeChecksum(ByteBuffer data,
+ protected static ByteString computeChecksum(ByteBuffer data,
Function function, int maxLength) {
final int limit = data.limit();
try {
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/common/ChecksumByteBufferImpl.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/common/ChecksumByteBufferImpl.java
index 1d596bf7007..a5235978327 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/common/ChecksumByteBufferImpl.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/common/ChecksumByteBufferImpl.java
@@ -44,12 +44,14 @@ public class ChecksumByteBufferImpl implements ChecksumByteBuffer {
static {
Field f = null;
- try {
- f = ByteBuffer.class
- .getDeclaredField("isReadOnly");
- f.setAccessible(true);
- } catch (NoSuchFieldException e) {
- LOG.error("No isReadOnly field in ByteBuffer", e);
+ if (JavaUtils.isJavaVersionAtMost(8)) {
+ try {
+ f = ByteBuffer.class
+ .getDeclaredField("isReadOnly");
+ f.setAccessible(true);
+ } catch (NoSuchFieldException e) {
+ LOG.error("No isReadOnly field in ByteBuffer", e);
+ }
}
IS_READY_ONLY_FIELD = f;
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/common/ChecksumCache.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/common/ChecksumCache.java
new file mode 100644
index 00000000000..0f6482919a3
--- /dev/null
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/common/ChecksumCache.java
@@ -0,0 +1,122 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package org.apache.hadoop.ozone.common;
+
+import org.apache.ratis.thirdparty.com.google.protobuf.ByteString;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.function.Function;
+
+/**
+ * Cache previous checksums to avoid recomputing them.
+ * This is a stop-gap solution to reduce checksum calc overhead inside critical section
+ * without having to do a major refactoring/overhaul over protobuf and interfaces.
+ * This is only supposed to be used by BlockOutputStream, for now.
+ *
+ * Each BlockOutputStream has its own Checksum instance.
+ * Each block chunk (4 MB default) is divided into 16 KB (default) each for checksum calculation.
+ * For CRC32/CRC32C, each checksum takes 4 bytes. Thus each block chunk has 4 MB / 16 KB * 4 B = 1 KB of checksum data.
+ */
+public class ChecksumCache {
+ public static final Logger LOG = LoggerFactory.getLogger(ChecksumCache.class);
+
+ private final int bytesPerChecksum;
+ private final List<ByteString> checksums;
+ // Chunk length last time the checksum is computed
+ private int prevChunkLength;
+ // This only serves as a hint for array list initial allocation. The array list will still grow as needed.
+ private static final int BLOCK_CHUNK_SIZE = 4 * 1024 * 1024; // 4 MB
+
+ public ChecksumCache(int bytesPerChecksum) {
+ LOG.info("Initializing ChecksumCache with bytesPerChecksum = {}", bytesPerChecksum);
+ this.prevChunkLength = 0;
+ this.bytesPerChecksum = bytesPerChecksum;
+ // Set initialCapacity to avoid costly resizes
+ this.checksums = new ArrayList<>(BLOCK_CHUNK_SIZE / bytesPerChecksum);
+ }
+
+ /**
+ * Clear cached checksums. And reset the written index.
+ */
+ public void clear() {
+ prevChunkLength = 0;
+ checksums.clear();
+ }
+
+ public List<ByteString> getChecksums() {
+ return checksums;
+ }
+
+ public List<ByteString> computeChecksum(ChunkBuffer data, Function<ByteBuffer, ByteString> function) {
+ // Indicates how much data the current chunk buffer holds
+ final int currChunkLength = data.limit();
+
+ if (currChunkLength == prevChunkLength) {
+ LOG.debug("ChunkBuffer data limit same as last time ({}). No new checksums need to be computed", prevChunkLength);
+ return checksums;
+ }
+
+ // Sanity check
+ if (currChunkLength < prevChunkLength) {
+ // If currChunkLength < prevChunkLength, it indicates a bug that needs to be addressed.
+ // It means BOS has not properly clear()ed the cache when a new chunk is started in that code path.
+ throw new IllegalArgumentException("ChunkBuffer data limit (" + currChunkLength + ")" +
+ " must not be smaller than last time (" + prevChunkLength + ")");
+ }
+
+ // One or more checksums need to be computed
+
+ // Start of the checksum index that need to be (re)computed
+ final int ciStart = prevChunkLength / bytesPerChecksum;
+ final int ciEnd = currChunkLength / bytesPerChecksum + (currChunkLength % bytesPerChecksum == 0 ? 0 : 1);
+ int i = 0;
+ for (ByteBuffer b : data.iterate(bytesPerChecksum)) {
+ if (i < ciStart) {
+ i++;
+ continue;
+ }
+
+ // variable i can either point to:
+ // 1. the last element in the list -- in which case the checksum needs to be updated
+ // 2. one after the last element -- in which case a new checksum needs to be added
+ assert i == checksums.size() - 1 || i == checksums.size();
+
+ // TODO: Furthermore for CRC32/CRC32C, it can be even more efficient by updating the last checksum byte-by-byte.
+ final ByteString checksum = Checksum.computeChecksum(b, function, bytesPerChecksum);
+ if (i == checksums.size()) {
+ checksums.add(checksum);
+ } else {
+ checksums.set(i, checksum);
+ }
+
+ i++;
+ }
+
+ // Sanity check
+ if (i != ciEnd) {
+ throw new IllegalStateException("ChecksumCache: Checksum index end does not match expectation");
+ }
+
+ // Update last written index
+ prevChunkLength = currChunkLength;
+ return checksums;
+ }
+}
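A hedged, self-contained sketch of the incremental behaviour described above: as the same chunk grows, earlier cache entries are reused, the old partial checksum is recomputed, and new entries are appended. The CRC32 lambda stands in for the function that Checksum normally supplies, and wrapping plain byte arrays is a simplification of the real caller:

import java.nio.ByteBuffer;
import java.util.function.Function;
import java.util.zip.CRC32;
import org.apache.hadoop.ozone.common.Checksum;
import org.apache.hadoop.ozone.common.ChecksumCache;
import org.apache.hadoop.ozone.common.ChunkBuffer;
import org.apache.ratis.thirdparty.com.google.protobuf.ByteString;

public final class ChecksumCacheSketch {
  public static void main(String[] args) {
    final int bytesPerChecksum = 16;
    Function<ByteBuffer, ByteString> crc32 = buf -> {
      CRC32 crc = new CRC32();
      crc.update(buf);
      return Checksum.int2ByteString((int) crc.getValue());
    };
    ChecksumCache cache = new ChecksumCache(bytesPerChecksum);
    // First call: 20 bytes -> one full + one partial checksum.
    System.out.println(cache.computeChecksum(
        ChunkBuffer.wrap(ByteBuffer.wrap(new byte[20])), crc32).size()); // 2
    // Same chunk grown to 40 bytes: first entry reused, partial one redone, one appended.
    System.out.println(cache.computeChecksum(
        ChunkBuffer.wrap(ByteBuffer.wrap(new byte[40])), crc32).size()); // 3
    cache.clear(); // reset when a new block chunk starts
  }
}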
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/common/ChunkBuffer.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/common/ChunkBuffer.java
index 058934c2f27..a24d39e5dac 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/common/ChunkBuffer.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/common/ChunkBuffer.java
@@ -39,13 +39,12 @@ static ChunkBuffer allocate(int capacity) {
return allocate(capacity, 0);
}
- /**
- * Similar to {@link ByteBuffer#allocate(int)}
+ /** Similar to {@link ByteBuffer#allocate(int)}
* except that it can specify the increment.
*
* @param increment
* the increment size so that this buffer is allocated incrementally.
- * When increment <= 0, entire buffer is allocated in the beginning.
+ * When increment {@literal <= 0}, entire buffer is allocated in the beginning.
*/
static ChunkBuffer allocate(int capacity, int increment) {
if (increment > 0 && increment < capacity) {
@@ -60,7 +59,8 @@ static ChunkBuffer wrap(ByteBuffer buffer) {
return new ChunkBufferImplWithByteBuffer(buffer);
}
- /** Wrap the given list of {@link ByteBuffer}s as a {@link ChunkBuffer}. */
+ /** Wrap the given list of {@link ByteBuffer}s as a {@link ChunkBuffer},
+ * with a function called when buffers are released.*/
static ChunkBuffer wrap(List buffers) {
Objects.requireNonNull(buffers, "buffers == null");
if (buffers.size() == 1) {
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/common/ChunkBufferImplWithByteBuffer.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/common/ChunkBufferImplWithByteBuffer.java
index 36c16e92bf0..254be93dc4a 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/common/ChunkBufferImplWithByteBuffer.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/common/ChunkBufferImplWithByteBuffer.java
@@ -25,9 +25,9 @@
import java.util.List;
import java.util.NoSuchElementException;
import java.util.Objects;
-import java.util.UUID;
import java.util.function.Function;
+import org.apache.hadoop.ozone.common.utils.BufferUtils;
import org.apache.ratis.thirdparty.com.google.protobuf.ByteString;
import org.apache.ratis.util.UncheckedAutoCloseable;
@@ -35,7 +35,6 @@
final class ChunkBufferImplWithByteBuffer implements ChunkBuffer {
private final ByteBuffer buffer;
private final UncheckedAutoCloseable underlying;
- private final UUID identity = UUID.randomUUID();
ChunkBufferImplWithByteBuffer(ByteBuffer buffer) {
this(buffer, null);
@@ -104,7 +103,7 @@ public List asByteBufferList() {
@Override
public long writeTo(GatheringByteChannel channel) throws IOException {
- return channel.write(buffer);
+ return BufferUtils.writeFully(channel, buffer);
}
@Override
@@ -163,6 +162,6 @@ public int hashCode() {
@Override
public String toString() {
return getClass().getSimpleName() + ":limit=" + buffer.limit()
- + "@" + identity;
+ + "@" + Integer.toHexString(super.hashCode());
}
}
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/common/ChunkBufferImplWithByteBufferList.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/common/ChunkBufferImplWithByteBufferList.java
index a3b5f9d2eef..e1f169662f8 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/common/ChunkBufferImplWithByteBufferList.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/common/ChunkBufferImplWithByteBufferList.java
@@ -23,6 +23,8 @@
import java.util.Collections;
import java.util.Iterator;
import java.util.NoSuchElementException;
+
+import org.apache.hadoop.ozone.common.utils.BufferUtils;
import org.apache.ratis.thirdparty.com.google.protobuf.ByteString;
import java.io.IOException;
@@ -246,9 +248,9 @@ public List asByteBufferList() {
@Override
public long writeTo(GatheringByteChannel channel) throws IOException {
- long bytes = channel.write(buffers.toArray(new ByteBuffer[0]));
+ final long written = BufferUtils.writeFully(channel, buffers);
findCurrent();
- return bytes;
+ return written;
}
@Override
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/common/IncrementalChunkBuffer.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/common/IncrementalChunkBuffer.java
index dda4fae0d2b..732af4b6850 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/common/IncrementalChunkBuffer.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/common/IncrementalChunkBuffer.java
@@ -19,6 +19,7 @@
import com.google.common.base.Preconditions;
import org.apache.hadoop.hdds.utils.db.CodecBuffer;
+import org.apache.hadoop.ozone.common.utils.BufferUtils;
import org.apache.ratis.thirdparty.com.google.protobuf.ByteString;
import java.io.IOException;
@@ -279,7 +280,7 @@ public List asByteBufferList() {
@Override
public long writeTo(GatheringByteChannel channel) throws IOException {
- return channel.write(buffers.toArray(new ByteBuffer[0]));
+ return BufferUtils.writeFully(channel, buffers);
}
@Override
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/common/utils/BufferUtils.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/common/utils/BufferUtils.java
index c6ad754f19b..a266c3615b0 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/common/utils/BufferUtils.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/common/utils/BufferUtils.java
@@ -19,15 +19,23 @@
package org.apache.hadoop.ozone.common.utils;
import com.google.common.base.Preconditions;
+
+import java.io.IOException;
import java.nio.ByteBuffer;
+import java.nio.channels.GatheringByteChannel;
import java.util.ArrayList;
import java.util.List;
import org.apache.ratis.thirdparty.com.google.protobuf.ByteString;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
/**
* Utilities for buffers.
*/
public final class BufferUtils {
+ public static final Logger LOG = LoggerFactory.getLogger(BufferUtils.class);
+
+ private static final ByteBuffer[] EMPTY_BYTE_BUFFER_ARRAY = {};
/** Utility classes should not be constructed. **/
private BufferUtils() {
@@ -136,4 +144,46 @@ public static int getNumberOfBins(long numElements, int maxElementsPerBin) {
}
return Math.toIntExact(n);
}
+
+ /**
+ * Write all remaining bytes in buffer to the given channel.
+ */
+ public static long writeFully(GatheringByteChannel ch, ByteBuffer bb) throws IOException {
+ long written = 0;
+ while (bb.remaining() > 0) {
+ int n = ch.write(bb);
+ if (n < 0) {
+ throw new IllegalStateException("GatheringByteChannel.write returns " + n + " < 0 for " + ch);
+ }
+ written += n;
+ }
+ return written;
+ }
+
+ public static long writeFully(GatheringByteChannel ch, List<ByteBuffer> buffers) throws IOException {
+ return BufferUtils.writeFully(ch, buffers.toArray(EMPTY_BYTE_BUFFER_ARRAY));
+ }
+
+ public static long writeFully(GatheringByteChannel ch, ByteBuffer[] buffers) throws IOException {
+ if (LOG.isDebugEnabled()) {
+ for (int i = 0; i < buffers.length; i++) {
+ LOG.debug("buffer[{}]: remaining={}", i, buffers[i].remaining());
+ }
+ }
+
+ long written = 0;
+ for (int i = 0; i < buffers.length; i++) {
+ while (buffers[i].remaining() > 0) {
+ final long n = ch.write(buffers, i, buffers.length - i);
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("buffer[{}]: remaining={}, written={}", i, buffers[i].remaining(), n);
+ }
+ if (n < 0) {
+ throw new IllegalStateException("GatheringByteChannel.write returns " + n + " < 0 for " + ch);
+ }
+ written += n;
+ }
+ }
+ return written;
+ }
}
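A short usage sketch of the new helper with a FileChannel (which is a GatheringByteChannel); the temp-file setup is illustrative only:

import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.util.Arrays;
import org.apache.hadoop.ozone.common.utils.BufferUtils;

public final class WriteFullySketch {
  public static void main(String[] args) throws IOException {
    Path path = Files.createTempFile("write-fully", ".bin");
    ByteBuffer[] buffers = {
        ByteBuffer.wrap("hello ".getBytes(StandardCharsets.UTF_8)),
        ByteBuffer.wrap("world".getBytes(StandardCharsets.UTF_8)),
    };
    try (FileChannel ch = FileChannel.open(path, StandardOpenOption.WRITE)) {
      // Unlike a single channel.write(...) call, this loops until every buffer is drained.
      long written = BufferUtils.writeFully(ch, Arrays.asList(buffers));
      System.out.println("written = " + written); // 11
    }
  }
}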
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/container/common/helpers/BlockData.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/container/common/helpers/BlockData.java
index 4bd170df8e8..4fee39921b6 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/container/common/helpers/BlockData.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/container/common/helpers/BlockData.java
@@ -23,6 +23,7 @@
import org.apache.hadoop.hdds.utils.db.Codec;
import org.apache.hadoop.hdds.utils.db.DelegatedCodec;
import org.apache.hadoop.hdds.utils.db.Proto3Codec;
+import org.apache.hadoop.ozone.OzoneConsts;
import java.io.IOException;
import java.util.Collections;
@@ -38,7 +39,8 @@ public class BlockData {
private static final Codec CODEC = new DelegatedCodec<>(
Proto3Codec.get(ContainerProtos.BlockData.getDefaultInstance()),
BlockData::getFromProtoBuf,
- BlockData::getProtoBufMessage);
+ BlockData::getProtoBufMessage,
+ BlockData.class);
public static Codec getCodec() {
return CODEC;
@@ -252,7 +254,7 @@ public void setChunks(List chunks) {
size = singleChunk.getLen();
} else {
chunkList = chunks;
- size = chunks.parallelStream()
+ size = chunks.stream()
.mapToLong(ContainerProtos.ChunkInfo::getLen)
.sum();
}
@@ -280,4 +282,14 @@ public void appendTo(StringBuilder sb) {
sb.append(", size=").append(size);
sb.append("]");
}
+
+ public long getBlockGroupLength() {
+ String lenStr = getMetadata()
+ .get(OzoneConsts.BLOCK_GROUP_LEN_KEY_IN_PUT_BLOCK);
+ // If we don't have the length, then it indicates a problem with the stripe.
+ // All replicas should carry the length, so if it is not there, we return 0,
+ // which will cause us to set the length of the block to zero and not
+ // attempt to reconstruct it.
+ return (lenStr == null) ? 0 : Long.parseLong(lenStr);
+ }
}
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/container/common/helpers/ChunkInfoList.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/container/common/helpers/ChunkInfoList.java
index fdf40af9e09..ab5d39e9c3d 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/container/common/helpers/ChunkInfoList.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/container/common/helpers/ChunkInfoList.java
@@ -27,7 +27,7 @@
/**
* Helper class to convert between protobuf lists and Java lists of
- * {@link ContainerProtos.ChunkInfo} objects.
+ * {@link org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ChunkInfo} objects.
*
* This class is immutable.
*/
@@ -36,6 +36,7 @@ public class ChunkInfoList {
Proto3Codec.get(ContainerProtos.ChunkInfoList.getDefaultInstance()),
ChunkInfoList::getFromProtoBuf,
ChunkInfoList::getProtoBufMessage,
+ ChunkInfoList.class,
DelegatedCodec.CopyType.SHALLOW);
public static Codec getCodec() {
@@ -49,7 +50,7 @@ public ChunkInfoList(List chunks) {
}
/**
- * @return A new {@link ChunkInfoList} created from protobuf data.
+ * @return A new {@link #ChunkInfoList} created from protobuf data.
*/
public static ChunkInfoList getFromProtoBuf(
ContainerProtos.ChunkInfoList chunksProto) {
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/grpc/metrics/GrpcMetrics.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/grpc/metrics/GrpcMetrics.java
index 6bd83b44a93..6e0dde66986 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/grpc/metrics/GrpcMetrics.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/grpc/metrics/GrpcMetrics.java
@@ -33,6 +33,7 @@
import org.apache.hadoop.metrics2.lib.MutableRate;
import org.apache.hadoop.ozone.OzoneConfigKeys;
import org.apache.hadoop.ozone.OzoneConsts;
+import org.apache.hadoop.ozone.util.MetricUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -100,6 +101,8 @@ public static synchronized GrpcMetrics create(Configuration conf) {
*/
public void unRegister() {
DefaultMetricsSystem.instance().unregisterSource(SOURCE_NAME);
+ MetricUtil.stop(grpcProcessingTimeMillisQuantiles);
+ MetricUtil.stop(grpcQueueTimeMillisQuantiles);
}
@Override
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/upgrade/LayoutVersionInstanceFactory.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/upgrade/LayoutVersionInstanceFactory.java
index 83e63a2a322..b94dd024b2d 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/upgrade/LayoutVersionInstanceFactory.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/upgrade/LayoutVersionInstanceFactory.java
@@ -37,18 +37,16 @@
/**
* Generic factory which stores different instances of Type 'T' sharded by
- * a key & version. A single key can be associated with different versions
+ * a key and version. A single key can be associated with different versions
* of 'T'.
- *
* Why does this class exist?
* A typical use case during upgrade is to have multiple versions of a class
* / method / object and chose them based on current layout
* version at runtime. Before finalizing, an older version is typically
* needed, and after finalize, a newer version is needed. This class serves
* this purpose in a generic way.
- *
* For example, we can create a Factory to create multiple versions of
- * OMRequests sharded by Request Type & Layout Version Supported.
+ * OMRequests sharded by Request Type and Layout Version Supported.
*/
public class LayoutVersionInstanceFactory {
@@ -71,7 +69,7 @@ public class LayoutVersionInstanceFactory {
/**
* Register an instance with a given factory key (key + version).
* For safety reasons we dont allow (1) re-registering, (2) registering an
- * instance with version > SLV.
+ * instance with version &gt; SLV.
*
* @param lvm LayoutVersionManager
* @param key VersionFactoryKey key to associate with instance.
@@ -138,13 +136,15 @@ private boolean isValid(LayoutVersionManager lvm, int version) {
}
/**
+ *
* From the list of versioned instances for a given "key", this
* returns the "floor" value corresponding to the given version.
- * For example, if we have key = "CreateKey", entry -> [(1, CreateKeyV1),
- * (3, CreateKeyV2), and if the passed in key = CreateKey & version = 2, we
+ * For example, if we have key = "CreateKey", entry -&gt; [(1, CreateKeyV1),
+ * (3, CreateKeyV2), and if the passed in key = CreateKey &amp; version = 2, we
* return CreateKeyV1.
* Since this is a priority queue based implementation, we use a O(1) peek()
* lookup to get the current valid version.
+ *
* @param lvm LayoutVersionManager
* @param key Key and Version.
* @return instance.
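A hedged, self-contained illustration of the "floor" lookup described in that javadoc, using a TreeMap keyed by version as an analogy for the priority-queue-based implementation (the handler names come from the javadoc's own example):

import java.util.TreeMap;

public final class FloorVersionLookupSketch {
  public static void main(String[] args) {
    TreeMap<Integer, String> createKeyHandlers = new TreeMap<>();
    createKeyHandlers.put(1, "CreateKeyV1");
    createKeyHandlers.put(3, "CreateKeyV2");
    // At metadata layout version 2, the floor entry (registered at version 1) is chosen.
    System.out.println(createKeyHandlers.floorEntry(2).getValue()); // CreateKeyV1
  }
}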
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/upgrade/LayoutVersionManager.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/upgrade/LayoutVersionManager.java
index 3137d756e6b..a765c2c9455 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/upgrade/LayoutVersionManager.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/upgrade/LayoutVersionManager.java
@@ -74,7 +74,6 @@ public interface LayoutVersionManager {
/**
* Generic API for returning a registered handler for a given type.
* @param type String type
- * @return
*/
default Object getHandler(String type) {
return null;
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/upgrade/UpgradeFinalizer.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/upgrade/UpgradeFinalizer.java
index 44ae94870e3..19c0498aa7a 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/upgrade/UpgradeFinalizer.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/upgrade/UpgradeFinalizer.java
@@ -50,14 +50,14 @@ public interface UpgradeFinalizer {
* Represents the current state in which the service is with regards to
* finalization after an upgrade.
* The state transitions are the following:
- * ALREADY_FINALIZED - no entry no exit from this status without restart.
+ * {@code ALREADY_FINALIZED} - no entry no exit from this status without restart.
* After an upgrade:
- * FINALIZATION_REQUIRED -(finalize)-> STARTING_FINALIZATION
- * -> FINALIZATION_IN_PROGRESS -> FINALIZATION_DONE from finalization done
+ * {@code FINALIZATION_REQUIRED -(finalize)-> STARTING_FINALIZATION
+ * -> FINALIZATION_IN_PROGRESS -> FINALIZATION_DONE} from finalization done
* there is no more move possible, after a restart the service can end up in:
- * - FINALIZATION_REQUIRED, if the finalization failed and have not reached
- * FINALIZATION_DONE,
- * - or it can be ALREADY_FINALIZED if the finalization was successfully done.
+ * - {@code FINALIZATION_REQUIRED}, if the finalization failed and has not reached
+ * {@code FINALIZATION_DONE},
+ * - or it can be {@code ALREADY_FINALIZED} if the finalization was successfully done.
*/
enum Status {
ALREADY_FINALIZED,
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/upgrade/VersionFactoryKey.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/upgrade/VersionFactoryKey.java
index bda45f5a745..6465cc85501 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/upgrade/VersionFactoryKey.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/upgrade/VersionFactoryKey.java
@@ -20,7 +20,7 @@
/**
* "Key" element to the Version specific instance factory. Currently it has 2
- * dimensions -> a 'key' string and a version. This is to support a factory
+ * dimensions -> a 'key' string and a version. This is to support a factory
* which returns an instance for a given "key" and "version".
*/
public class VersionFactoryKey {
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/util/MetricUtil.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/util/MetricUtil.java
index 23ff3c0f29e..9d903b900ac 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/util/MetricUtil.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/util/MetricUtil.java
@@ -110,4 +110,20 @@ public static List createQuantiles(MetricsRegistry registry,
sampleName, valueName, interval);
}).collect(Collectors.toList());
}
+
+ public static void stop(MutableQuantiles... quantiles) {
+ if (quantiles != null) {
+ stop(Arrays.asList(quantiles));
+ }
+ }
+
+ public static void stop(Iterable quantiles) {
+ if (quantiles != null) {
+ for (MutableQuantiles q : quantiles) {
+ if (q != null) {
+ q.stop();
+ }
+ }
+ }
+ }
}
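
A minimal usage sketch for the new null-safe stop helpers; the class and field names below are illustrative only and not part of this change.

    import java.io.Closeable;
    import java.util.List;
    import org.apache.hadoop.metrics2.lib.MutableQuantiles;
    import org.apache.hadoop.ozone.util.MetricUtil;

    // Hypothetical metrics holder that owns a list of quantiles.
    class ExampleMetrics implements Closeable {
      private final List<MutableQuantiles> latencyQuantiles;

      ExampleMetrics(List<MutableQuantiles> latencyQuantiles) {
        this.latencyQuantiles = latencyQuantiles;
      }

      @Override
      public void close() {
        // Null-safe: MetricUtil.stop skips null collections and null elements.
        MetricUtil.stop(latencyQuantiles);
      }
    }
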
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/util/PerformanceMetrics.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/util/PerformanceMetrics.java
index 3f5150bd62c..39e887eaa49 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/util/PerformanceMetrics.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/util/PerformanceMetrics.java
@@ -22,7 +22,9 @@
import org.apache.hadoop.metrics2.lib.MutableQuantiles;
import org.apache.hadoop.metrics2.lib.MutableStat;
+import java.io.Closeable;
import java.util.List;
+import java.util.Map;
/**
* The {@code PerformanceMetrics} class encapsulates a collection of related
@@ -30,7 +32,7 @@
* This class provides methods to update these metrics and to
* snapshot their values for reporting.
*/
-public class PerformanceMetrics {
+public class PerformanceMetrics implements Closeable {
private final MutableStat stat;
private final List quantiles;
private final MutableMinMax minMax;
@@ -43,12 +45,13 @@ public class PerformanceMetrics {
* @param intervals the intervals for quantiles computation. Note, each
* interval in 'intervals' increases memory usage, as it corresponds
* to a separate quantile calculator.
+ * @return {@link PerformanceMetrics} instances created, mapped by field name
*/
- public static synchronized void initializeMetrics(T source,
+ public static synchronized Map initializeMetrics(T source,
MetricsRegistry registry, String sampleName, String valueName,
int[] intervals) {
try {
- PerformanceMetricsInitializer.initialize(
+ return PerformanceMetricsInitializer.initialize(
source, registry, sampleName, valueName, intervals);
} catch (IllegalAccessException e) {
throw new RuntimeException("Failed to initialize PerformanceMetrics", e);
@@ -73,6 +76,11 @@ public PerformanceMetrics(
minMax = new MutableMinMax(registry, name, description, valueName);
}
+ @Override
+ public void close() {
+ MetricUtil.stop(quantiles);
+ }
+
/**
* Adds a value to all the aggregated metrics.
*
@@ -95,6 +103,5 @@ public void snapshot(MetricsRecordBuilder recordBuilder, boolean all) {
this.quantiles.forEach(quantile -> quantile.snapshot(recordBuilder, all));
this.minMax.snapshot(recordBuilder, all);
}
-
}
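
A sketch of how a metrics source might use the Map now returned by initializeMetrics to close the per-field PerformanceMetrics instances when it unregisters. The class, field, and sample/value names are illustrative assumptions, and the Map is assumed to be keyed by field name as the javadoc above states; whether a given field is picked up by the reflective initializer may also depend on its annotations.

    import java.util.Map;
    import org.apache.hadoop.metrics2.lib.MetricsRegistry;
    import org.apache.hadoop.ozone.util.PerformanceMetrics;

    // Hypothetical metrics source; illustration only.
    class ExampleIoMetrics {
      // Expected to be populated reflectively by initializeMetrics.
      private PerformanceMetrics readLatencyNs;

      private final Map<String, PerformanceMetrics> instances;

      ExampleIoMetrics(MetricsRegistry registry, int[] intervals) {
        instances = PerformanceMetrics.initializeMetrics(
            this, registry, "Ops", "Latency", intervals);
      }

      void unRegister() {
        // Stops the MutableQuantiles owned by each PerformanceMetrics.
        instances.values().forEach(PerformanceMetrics::close);
      }
    }
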
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/util/PerformanceMetricsInitializer.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/util/PerformanceMetricsInitializer.java
index b2e83bb780c..cb6f77e9f5c 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/util/PerformanceMetricsInitializer.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/util/PerformanceMetricsInitializer.java
@@ -21,6 +21,8 @@
import org.apache.hadoop.metrics2.lib.MetricsRegistry;
import java.lang.reflect.Field;
+import java.util.HashMap;
+import java.util.Map;
/**
* Utility class for initializing PerformanceMetrics in a MetricsSource.
@@ -36,11 +38,13 @@ private PerformanceMetricsInitializer() { }
* @param sampleName sample name
* @param valueName value name
* @param intervals intervals for quantiles
+ * @return {@link PerformanceMetrics} instances created, mapped by field name
* @throws IllegalAccessException if unable to access the field
*/
- public static void initialize(T source, MetricsRegistry registry,
+ public static Map initialize(T source, MetricsRegistry registry,
String sampleName, String valueName, int[] intervals)
throws IllegalAccessException {
+ Map instances = new HashMap<>();
Field[] fields = source.getClass().getDeclaredFields();
for (Field field : fields) {
@@ -54,8 +58,11 @@ public static void initialize(T source, MetricsRegistry registry,
sampleName, valueName, intervals);
field.setAccessible(true);
field.set(source, performanceMetrics);
+ instances.put(name, performanceMetrics);
}
}
}
+
+ return instances;
}
}
diff --git a/hadoop-hdds/common/src/main/resources/hdds-version-info.properties b/hadoop-hdds/common/src/main/resources/hdds-version-info.properties
index bf887021c5b..3ba2c2cbfa2 100644
--- a/hadoop-hdds/common/src/main/resources/hdds-version-info.properties
+++ b/hadoop-hdds/common/src/main/resources/hdds-version-info.properties
@@ -18,9 +18,6 @@
version=${declared.hdds.version}
revision=${version-info.scm.commit}
-branch=${version-info.scm.branch}
-user=${user.name}
-date=${version-info.build.time}
url=${version-info.scm.uri}
srcChecksum=${version-info.source.md5}
hadoopProtoc2Version=${proto2.hadooprpc.protobuf.version}
diff --git a/hadoop-hdds/common/src/main/resources/ozone-default.xml b/hadoop-hdds/common/src/main/resources/ozone-default.xml
index 20c1bed89be..fdeb5c1c043 100644
--- a/hadoop-hdds/common/src/main/resources/ozone-default.xml
+++ b/hadoop-hdds/common/src/main/resources/ozone-default.xml
@@ -160,6 +160,13 @@
this not set. Ideally, this should be mapped to a fast disk like an SSD.
+
+ ozone.scm.container.list.max.count
+ 4096
+ OZONE, SCM, CONTAINER
+ The maximum number of container info entries that can be included in
+ the response of a ListContainer request.
+
hdds.datanode.dir
@@ -272,16 +279,16 @@
hdds.ratis.snapshot.threshold
- 10000
- OZONE, RATIS
+ 100000
+ OZONE, CONTAINER, RATIS
Number of transactions after which a ratis snapshot should be
taken.
hdds.container.ratis.statemachine.max.pending.apply-transactions
- 10000
- OZONE, RATIS
+ 100000
+ OZONE, CONTAINER, RATIS
Maximum number of pending apply transactions in a data
pipeline. The default value is kept same as default snapshot threshold
hdds.ratis.snapshot.threshold.
@@ -860,6 +867,15 @@
The default read threshold to use memory mapped buffers.
+
+ ozone.chunk.read.mapped.buffer.max.count
+ 0
+ OZONE, SCM, CONTAINER, PERFORMANCE
+
+ The maximum number of memory-mapped buffers allowed for a DN.
+ The default of 0 means no mapped buffers are allowed for data reads.
+
+
ozone.scm.container.layout
FILE_PER_BLOCK
@@ -1561,7 +1577,7 @@
hdds.datanode.metadata.rocksdb.cache.size
- 64MB
+ 1GB
OZONE, DATANODE, MANAGEMENT
Size of the block metadata cache shared among RocksDB instances on each
@@ -3406,14 +3422,6 @@
unhealthy will each have their own limit.
-
- ozone.client.list.trash.keys.max
- 1000
- OZONE, CLIENT
-
- The maximum number of keys to return for a list trash request.
-
-
ozone.http.basedir
@@ -3468,9 +3476,9 @@
ozone.s3g.client.buffer.size
OZONE, S3GATEWAY
- 4KB
+ 4MB
- The size of the buffer which is for read block. (4KB by default).
+ The size of the buffer used for reading blocks (4MB by default).
@@ -3742,6 +3750,15 @@
+
+ ozone.snapshot.deep.cleaning.enabled
+ false
+ OZONE, PERFORMANCE, OM
+
+ Flag to enable/disable snapshot deep cleaning.
+
+
+
ozone.scm.event.ContainerReport.thread.pool.size
10
@@ -4224,12 +4241,27 @@
+
+ ozone.hbase.enhancements.allowed
+ false
+ OZONE, OM
+
+ When set to false, server-side HBase enhancement-related Ozone (experimental) features
+ are disabled (not allowed to be enabled) regardless of whether those configs are set.
+
+ Here is the list of configs and values overridden when this config is set to false:
+ 1. ozone.fs.hsync.enabled = false
+
+ A warning message will be printed if any of the above configs are overridden by this.
+
+
ozone.fs.hsync.enabled
false
- OZONE, CLIENT
+ OZONE, CLIENT, OM
- Enable hsync/hflush. By default they are disabled.
+ Enable hsync/hflush on the Ozone Manager and/or client side. Disabled by default.
+ Can be enabled only when ozone.hbase.enhancements.allowed = true
@@ -4505,19 +4537,31 @@
- ozone.ec.grpc.zerocopy.enabled
- true
+ ozone.om.max.buckets
+ 100000
+ OZONE, OM
+
+ Maximum number of buckets across all volumes.
+
+
+
+
+ ozone.volume.io.percentiles.intervals.seconds
+ 60
OZONE, DATANODE
- Specify if zero-copy should be enabled for EC GRPC protocol.
+ This setting specifies the interval (in seconds) for monitoring percentile performance metrics.
+ It helps in tracking the read and write performance of DataNodes in real-time,
+ allowing for better identification and analysis of performance issues.
+
- ozone.om.max.buckets
- 100000
+ ozone.om.server.list.max.size
+ 1000
OZONE, OM
- maximum number of buckets across all volumes.
+ Configures the maximum server-side response size for list calls on the OM.
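
As a hedged sketch of the gating described in the ozone.hbase.enhancements.allowed and ozone.fs.hsync.enabled descriptions above: both keys must be set for hsync to take effect. The keys come from this file; the setter is the standard Hadoop/Ozone configuration API; the class and method below are illustrative only.

    import org.apache.hadoop.hdds.conf.OzoneConfiguration;

    // Illustration only: without the first key, the hsync key is overridden to false.
    class HsyncConfigSketch {
      static OzoneConfiguration hsyncEnabledConf() {
        OzoneConfiguration conf = new OzoneConfiguration();
        conf.setBoolean("ozone.hbase.enhancements.allowed", true);
        conf.setBoolean("ozone.fs.hsync.enabled", true);
        return conf;
      }
    }
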
diff --git a/hadoop-hdds/common/src/test/java/org/apache/hadoop/hdds/protocol/TestDatanodeDetails.java b/hadoop-hdds/common/src/test/java/org/apache/hadoop/hdds/protocol/TestDatanodeDetails.java
index aeb1e207e70..78465fd2816 100644
--- a/hadoop-hdds/common/src/test/java/org/apache/hadoop/hdds/protocol/TestDatanodeDetails.java
+++ b/hadoop-hdds/common/src/test/java/org/apache/hadoop/hdds/protocol/TestDatanodeDetails.java
@@ -17,12 +17,16 @@
*/
package org.apache.hadoop.hdds.protocol;
+import com.google.common.collect.ImmutableSet;
import org.apache.hadoop.hdds.DatanodeVersion;
import org.apache.hadoop.hdds.protocol.DatanodeDetails.Port;
+import org.apache.hadoop.hdds.protocol.DatanodeDetails.Port.Name;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
import org.junit.jupiter.api.Test;
import java.util.Set;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
import static org.apache.hadoop.hdds.protocol.DatanodeDetails.Port.Name.ALL_PORTS;
import static org.apache.hadoop.hdds.protocol.DatanodeDetails.Port.Name.V0_PORTS;
@@ -48,21 +52,36 @@ void protoIncludesNewPortsOnlyForV1() {
subject.toProto(VERSION_HANDLES_UNKNOWN_DN_PORTS.toProtoValue());
assertPorts(protoV1, ALL_PORTS);
}
+ @Test
+ void testRequiredPortsProto() {
+ DatanodeDetails subject = MockDatanodeDetails.randomDatanodeDetails();
+ Set requiredPorts = Stream.of(Port.Name.STANDALONE, Port.Name.RATIS)
+ .collect(Collectors.toSet());
+ HddsProtos.DatanodeDetailsProto proto =
+ subject.toProto(subject.getCurrentVersion(), requiredPorts);
+ assertPorts(proto, ImmutableSet.copyOf(requiredPorts));
+
+ HddsProtos.DatanodeDetailsProto ioPortProto =
+ subject.toProto(subject.getCurrentVersion(), Name.IO_PORTS);
+ assertPorts(ioPortProto, ImmutableSet.copyOf(Name.IO_PORTS));
+ }
@Test
public void testNewBuilderCurrentVersion() {
// test that if the current version is not set (Ozone 1.4.0 and earlier),
// it falls back to SEPARATE_RATIS_PORTS_AVAILABLE
DatanodeDetails dn = MockDatanodeDetails.randomDatanodeDetails();
+ Set requiredPorts = Stream.of(Port.Name.STANDALONE, Port.Name.RATIS)
+ .collect(Collectors.toSet());
HddsProtos.DatanodeDetailsProto.Builder protoBuilder =
- dn.toProtoBuilder(DEFAULT_VERSION.toProtoValue());
+ dn.toProtoBuilder(DEFAULT_VERSION.toProtoValue(), requiredPorts);
protoBuilder.clearCurrentVersion();
DatanodeDetails dn2 = DatanodeDetails.newBuilder(protoBuilder.build()).build();
assertEquals(DatanodeVersion.SEPARATE_RATIS_PORTS_AVAILABLE.toProtoValue(), dn2.getCurrentVersion());
// test that if the current version is set, it is used
protoBuilder =
- dn.toProtoBuilder(DEFAULT_VERSION.toProtoValue());
+ dn.toProtoBuilder(DEFAULT_VERSION.toProtoValue(), requiredPorts);
DatanodeDetails dn3 = DatanodeDetails.newBuilder(protoBuilder.build()).build();
assertEquals(DatanodeVersion.CURRENT.toProtoValue(), dn3.getCurrentVersion());
}
diff --git a/hadoop-hdds/common/src/test/java/org/apache/hadoop/hdds/scm/container/common/helpers/TestExcludeList.java b/hadoop-hdds/common/src/test/java/org/apache/hadoop/hdds/scm/container/common/helpers/TestExcludeList.java
index 5571330ee64..d8af0c4d5ab 100644
--- a/hadoop-hdds/common/src/test/java/org/apache/hadoop/hdds/scm/container/common/helpers/TestExcludeList.java
+++ b/hadoop-hdds/common/src/test/java/org/apache/hadoop/hdds/scm/container/common/helpers/TestExcludeList.java
@@ -37,19 +37,19 @@ public class TestExcludeList {
public void excludeNodesShouldBeCleanedBasedOnGivenTime() {
ExcludeList list = new ExcludeList(10, clock);
list.addDatanode(DatanodeDetails.newBuilder().setUuid(UUID.randomUUID())
- .setIpAddress("127.0.0.1").setHostName("localhost").addPort(
- DatanodeDetails.newPort(DatanodeDetails.Port.Name.STANDALONE, 2001))
+ .setIpAddress("127.0.0.1").setHostName("localhost")
+ .addPort(DatanodeDetails.newStandalonePort(2001))
.build());
assertEquals(1, list.getDatanodes().size());
clock.fastForward(11);
assertEquals(0, list.getDatanodes().size());
list.addDatanode(DatanodeDetails.newBuilder().setUuid(UUID.randomUUID())
- .setIpAddress("127.0.0.2").setHostName("localhost").addPort(
- DatanodeDetails.newPort(DatanodeDetails.Port.Name.STANDALONE, 2001))
+ .setIpAddress("127.0.0.2").setHostName("localhost")
+ .addPort(DatanodeDetails.newStandalonePort(2001))
.build());
list.addDatanode(DatanodeDetails.newBuilder().setUuid(UUID.randomUUID())
- .setIpAddress("127.0.0.3").setHostName("localhost").addPort(
- DatanodeDetails.newPort(DatanodeDetails.Port.Name.STANDALONE, 2001))
+ .setIpAddress("127.0.0.3").setHostName("localhost")
+ .addPort(DatanodeDetails.newStandalonePort(2001))
.build());
assertEquals(2, list.getDatanodes().size());
}
@@ -58,8 +58,8 @@ public void excludeNodesShouldBeCleanedBasedOnGivenTime() {
public void excludeNodeShouldNotBeCleanedIfExpiryTimeIsZero() {
ExcludeList list = new ExcludeList(0, clock);
list.addDatanode(DatanodeDetails.newBuilder().setUuid(UUID.randomUUID())
- .setIpAddress("127.0.0.1").setHostName("localhost").addPort(
- DatanodeDetails.newPort(DatanodeDetails.Port.Name.STANDALONE, 2001))
+ .setIpAddress("127.0.0.1").setHostName("localhost")
+ .addPort(DatanodeDetails.newStandalonePort(2001))
.build());
assertEquals(1, list.getDatanodes().size());
clock.fastForward(1);
diff --git a/hadoop-hdds/common/src/test/java/org/apache/hadoop/hdds/utils/MockGatheringChannel.java b/hadoop-hdds/common/src/test/java/org/apache/hadoop/hdds/utils/MockGatheringChannel.java
index ce6f58dadcb..83b68512380 100644
--- a/hadoop-hdds/common/src/test/java/org/apache/hadoop/hdds/utils/MockGatheringChannel.java
+++ b/hadoop-hdds/common/src/test/java/org/apache/hadoop/hdds/utils/MockGatheringChannel.java
@@ -21,8 +21,11 @@
import java.nio.ByteBuffer;
import java.nio.channels.GatheringByteChannel;
import java.nio.channels.WritableByteChannel;
+import java.util.concurrent.ThreadLocalRandom;
import static com.google.common.base.Preconditions.checkElementIndex;
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.junit.jupiter.api.Assertions.assertEquals;
/**
* {@link GatheringByteChannel} implementation for testing. Delegates
@@ -45,11 +48,32 @@ public long write(ByteBuffer[] srcs, int offset, int length)
checkElementIndex(offset, srcs.length, "offset");
checkElementIndex(offset + length - 1, srcs.length, "offset+length");
- long bytes = 0;
- for (ByteBuffer b : srcs) {
- bytes += write(b);
+ long fullLength = 0;
+ for (int i = offset; i < srcs.length; i++) {
+ fullLength += srcs[i].remaining();
}
- return bytes;
+ if (fullLength <= 0) {
+ return 0;
+ }
+
+ // simulate partial write by setting a random partial length
+ final long partialLength = ThreadLocalRandom.current().nextLong(fullLength + 1);
+
+ long written = 0;
+ for (int i = offset; i < srcs.length; i++) {
+ for (final ByteBuffer src = srcs[i]; src.hasRemaining();) {
+ final long n = partialLength - written; // write at most n bytes
+ assertThat(n).isGreaterThanOrEqualTo(0);
+ if (n == 0) {
+ return written;
+ }
+
+ final int remaining = src.remaining();
+ final int adjustment = remaining <= n ? 0 : Math.toIntExact(remaining - n);
+ written += adjustedWrite(src, adjustment);
+ }
+ }
+ return written;
}
@Override
@@ -59,7 +83,40 @@ public long write(ByteBuffer[] srcs) throws IOException {
@Override
public int write(ByteBuffer src) throws IOException {
- return delegate.write(src);
+ final int remaining = src.remaining();
+ if (remaining <= 0) {
+ return 0;
+ }
+ // Simulate partial write by a random adjustment.
+ final int adjustment = ThreadLocalRandom.current().nextInt(remaining + 1);
+ return adjustedWrite(src, adjustment);
+ }
+
+ /** Simulate partial write by the given adjustment. */
+ private int adjustedWrite(ByteBuffer src, int adjustment) throws IOException {
+ assertThat(adjustment).isGreaterThanOrEqualTo(0);
+ final int remaining = src.remaining();
+ if (remaining <= 0) {
+ return 0;
+ }
+ assertThat(adjustment).isLessThanOrEqualTo(remaining);
+
+ final int oldLimit = src.limit();
+ final int newLimit = oldLimit - adjustment;
+ src.limit(newLimit);
+ assertEquals(newLimit, src.limit());
+ final int toWrite = remaining - adjustment;
+ assertEquals(toWrite, src.remaining());
+
+ final int written = delegate.write(src);
+ assertEquals(newLimit, src.limit());
+ assertEquals(toWrite - written, src.remaining());
+
+ src.limit(oldLimit);
+ assertEquals(oldLimit, src.limit());
+ assertEquals(remaining - written, src.remaining());
+
+ return written;
}
@Override
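
A self-contained illustration of the limit() trick used by adjustedWrite above to simulate a partial write: shrinking the limit hides the tail of the buffer from the consumer, and restoring it afterwards leaves the held-back bytes still remaining. Names here are illustrative only.

    import java.nio.ByteBuffer;

    class PartialWriteSketch {
      public static void main(String[] args) {
        ByteBuffer src = ByteBuffer.wrap(new byte[] {1, 2, 3, 4, 5});
        int adjustment = 2;                 // hold back the last 2 bytes
        int oldLimit = src.limit();
        src.limit(oldLimit - adjustment);   // consumer now sees only 3 remaining bytes
        while (src.hasRemaining()) {
          src.get();                        // stand-in for delegate.write(src)
        }
        src.limit(oldLimit);                // restore limit; the held-back bytes remain
        System.out.println("remaining after partial write: " + src.remaining()); // prints 2
      }
    }
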
diff --git a/hadoop-hdds/common/src/test/java/org/apache/hadoop/ozone/common/TestChecksum.java b/hadoop-hdds/common/src/test/java/org/apache/hadoop/ozone/common/TestChecksum.java
index 829f4bb150c..7ddb605c0f8 100644
--- a/hadoop-hdds/common/src/test/java/org/apache/hadoop/ozone/common/TestChecksum.java
+++ b/hadoop-hdds/common/src/test/java/org/apache/hadoop/ozone/common/TestChecksum.java
@@ -19,7 +19,10 @@
import org.apache.commons.lang3.RandomStringUtils;
import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos;
-import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.ValueSource;
+
+import java.nio.ByteBuffer;
import static java.nio.charset.StandardCharsets.UTF_8;
import static org.junit.jupiter.api.Assertions.assertEquals;
@@ -35,23 +38,25 @@ public class TestChecksum {
private static final ContainerProtos.ChecksumType CHECKSUM_TYPE_DEFAULT =
ContainerProtos.ChecksumType.SHA256;
- private Checksum getChecksum(ContainerProtos.ChecksumType type) {
+ private Checksum getChecksum(ContainerProtos.ChecksumType type, boolean allowChecksumCache) {
if (type == null) {
type = CHECKSUM_TYPE_DEFAULT;
}
- return new Checksum(type, BYTES_PER_CHECKSUM);
+ return new Checksum(type, BYTES_PER_CHECKSUM, allowChecksumCache);
}
/**
* Tests {@link Checksum#verifyChecksum(byte[], ChecksumData)}.
*/
- @Test
- public void testVerifyChecksum() throws Exception {
- Checksum checksum = getChecksum(null);
+ @ParameterizedTest
+ @ValueSource(booleans = {true, false})
+ public void testVerifyChecksum(boolean useChecksumCache) throws Exception {
+ Checksum checksum = getChecksum(null, useChecksumCache);
int dataLen = 55;
byte[] data = RandomStringUtils.randomAlphabetic(dataLen).getBytes(UTF_8);
+ ByteBuffer byteBuffer = ByteBuffer.wrap(data);
- ChecksumData checksumData = checksum.computeChecksum(data);
+ ChecksumData checksumData = checksum.computeChecksum(byteBuffer, useChecksumCache);
// A checksum is calculate for each bytesPerChecksum number of bytes in
// the data. Since that value is 10 here and the data length is 55, we
@@ -65,11 +70,13 @@ public void testVerifyChecksum() throws Exception {
/**
* Tests that if data is modified, then the checksums should not match.
*/
- @Test
- public void testIncorrectChecksum() throws Exception {
- Checksum checksum = getChecksum(null);
+ @ParameterizedTest
+ @ValueSource(booleans = {true, false})
+ public void testIncorrectChecksum(boolean useChecksumCache) throws Exception {
+ Checksum checksum = getChecksum(null, useChecksumCache);
byte[] data = RandomStringUtils.randomAlphabetic(55).getBytes(UTF_8);
- ChecksumData originalChecksumData = checksum.computeChecksum(data);
+ ByteBuffer byteBuffer = ByteBuffer.wrap(data);
+ ChecksumData originalChecksumData = checksum.computeChecksum(byteBuffer, useChecksumCache);
// Change the data and check if new checksum matches the original checksum.
// Modifying one byte of data should be enough for the checksum data to
@@ -83,13 +90,14 @@ public void testIncorrectChecksum() throws Exception {
* Tests that checksum calculated using two different checksumTypes should
* not match.
*/
- @Test
- public void testChecksumMismatchForDifferentChecksumTypes() {
+ @ParameterizedTest
+ @ValueSource(booleans = {true, false})
+ public void testChecksumMismatchForDifferentChecksumTypes(boolean useChecksumCache) {
// Checksum1 of type SHA-256
- Checksum checksum1 = getChecksum(null);
+ Checksum checksum1 = getChecksum(null, useChecksumCache);
// Checksum2 of type CRC32
- Checksum checksum2 = getChecksum(ContainerProtos.ChecksumType.CRC32);
+ Checksum checksum2 = getChecksum(ContainerProtos.ChecksumType.CRC32, useChecksumCache);
// The two checksums should not match as they have different types
assertNotEquals(checksum1, checksum2, "Checksums should not match for different checksum types");
diff --git a/hadoop-hdds/common/src/test/java/org/apache/hadoop/ozone/common/TestChecksumCache.java b/hadoop-hdds/common/src/test/java/org/apache/hadoop/ozone/common/TestChecksumCache.java
new file mode 100644
index 00000000000..49e0b75127a
--- /dev/null
+++ b/hadoop-hdds/common/src/test/java/org/apache/hadoop/ozone/common/TestChecksumCache.java
@@ -0,0 +1,75 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package org.apache.hadoop.ozone.common;
+
+import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ChecksumType;
+import org.apache.hadoop.ozone.common.Checksum.Algorithm;
+import org.apache.ratis.thirdparty.com.google.protobuf.ByteString;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.EnumSource;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.nio.ByteBuffer;
+import java.util.List;
+import java.util.function.Function;
+
+/**
+ * Test class for {@link ChecksumCache}.
+ */
+class TestChecksumCache {
+ public static final Logger LOG = LoggerFactory.getLogger(TestChecksumCache.class);
+
+ @ParameterizedTest
+ @EnumSource(ChecksumType.class)
+ void testComputeChecksum(ChecksumType checksumType) throws Exception {
+ final int bytesPerChecksum = 16;
+ ChecksumCache checksumCache = new ChecksumCache(bytesPerChecksum);
+
+ final int size = 66;
+ byte[] byteArray = new byte[size];
+ // Fill byteArray with bytes from 0 to 127 for deterministic testing
+ for (int i = 0; i < size; i++) {
+ byteArray[i] = (byte) (i % 128);
+ }
+
+ final Function function = Algorithm.valueOf(checksumType).newChecksumFunction();
+
+ int iEnd = size / bytesPerChecksum + (size % bytesPerChecksum == 0 ? 0 : 1);
+ List lastRes = null;
+ for (int i = 0; i < iEnd; i++) {
+ int byteBufferLength = Integer.min(byteArray.length, bytesPerChecksum * (i + 1));
+ ByteBuffer byteBuffer = ByteBuffer.wrap(byteArray, 0, byteBufferLength);
+
+ try (ChunkBuffer chunkBuffer = ChunkBuffer.wrap(byteBuffer.asReadOnlyBuffer())) {
+ List res = checksumCache.computeChecksum(chunkBuffer, function);
+ System.out.println(res);
+ // Verify that every entry in the res list except the last one is the same as the one in lastRes list
+ if (i > 0) {
+ for (int j = 0; j < res.size() - 1; j++) {
+ Assertions.assertEquals(lastRes.get(j), res.get(j));
+ }
+ }
+ lastRes = res;
+ }
+ }
+
+ // Sanity check
+ checksumCache.clear();
+ }
+}
diff --git a/hadoop-hdds/common/src/test/java/org/apache/hadoop/ozone/container/ContainerTestHelper.java b/hadoop-hdds/common/src/test/java/org/apache/hadoop/ozone/container/ContainerTestHelper.java
index 2b7592e1c35..20372dcc6ea 100644
--- a/hadoop-hdds/common/src/test/java/org/apache/hadoop/ozone/container/ContainerTestHelper.java
+++ b/hadoop-hdds/common/src/test/java/org/apache/hadoop/ozone/container/ContainerTestHelper.java
@@ -293,18 +293,31 @@ public static Builder newReadChunkRequestBuilder(Pipeline pipeline,
*/
public static ContainerCommandRequestProto getCreateContainerRequest(
long containerID, Pipeline pipeline) throws IOException {
+ return getCreateContainerRequest(containerID, pipeline, ContainerProtos.ContainerDataProto.State.OPEN);
+ }
+
+
+ /**
+ * Returns a create container command for test purposes. There are a bunch of
+ * tests where we need to just send a request and get a reply.
+ *
+ * @return ContainerCommandRequestProto.
+ */
+ public static ContainerCommandRequestProto getCreateContainerRequest(
+ long containerID, Pipeline pipeline, ContainerProtos.ContainerDataProto.State state) throws IOException {
LOG.trace("addContainer: {}", containerID);
- return getContainerCommandRequestBuilder(containerID, pipeline).build();
+ return getContainerCommandRequestBuilder(containerID, pipeline, state)
+ .build();
}
private static Builder getContainerCommandRequestBuilder(long containerID,
- Pipeline pipeline) throws IOException {
+ Pipeline pipeline, ContainerProtos.ContainerDataProto.State state) throws IOException {
Builder request =
ContainerCommandRequestProto.newBuilder();
request.setCmdType(ContainerProtos.Type.CreateContainer);
request.setContainerID(containerID);
request.setCreateContainer(
- ContainerProtos.CreateContainerRequestProto.getDefaultInstance());
+ ContainerProtos.CreateContainerRequestProto.getDefaultInstance().toBuilder().setState(state).build());
request.setDatanodeUuid(pipeline.getFirstNode().getUuidString());
return request;
@@ -320,7 +333,8 @@ public static ContainerCommandRequestProto getCreateContainerSecureRequest(
long containerID, Pipeline pipeline, Token> token) throws IOException {
LOG.trace("addContainer: {}", containerID);
- Builder request = getContainerCommandRequestBuilder(containerID, pipeline);
+ Builder request = getContainerCommandRequestBuilder(containerID, pipeline,
+ ContainerProtos.ContainerDataProto.State.OPEN);
if (token != null) {
request.setEncodedToken(token.encodeToUrlString());
}
diff --git a/hadoop-hdds/config/pom.xml b/hadoop-hdds/config/pom.xml
index 1c71bf3d90a..60c63475ae3 100644
--- a/hadoop-hdds/config/pom.xml
+++ b/hadoop-hdds/config/pom.xml
@@ -20,10 +20,10 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd">
org.apache.ozone
hdds
- 1.5.0-SNAPSHOT
+ 2.0.0-SNAPSHOT
hdds-config
- 1.5.0-SNAPSHOT
+ 2.0.0-SNAPSHOT
Apache Ozone Distributed Data Store Config Tools
Apache Ozone HDDS Config
jar
diff --git a/hadoop-hdds/config/src/main/java/org/apache/hadoop/hdds/conf/ConfigurationSource.java b/hadoop-hdds/config/src/main/java/org/apache/hadoop/hdds/conf/ConfigurationSource.java
index b1a20c9aecb..0d6c0c90878 100644
--- a/hadoop-hdds/config/src/main/java/org/apache/hadoop/hdds/conf/ConfigurationSource.java
+++ b/hadoop-hdds/config/src/main/java/org/apache/hadoop/hdds/conf/ConfigurationSource.java
@@ -108,7 +108,7 @@ default String[] getTrimmedStrings(String name) {
/**
* Gets the configuration entries where the key contains the prefix. This
* method will strip the prefix from the key in the return Map.
- * Example: somePrefix.key->value will be key->value in the returned map.
+ * Example: {@code somePrefix.key->value} will be {@code key->value} in the returned map.
* @param keyPrefix Prefix to search.
* @return Map containing keys that match and their values.
*/
diff --git a/hadoop-hdds/container-service/pom.xml b/hadoop-hdds/container-service/pom.xml
index d73bea95895..c21ca8203b5 100644
--- a/hadoop-hdds/container-service/pom.xml
+++ b/hadoop-hdds/container-service/pom.xml
@@ -20,10 +20,10 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd">
org.apache.ozone
hdds
- 1.5.0-SNAPSHOT
+ 2.0.0-SNAPSHOT
hdds-container-service
- 1.5.0-SNAPSHOT
+ 2.0.0-SNAPSHOT
Apache Ozone Distributed Data Store Container Service
Apache Ozone HDDS Container Service
jar
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/DNMXBean.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/DNMXBean.java
index d36fcdb6fc7..9c077a8e27b 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/DNMXBean.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/DNMXBean.java
@@ -26,4 +26,32 @@
*/
@InterfaceAudience.Private
public interface DNMXBean extends ServiceRuntimeInfo {
+
+ /**
+ * Gets the datanode hostname.
+ *
+ * @return the hostname of the datanode.
+ */
+ String getHostname();
+
+ /**
+ * Gets the client rpc port.
+ *
+ * @return the client rpc port
+ */
+ String getClientRpcPort();
+
+ /**
+ * Gets the http port.
+ *
+ * @return the http port
+ */
+ String getHttpPort();
+
+ /**
+ * Gets the https port.
+ *
+ * @return the https port
+ */
+ String getHttpsPort();
}
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/DNMXBeanImpl.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/DNMXBeanImpl.java
index f7b484c6bb3..5a0a4556636 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/DNMXBeanImpl.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/DNMXBeanImpl.java
@@ -25,8 +25,53 @@
* This is the JMX management class for DN information.
*/
public class DNMXBeanImpl extends ServiceRuntimeInfoImpl implements DNMXBean {
- public DNMXBeanImpl(
- VersionInfo versionInfo) {
+
+ private String hostName;
+ private String clientRpcPort;
+ private String httpPort;
+ private String httpsPort;
+
+ public DNMXBeanImpl(VersionInfo versionInfo) {
super(versionInfo);
}
+
+ @Override
+ public String getHostname() {
+ return hostName;
+ }
+
+ @Override
+ public String getClientRpcPort() {
+ return clientRpcPort;
+ }
+
+ @Override
+ public String getHttpPort() {
+ return httpPort;
+ }
+
+ @Override
+ public String getHttpsPort() {
+ return httpsPort;
+ }
+
+ public void setHttpPort(String httpPort) {
+ this.httpPort = httpPort;
+ }
+
+ public void setHostName(String hostName) {
+ this.hostName = hostName;
+ }
+
+ public void setClientRpcPort(String rpcPort) {
+ this.clientRpcPort = rpcPort;
+ }
+
+ public String getHostName() {
+ return hostName;
+ }
+
+ public void setHttpsPort(String httpsPort) {
+ this.httpsPort = httpsPort;
+ }
}
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/HddsDatanodeService.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/HddsDatanodeService.java
index 6b32b74dc7c..de21e37503a 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/HddsDatanodeService.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/HddsDatanodeService.java
@@ -40,6 +40,7 @@
import org.apache.hadoop.hdds.protocol.DatanodeDetails;
import org.apache.hadoop.hdds.protocol.SecretKeyProtocol;
import org.apache.hadoop.hdds.protocolPB.SCMSecurityProtocolClientSideTranslatorPB;
+import org.apache.hadoop.hdds.scm.ha.SCMHAUtils;
import org.apache.hadoop.hdds.security.SecurityConfig;
import org.apache.hadoop.hdds.security.symmetric.DefaultSecretKeyClient;
import org.apache.hadoop.hdds.security.symmetric.SecretKeyClient;
@@ -116,8 +117,7 @@ public class HddsDatanodeService extends GenericCli implements ServicePlugin {
private final Map ratisMetricsMap =
new ConcurrentHashMap<>();
private List ratisReporterList = null;
- private DNMXBeanImpl serviceRuntimeInfo =
- new DNMXBeanImpl(HddsVersionInfo.HDDS_VERSION_INFO) { };
+ private DNMXBeanImpl serviceRuntimeInfo;
private ObjectName dnInfoBeanName;
private HddsDatanodeClientProtocolServer clientProtocolServer;
private OzoneAdmins admins;
@@ -210,6 +210,12 @@ public void start(OzoneConfiguration configuration) {
}
public void start() {
+ serviceRuntimeInfo = new DNMXBeanImpl(HddsVersionInfo.HDDS_VERSION_INFO) {
+ @Override
+ public String getNamespace() {
+ return SCMHAUtils.getScmServiceId(conf);
+ }
+ };
serviceRuntimeInfo.setStartTime();
ratisReporterList = RatisDropwizardExports
@@ -222,13 +228,13 @@ public void start() {
String ip = InetAddress.getByName(hostname).getHostAddress();
datanodeDetails = initializeDatanodeDetails();
datanodeDetails.setHostName(hostname);
+ serviceRuntimeInfo.setHostName(hostname);
datanodeDetails.setIpAddress(ip);
datanodeDetails.setVersion(
HddsVersionInfo.HDDS_VERSION_INFO.getVersion());
datanodeDetails.setSetupTime(Time.now());
datanodeDetails.setRevision(
HddsVersionInfo.HDDS_VERSION_INFO.getRevision());
- datanodeDetails.setBuildDate(HddsVersionInfo.HDDS_VERSION_INFO.getDate());
TracingUtil.initTracing(
"HddsDatanodeService." + datanodeDetails.getUuidString()
.substring(0, 8), conf);
@@ -295,23 +301,30 @@ public void start() {
httpServer = new HddsDatanodeHttpServer(conf);
httpServer.start();
HttpConfig.Policy policy = HttpConfig.getHttpPolicy(conf);
+
if (policy.isHttpEnabled()) {
- datanodeDetails.setPort(DatanodeDetails.newPort(HTTP,
- httpServer.getHttpAddress().getPort()));
+ int httpPort = httpServer.getHttpAddress().getPort();
+ datanodeDetails.setPort(DatanodeDetails.newPort(HTTP, httpPort));
+ serviceRuntimeInfo.setHttpPort(String.valueOf(httpPort));
}
+
if (policy.isHttpsEnabled()) {
- datanodeDetails.setPort(DatanodeDetails.newPort(HTTPS,
- httpServer.getHttpsAddress().getPort()));
+ int httpsPort = httpServer.getHttpsAddress().getPort();
+ datanodeDetails.setPort(DatanodeDetails.newPort(HTTPS, httpsPort));
+ serviceRuntimeInfo.setHttpsPort(String.valueOf(httpsPort));
}
+
} catch (Exception ex) {
LOG.error("HttpServer failed to start.", ex);
}
-
clientProtocolServer = new HddsDatanodeClientProtocolServer(
datanodeDetails, conf, HddsVersionInfo.HDDS_VERSION_INFO,
reconfigurationHandler);
+ int clientRpcPort = clientProtocolServer.getClientRpcAddress().getPort();
+ serviceRuntimeInfo.setClientRpcPort(String.valueOf(clientRpcPort));
+
// Get admin list
String starterUser =
UserGroupInformation.getCurrentUser().getShortUserName();
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/HddsPolicyProvider.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/HddsPolicyProvider.java
index eeed4fab5f7..52217ce7f83 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/HddsPolicyProvider.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/HddsPolicyProvider.java
@@ -24,7 +24,7 @@
import org.apache.hadoop.security.authorize.Service;
import org.apache.ratis.util.MemoizedSupplier;
-import java.util.Arrays;
+import java.util.Collections;
import java.util.List;
import java.util.function.Supplier;
@@ -50,7 +50,7 @@ public static HddsPolicyProvider getInstance() {
}
private static final List DN_SERVICES =
- Arrays.asList(
+ Collections.singletonList(
new Service(
OZONE_SECURITY_RECONFIGURE_PROTOCOL_ACL,
ReconfigureProtocol.class)
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/DNContainerOperationClient.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/DNContainerOperationClient.java
index 969add4a15c..3c08e58f9bf 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/DNContainerOperationClient.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/DNContainerOperationClient.java
@@ -33,9 +33,7 @@
import org.apache.hadoop.hdds.scm.storage.ContainerProtocolCalls;
import org.apache.hadoop.hdds.security.SecurityConfig;
import org.apache.hadoop.hdds.security.symmetric.SecretKeySignerClient;
-import org.apache.hadoop.hdds.security.x509.certificate.client.CACertificateProvider;
import org.apache.hadoop.hdds.security.x509.certificate.client.CertificateClient;
-import org.apache.hadoop.hdds.utils.HAUtils;
import org.apache.hadoop.ozone.OzoneSecurityUtil;
import jakarta.annotation.Nonnull;
import org.apache.hadoop.ozone.container.common.helpers.TokenHelper;
@@ -73,11 +71,7 @@ private static XceiverClientManager createClientManager(
throws IOException {
ClientTrustManager trustManager = null;
if (OzoneSecurityUtil.isSecurityEnabled(conf)) {
- CACertificateProvider localCaCerts =
- () -> HAUtils.buildCAX509List(certificateClient, conf);
- CACertificateProvider remoteCacerts =
- () -> HAUtils.buildCAX509List(null, conf);
- trustManager = new ClientTrustManager(remoteCacerts, localCaCerts);
+ trustManager = certificateClient.createClientTrustManager();
}
DatanodeConfiguration dnConf = conf.getObject(DatanodeConfiguration.class);
return new XceiverClientManager(conf,
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ReconcileContainerTask.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ReconcileContainerTask.java
index ac42efd45ad..5d949e90b19 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ReconcileContainerTask.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ReconcileContainerTask.java
@@ -69,6 +69,16 @@ protected Object getCommandForDebug() {
return command.toString();
}
+ @Override
+ protected String getMetricName() {
+ return "ContainerReconciliations";
+ }
+
+ @Override
+ protected String getMetricDescriptionSegment() {
+ return "Container Reconciliations";
+ }
+
@Override
public boolean equals(Object o) {
if (this == o) {
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/BlockDeletingServiceMetrics.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/BlockDeletingServiceMetrics.java
index 7487f757fe5..80c390f3b83 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/BlockDeletingServiceMetrics.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/BlockDeletingServiceMetrics.java
@@ -55,8 +55,7 @@ public final class BlockDeletingServiceMetrics {
@Metric(about = "The total number of DeleteBlockTransaction received")
private MutableCounterLong receivedTransactionCount;
- @Metric(about = "The total number of DeleteBlockTransaction" +
- " that is a retry Transaction")
+ @Metric(about = "The total number of DeleteBlockTransaction that is a retry Transaction")
private MutableCounterLong receivedRetryTransactionCount;
@Metric(about = "The total number of Container received to be processed")
@@ -74,10 +73,15 @@ public final class BlockDeletingServiceMetrics {
@Metric(about = "The total number of Container chosen to be deleted.")
private MutableGaugeLong totalContainerChosenCount;
- @Metric(about = "The total number of transactions which failed due" +
- " to container lock wait timeout.")
+ @Metric(about = "The total number of transactions which failed due to container lock wait timeout.")
private MutableGaugeLong totalLockTimeoutTransactionCount;
+ @Metric(about = "The number of delete block transactions successful.")
+ private MutableCounterLong processedTransactionSuccessCount;
+
+ @Metric(about = "The number of delete block transactions failed.")
+ private MutableGaugeLong processedTransactionFailCount;
+
private BlockDeletingServiceMetrics() {
}
@@ -112,6 +116,14 @@ public void incrFailureCount() {
this.failureCount.incr();
}
+ public void incrProcessedTransactionSuccessCount(long count) {
+ processedTransactionSuccessCount.incr(count);
+ }
+
+ public void incrProcessedTransactionFailCount(long count) {
+ processedTransactionFailCount.incr(count);
+ }
+
public void incrReceivedTransactionCount(long count) {
receivedTransactionCount.incr(count);
}
@@ -184,6 +196,14 @@ public long getTotalLockTimeoutTransactionCount() {
return totalLockTimeoutTransactionCount.value();
}
+ public long getProcessedTransactionSuccessCount() {
+ return processedTransactionSuccessCount.value();
+ }
+
+ public long getProcessedTransactionFailCount() {
+ return processedTransactionFailCount.value();
+ }
+
@Override
public String toString() {
StringBuffer buffer = new StringBuffer();
@@ -202,6 +222,10 @@ public String toString() {
+ receivedTransactionCount.value()).append("\t")
.append("receivedRetryTransactionCount = "
+ receivedRetryTransactionCount.value()).append("\t")
+ .append("processedTransactionSuccessCount = "
+ + processedTransactionSuccessCount.value()).append("\t")
+ .append("processedTransactionFailCount = "
+ + processedTransactionFailCount.value()).append("\t")
.append("receivedContainerCount = "
+ receivedContainerCount.value()).append("\t")
.append("receivedBlockCount = "
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/CommandHandlerMetrics.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/CommandHandlerMetrics.java
index a6e4d6258d9..e52565952a5 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/CommandHandlerMetrics.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/CommandHandlerMetrics.java
@@ -34,6 +34,7 @@
import static org.apache.hadoop.ozone.container.common.helpers.CommandHandlerMetrics.CommandMetricsMetricsInfo.TotalRunTimeMs;
import static org.apache.hadoop.ozone.container.common.helpers.CommandHandlerMetrics.CommandMetricsMetricsInfo.QueueWaitingTaskCount;
import static org.apache.hadoop.ozone.container.common.helpers.CommandHandlerMetrics.CommandMetricsMetricsInfo.InvocationCount;
+import static org.apache.hadoop.ozone.container.common.helpers.CommandHandlerMetrics.CommandMetricsMetricsInfo.AvgRunTimeMs;
import static org.apache.hadoop.ozone.container.common.helpers.CommandHandlerMetrics.CommandMetricsMetricsInfo.ThreadPoolActivePoolSize;
import static org.apache.hadoop.ozone.container.common.helpers.CommandHandlerMetrics.CommandMetricsMetricsInfo.ThreadPoolMaxPoolSize;
import static org.apache.hadoop.ozone.container.common.helpers.CommandHandlerMetrics.CommandMetricsMetricsInfo.CommandReceivedCount;
@@ -46,6 +47,7 @@ public final class CommandHandlerMetrics implements MetricsSource {
enum CommandMetricsMetricsInfo implements MetricsInfo {
Command("The type of the SCM command"),
TotalRunTimeMs("The total runtime of the command handler in milliseconds"),
+ AvgRunTimeMs("Average run time of the command handler in milliseconds"),
QueueWaitingTaskCount("The number of queued tasks waiting for execution"),
InvocationCount("The number of times the command handler has been invoked"),
ThreadPoolActivePoolSize("The number of active threads in the thread pool"),
@@ -108,6 +110,7 @@ public void getMetrics(MetricsCollector collector, boolean all) {
commandHandler.getCommandType().name());
builder.addGauge(TotalRunTimeMs, commandHandler.getTotalRunTime());
+ builder.addGauge(AvgRunTimeMs, commandHandler.getAverageRunTime());
builder.addGauge(QueueWaitingTaskCount, commandHandler.getQueuedCount());
builder.addGauge(InvocationCount, commandHandler.getInvocationCount());
int activePoolSize = commandHandler.getThreadPoolActivePoolSize();
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/ContainerMetrics.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/ContainerMetrics.java
index 91bdb17cda9..03dbce061bb 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/ContainerMetrics.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/ContainerMetrics.java
@@ -30,7 +30,9 @@
import org.apache.hadoop.metrics2.lib.MutableCounterLong;
import org.apache.hadoop.metrics2.lib.MutableQuantiles;
import org.apache.hadoop.metrics2.lib.MutableRate;
+import org.apache.hadoop.ozone.util.MetricUtil;
+import java.io.Closeable;
import java.util.EnumMap;
/**
@@ -47,7 +49,7 @@
*/
@InterfaceAudience.Private
@Metrics(about = "Storage Container DataNode Metrics", context = "dfs")
-public class ContainerMetrics {
+public class ContainerMetrics implements Closeable {
public static final String STORAGE_CONTAINER_METRICS =
"StorageContainerMetrics";
@Metric private MutableCounterLong numOps;
@@ -109,6 +111,11 @@ public static void remove() {
ms.unregisterSource(STORAGE_CONTAINER_METRICS);
}
+ @Override
+ public void close() {
+ opsLatQuantiles.values().forEach(MetricUtil::stop);
+ }
+
public void incContainerOpsMetrics(ContainerProtos.Type type) {
numOps.incr();
numOpsArray.get(type).incr();
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/BlockDeletingService.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/BlockDeletingService.java
index 5392af1deb2..b1b65dc5850 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/BlockDeletingService.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/BlockDeletingService.java
@@ -142,8 +142,7 @@ public BackgroundTaskQueue getTasks() {
chooseContainerForBlockDeletion(getBlockLimitPerInterval(),
containerDeletionPolicy);
- BackgroundTask
- containerBlockInfos = null;
+ BackgroundTask containerBlockInfos = null;
long totalBlocks = 0;
for (ContainerBlockInfo containerBlockInfo : containers) {
BlockDeletingTaskBuilder builder =
@@ -155,13 +154,11 @@ public BackgroundTaskQueue getTasks() {
containerBlockInfos = builder.build();
queue.add(containerBlockInfos);
totalBlocks += containerBlockInfo.getNumBlocksToDelete();
+ LOG.debug("Queued- Container: {}, deleted blocks: {}",
+ containerBlockInfo.getContainerData().getContainerID(), containerBlockInfo.getNumBlocksToDelete());
}
metrics.incrTotalBlockChosenCount(totalBlocks);
metrics.incrTotalContainerChosenCount(containers.size());
- if (containers.size() > 0) {
- LOG.debug("Queued {} blocks from {} containers for deletion",
- totalBlocks, containers.size());
- }
} catch (StorageContainerException e) {
LOG.warn("Failed to initiate block deleting tasks, "
+ "caused by unable to get containers info. "
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerSet.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerSet.java
index b5dfd07d576..8dd35064e6b 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerSet.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerSet.java
@@ -23,8 +23,12 @@
import com.google.common.collect.ImmutableMap;
import com.google.protobuf.Message;
import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos;
+import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerDataProto.State;
+
import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReportsProto;
import org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException;
+import org.apache.hadoop.hdds.utils.db.InMemoryTestTable;
+import org.apache.hadoop.hdds.utils.db.Table;
import org.apache.hadoop.ozone.container.common.interfaces.Container;
import org.apache.hadoop.ozone.container.common.statemachine.StateContext;
import org.apache.hadoop.ozone.container.common.utils.ContainerLogger;
@@ -65,10 +69,24 @@ public class ContainerSet implements Iterable> {
new ConcurrentSkipListMap<>();
private Clock clock;
private long recoveringTimeout;
+ private final Table containerIdsTable;
+ @VisibleForTesting
public ContainerSet(long recoveringTimeout) {
+ this(new InMemoryTestTable<>(), recoveringTimeout);
+ }
+
+ public ContainerSet(Table containerIdsTable, long recoveringTimeout) {
+ this(containerIdsTable, recoveringTimeout, false);
+ }
+
+ public ContainerSet(Table containerIdsTable, long recoveringTimeout, boolean readOnly) {
+ this.clock = Clock.system(ZoneOffset.UTC);
+ this.containerIdsTable = containerIdsTable;
this.recoveringTimeout = recoveringTimeout;
+ if (!readOnly && containerIdsTable == null) {
+ throw new IllegalArgumentException("Container table cannot be null when container set is not read only");
+ }
}
public long getCurrentTime() {
@@ -85,22 +103,64 @@ public void setRecoveringTimeout(long recoveringTimeout) {
this.recoveringTimeout = recoveringTimeout;
}
+ /**
+ * Add Container to container map. This would fail if the container is already present or has been marked as missing.
+ * @param container container to be added
+ * @return If container is added to containerMap returns true, otherwise
+ * false
+ */
+ public boolean addContainer(Container> container) throws StorageContainerException {
+ return addContainer(container, false);
+ }
+
+ /**
+ * Add Container to container map. This overwrites the container even if it was marked missing, but would fail if
+ * the container is already present.
+ * @param container container to be added
+ * @return If container is added to containerMap returns true, otherwise
+ * false
+ */
+ public boolean addContainerByOverwriteMissingContainer(Container> container) throws StorageContainerException {
+ return addContainer(container, true);
+ }
+
+ public void ensureContainerNotMissing(long containerId, State state) throws StorageContainerException {
+ if (missingContainerSet.contains(containerId)) {
+ throw new StorageContainerException(String.format("Container with container Id %d with state : %s is missing in" +
+ " the DN.", containerId, state),
+ ContainerProtos.Result.CONTAINER_MISSING);
+ }
+ }
+
/**
* Add Container to container map.
* @param container container to be added
+ * @param overwrite if true, overwrite the container even if it was marked missing.
* @return If container is added to containerMap returns true, otherwise
* false
*/
- public boolean addContainer(Container> container) throws
+ private boolean addContainer(Container> container, boolean overwrite) throws
StorageContainerException {
Preconditions.checkNotNull(container, "container cannot be null");
long containerId = container.getContainerData().getContainerID();
+ State containerState = container.getContainerData().getState();
+ if (!overwrite) {
+ ensureContainerNotMissing(containerId, containerState);
+ }
if (containerMap.putIfAbsent(containerId, container) == null) {
if (LOG.isDebugEnabled()) {
LOG.debug("Container with container Id {} is added to containerMap",
containerId);
}
+ try {
+ if (containerIdsTable != null) {
+ containerIdsTable.put(containerId, containerState.toString());
+ }
+ } catch (IOException e) {
+ throw new StorageContainerException(e, ContainerProtos.Result.IO_EXCEPTION);
+ }
+ missingContainerSet.remove(containerId);
// wish we could have done this from ContainerData.setState
container.getContainerData().commitSpace();
if (container.getContainerData().getState() == RECOVERING) {
@@ -122,21 +182,69 @@ public boolean addContainer(Container> container) throws
* @return Container
*/
public Container> getContainer(long containerId) {
- Preconditions.checkState(containerId >= 0,
- "Container Id cannot be negative.");
+ Preconditions.checkState(containerId >= 0, "Container Id cannot be negative.");
return containerMap.get(containerId);
}
+ /**
+ * Removes container from both memory and database. This should be used when the containerData on disk has been
+ * removed completely from the node.
+ * @param containerId
+ * @return True if container is removed from containerMap.
+ * @throws StorageContainerException
+ */
+ public boolean removeContainer(long containerId) throws StorageContainerException {
+ return removeContainer(containerId, false, true);
+ }
+
+ /**
+ * Removes containerId from memory. This should be used when the container is still present on disk, and the
+ * in-memory state of the container needs to be updated.
+ * @param containerId
+ * @return True if container is removed from containerMap.
+ * @throws StorageContainerException
+ */
+ public boolean removeContainerOnlyFromMemory(long containerId) throws StorageContainerException {
+ return removeContainer(containerId, false, false);
+ }
+
+ /**
+ * Marks a container as missing: removes the container from the in-memory containerMap and adds it to the
+ * missing container set.
+ * @param containerId
+ * @return True if container is removed from containerMap.
+ * @throws StorageContainerException
+ */
+ public boolean removeMissingContainer(long containerId) throws StorageContainerException {
+ return removeContainer(containerId, true, false);
+ }
+
/**
* Removes the Container matching with specified containerId.
* @param containerId ID of the container to remove
* @return If container is removed from containerMap returns true, otherwise
* false
*/
- public boolean removeContainer(long containerId) {
+ private boolean removeContainer(long containerId, boolean markMissing, boolean removeFromDB)
+ throws StorageContainerException {
Preconditions.checkState(containerId >= 0,
"Container Id cannot be negative.");
+ // We need to add to the missing container set before removing from containerMap, since a write chunk operation
+ // could recreate the container in another volume if we removed it from the map before adding it to the missing
+ // container set.
+ if (markMissing) {
+ missingContainerSet.add(containerId);
+ }
Container> removed = containerMap.remove(containerId);
+ if (removeFromDB) {
+ try {
+ if (containerIdsTable != null) {
+ containerIdsTable.delete(containerId);
+ }
+ } catch (IOException e) {
+ throw new StorageContainerException(e, ContainerProtos.Result.IO_EXCEPTION);
+ }
+ }
if (removed == null) {
LOG.debug("Container with containerId {} is not present in " +
"containerMap", containerId);
@@ -189,22 +297,21 @@ public int containerCount() {
* Send FCR which will not contain removed containers.
*
* @param context StateContext
- * @return
*/
- public void handleVolumeFailures(StateContext context) {
+ public void handleVolumeFailures(StateContext context) throws StorageContainerException {
AtomicBoolean failedVolume = new AtomicBoolean(false);
AtomicInteger containerCount = new AtomicInteger(0);
- containerMap.values().forEach(c -> {
+ for (Container> c : containerMap.values()) {
ContainerData data = c.getContainerData();
if (data.getVolume().isFailed()) {
- removeContainer(data.getContainerID());
+ removeMissingContainer(data.getContainerID());
LOG.debug("Removing Container {} as the Volume {} " +
- "has failed", data.getContainerID(), data.getVolume());
+ "has failed", data.getContainerID(), data.getVolume());
failedVolume.set(true);
containerCount.incrementAndGet();
ContainerLogger.logLost(data, "Volume failure");
}
- });
+ }
if (failedVolume.get()) {
try {
@@ -252,6 +359,21 @@ public Iterator<Container<?>> getContainerIterator(HddsVolume volume) {
.iterator();
}
+ /**
+ * Get the number of containers on the given volume.
+ *
+ * @param volume the HDDS volume to count containers on.
+ * @return number of containers on the volume.
+ */
+ public long containerCount(HddsVolume volume) {
+ Preconditions.checkNotNull(volume);
+ Preconditions.checkNotNull(volume.getStorageID());
+ String volumeUuid = volume.getStorageID();
+ return containerMap.values().stream()
+ .filter(x -> volumeUuid.equals(x.getContainerData().getVolume()
+ .getStorageID())).count();
+ }
+
/**
* Return an containerMap iterator over {@link ContainerSet#containerMap}.
* @return containerMap Iterator
@@ -348,6 +470,10 @@ public Set getMissingContainerSet() {
return missingContainerSet;
}
+ public Table getContainerIdsTable() {
+ return containerIdsTable;
+ }
+
/**
* Builds the missing container set by taking a diff between total no
* containers actually found and number of containers which actually
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/HddsDispatcher.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/HddsDispatcher.java
index 417fb443eef..cd99b909231 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/HddsDispatcher.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/HddsDispatcher.java
@@ -177,7 +177,8 @@ private boolean canIgnoreException(Result result) {
case CONTAINER_UNHEALTHY:
case CLOSED_CONTAINER_IO:
case DELETE_ON_OPEN_CONTAINER:
- case UNSUPPORTED_REQUEST: // Blame client for sending unsupported request.
+ case UNSUPPORTED_REQUEST: // Blame client for sending unsupported request.
+ case CONTAINER_MISSING:
return true;
default:
return false;
@@ -278,7 +279,8 @@ private ContainerCommandResponseProto dispatchRequest(
getMissingContainerSet().remove(containerID);
}
}
- if (getMissingContainerSet().contains(containerID)) {
+ if (cmdType != Type.CreateContainer && !HddsUtils.isReadOnly(msg)
+ && getMissingContainerSet().contains(containerID)) {
StorageContainerException sce = new StorageContainerException(
"ContainerID " + containerID
+ " has been lost and cannot be recreated on this DataNode",
@@ -649,7 +651,7 @@ public Handler getHandler(ContainerProtos.ContainerType containerType) {
@Override
public void setClusterId(String clusterId) {
- Preconditions.checkNotNull(clusterId, "clusterId Cannot be null");
+ Preconditions.checkNotNull(clusterId, "clusterId cannot be null");
if (this.clusterId == null) {
this.clusterId = clusterId;
for (Map.Entry handlerMap : handlers.entrySet()) {
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/OpenContainerBlockMap.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/OpenContainerBlockMap.java
index d6ca2d120e6..2e11cde3d9e 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/OpenContainerBlockMap.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/OpenContainerBlockMap.java
@@ -35,7 +35,7 @@
/**
* Map: containerId {@literal ->} (localId {@literal ->} {@link BlockData}).
* The outer container map does not entail locking for a better performance.
- * The inner {@link BlockDataMap} is synchronized.
+ * The inner {@code BlockDataMap} is synchronized.
*
* This class will maintain list of open keys per container when closeContainer
* command comes, it should autocommit all open keys of a open container before
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/ContainerDeletionChoosingPolicyTemplate.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/ContainerDeletionChoosingPolicyTemplate.java
index c584ba79037..bb47b5b9b6f 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/ContainerDeletionChoosingPolicyTemplate.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/ContainerDeletionChoosingPolicyTemplate.java
@@ -90,7 +90,7 @@ public final List chooseContainerForBlockDeletion(
/**
* Abstract step for ordering the container data to be deleted.
* Subclass need to implement the concrete ordering implementation
- * in descending order (more prioritized -> less prioritized)
+ * in descending order (more prioritized -&gt; less prioritized)
* @param candidateContainers candidate containers to be ordered
*/
protected abstract void orderByDescendingPriority(
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/ContainerDispatcher.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/ContainerDispatcher.java
index d02bae0a35a..f075b6f67ca 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/ContainerDispatcher.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/ContainerDispatcher.java
@@ -75,7 +75,6 @@ void validateContainerCommand(
/**
* Returns the handler for the specified containerType.
* @param containerType
- * @return
*/
Handler getHandler(ContainerProtos.ContainerType containerType);
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/Handler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/Handler.java
index 77a4d97878d..fb9dc49071b 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/Handler.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/Handler.java
@@ -23,6 +23,7 @@
import java.io.OutputStream;
import java.util.Set;
+import com.google.common.annotations.VisibleForTesting;
import org.apache.hadoop.hdds.conf.ConfigurationSource;
import org.apache.hadoop.hdds.protocol.DatanodeDetails;
import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos;
@@ -96,7 +97,8 @@ public abstract StateMachine.DataChannel getStreamDataChannel(
*
* @return datanode Id
*/
- protected String getDatanodeId() {
+ @VisibleForTesting
+ public String getDatanodeId() {
return datanodeId;
}
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeStateMachine.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeStateMachine.java
index 55fcbcdb3cc..9d157cc9912 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeStateMachine.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeStateMachine.java
@@ -218,7 +218,6 @@ public DatanodeStateMachine(HddsDatanodeService hddsDatanodeService,
ReplicationSupervisorMetrics.create(supervisor);
ecReconstructionMetrics = ECReconstructionMetrics.create();
-
ecReconstructionCoordinator = new ECReconstructionCoordinator(
conf, certClient, secretKeyClient, context, ecReconstructionMetrics,
threadNamePrefix);
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/EndpointStateMachine.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/EndpointStateMachine.java
index a6c3b11de92..b3854e7ecd2 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/EndpointStateMachine.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/EndpointStateMachine.java
@@ -234,12 +234,17 @@ public void logIfNeeded(Exception ex) {
}
if (missCounter == 0) {
+ long missedDurationSeconds = TimeUnit.MILLISECONDS.toSeconds(
+ this.getMissedCount() * getScmHeartbeatInterval(this.conf)
+ );
LOG.warn(
- "Unable to communicate to {} server at {} for past {} seconds.",
- serverName,
- getAddress().getHostString() + ":" + getAddress().getPort(),
- TimeUnit.MILLISECONDS.toSeconds(this.getMissedCount() *
- getScmHeartbeatInterval(this.conf)), ex);
+ "Unable to communicate to {} server at {}:{} for past {} seconds.",
+ serverName,
+ address.getAddress(),
+ address.getPort(),
+ missedDurationSeconds,
+ ex
+ );
}
if (LOG.isTraceEnabled()) {
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/CloseContainerCommandHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/CloseContainerCommandHandler.java
index 8533f7384d4..cd032d4b275 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/CloseContainerCommandHandler.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/CloseContainerCommandHandler.java
@@ -18,7 +18,6 @@
import com.google.common.util.concurrent.ThreadFactoryBuilder;
import java.util.concurrent.CompletableFuture;
-import java.util.concurrent.ExecutorService;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
@@ -32,6 +31,8 @@
import org.apache.hadoop.hdds.protocol.proto
.StorageContainerDatanodeProtocolProtos.CloseContainerCommandProto;
import org.apache.hadoop.hdds.tracing.TracingUtil;
+import org.apache.hadoop.metrics2.lib.MetricsRegistry;
+import org.apache.hadoop.metrics2.lib.MutableRate;
import org.apache.hadoop.ozone.container.common.interfaces.Container;
import org.apache.hadoop.ozone.container.common.statemachine
.SCMConnectionManager;
@@ -58,11 +59,11 @@ public class CloseContainerCommandHandler implements CommandHandler {
private final AtomicLong invocationCount = new AtomicLong(0);
private final AtomicInteger queuedCount = new AtomicInteger(0);
- private final ExecutorService executor;
- private long totalTime;
+ private final ThreadPoolExecutor executor;
+ private final MutableRate opsLatencyMs;
/**
- * Constructs a ContainerReport handler.
+ * Constructs a close container command handler.
*/
public CloseContainerCommandHandler(
int threadPoolSize, int queueSize, String threadNamePrefix) {
@@ -73,6 +74,9 @@ public CloseContainerCommandHandler(
new ThreadFactoryBuilder()
.setNameFormat(threadNamePrefix + "CloseContainerThread-%d")
.build());
+ MetricsRegistry registry = new MetricsRegistry(
+ CloseContainerCommandHandler.class.getSimpleName());
+ this.opsLatencyMs = registry.newRate(SCMCommandProto.Type.closeContainerCommand + "Ms");
}
/**
@@ -156,7 +160,7 @@ public void handle(SCMCommand command, OzoneContainer ozoneContainer,
LOG.error("Can't close container #{}", containerId, e);
} finally {
long endTime = Time.monotonicNow();
- totalTime += endTime - startTime;
+ this.opsLatencyMs.add(endTime - startTime);
}
}, executor).whenComplete((v, e) -> queuedCount.decrementAndGet());
}
@@ -205,19 +209,26 @@ public int getInvocationCount() {
*/
@Override
public long getAverageRunTime() {
- if (invocationCount.get() > 0) {
- return totalTime / invocationCount.get();
- }
- return 0;
+ return (long) this.opsLatencyMs.lastStat().mean();
}
@Override
public long getTotalRunTime() {
- return totalTime;
+ return (long) this.opsLatencyMs.lastStat().total();
}
@Override
public int getQueuedCount() {
return queuedCount.get();
}
+
+ @Override
+ public int getThreadPoolMaxPoolSize() {
+ return executor.getMaximumPoolSize();
+ }
+
+ @Override
+ public int getThreadPoolActivePoolSize() {
+ return executor.getActiveCount();
+ }
}
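The manual totalTime counter replaced here recurs in the command handlers below, which all adopt the same MutableRate-based latency tracking. A minimal self-contained sketch of that pattern (class and metric names chosen here for illustration):

import org.apache.hadoop.metrics2.lib.MetricsRegistry;
import org.apache.hadoop.metrics2.lib.MutableRate;
import org.apache.hadoop.util.Time;

// Illustrative only: record each operation's duration in a MutableRate and
// derive average/total run time from lastStat() instead of a manual counter.
final class LatencyMetricSketch {
  private final MutableRate opsLatencyMs;

  LatencyMetricSketch(String name) {
    MetricsRegistry registry = new MetricsRegistry(name);
    this.opsLatencyMs = registry.newRate(name + "Ms");
  }

  void record(Runnable work) {
    long start = Time.monotonicNow();
    try {
      work.run();
    } finally {
      opsLatencyMs.add(Time.monotonicNow() - start);
    }
  }

  long averageRunTimeMs() {
    return (long) opsLatencyMs.lastStat().mean();
  }

  long totalRunTimeMs() {
    return (long) opsLatencyMs.lastStat().total();
  }
}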
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/ClosePipelineCommandHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/ClosePipelineCommandHandler.java
index 241abb6f4ae..be39277fdfa 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/ClosePipelineCommandHandler.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/ClosePipelineCommandHandler.java
@@ -24,6 +24,8 @@
import org.apache.hadoop.hdds.ratis.RatisHelper;
import org.apache.hadoop.hdds.scm.client.HddsClientUtils;
import org.apache.hadoop.hdds.scm.pipeline.PipelineID;
+import org.apache.hadoop.metrics2.lib.MetricsRegistry;
+import org.apache.hadoop.metrics2.lib.MutableRate;
import org.apache.hadoop.ozone.container.common.statemachine
.SCMConnectionManager;
import org.apache.hadoop.ozone.container.common.statemachine.StateContext;
@@ -60,9 +62,9 @@ public class ClosePipelineCommandHandler implements CommandHandler {
private final AtomicLong invocationCount = new AtomicLong(0);
private final AtomicInteger queuedCount = new AtomicInteger(0);
- private long totalTime;
private final Executor executor;
private final BiFunction newRaftClient;
+ private final MutableRate opsLatencyMs;
/**
* Constructs a closePipelineCommand handler.
@@ -80,6 +82,9 @@ public ClosePipelineCommandHandler(
Executor executor) {
this.newRaftClient = newRaftClient;
this.executor = executor;
+ MetricsRegistry registry = new MetricsRegistry(
+ ClosePipelineCommandHandler.class.getSimpleName());
+ this.opsLatencyMs = registry.newRate(SCMCommandProto.Type.closePipelineCommand + "Ms");
}
/**
@@ -155,7 +160,7 @@ public void handle(SCMCommand command, OzoneContainer ozoneContainer,
}
} finally {
long endTime = Time.monotonicNow();
- totalTime += endTime - startTime;
+ this.opsLatencyMs.add(endTime - startTime);
}
}, executor).whenComplete((v, e) -> queuedCount.decrementAndGet());
}
@@ -187,15 +192,12 @@ public int getInvocationCount() {
*/
@Override
public long getAverageRunTime() {
- if (invocationCount.get() > 0) {
- return totalTime / invocationCount.get();
- }
- return 0;
+ return (long) this.opsLatencyMs.lastStat().mean();
}
@Override
public long getTotalRunTime() {
- return totalTime;
+ return (long) this.opsLatencyMs.lastStat().total();
}
@Override
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/CommandDispatcher.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/CommandDispatcher.java
index 9035b79c670..c3f8da74c7a 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/CommandDispatcher.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/CommandDispatcher.java
@@ -56,11 +56,6 @@ public final class CommandDispatcher {
private CommandDispatcher(OzoneContainer container, SCMConnectionManager
connectionManager, StateContext context,
CommandHandler... handlers) {
- Preconditions.checkNotNull(context);
- Preconditions.checkNotNull(handlers);
- Preconditions.checkArgument(handlers.length > 0);
- Preconditions.checkNotNull(container);
- Preconditions.checkNotNull(connectionManager);
this.context = context;
this.container = container;
this.connectionManager = connectionManager;
@@ -77,6 +72,7 @@ private CommandDispatcher(OzoneContainer container, SCMConnectionManager
commandHandlerMetrics = CommandHandlerMetrics.create(handlerMap);
}
+ @VisibleForTesting
public CommandHandler getCloseContainerHandler() {
return handlerMap.get(Type.closeContainerCommand);
}
@@ -201,11 +197,12 @@ public Builder setContext(StateContext stateContext) {
* @return Command Dispatcher.
*/
public CommandDispatcher build() {
- Preconditions.checkNotNull(this.connectionManager, "Missing connection" +
- " manager.");
- Preconditions.checkNotNull(this.container, "Missing container.");
- Preconditions.checkNotNull(this.context, "Missing context.");
- Preconditions.checkArgument(this.handlerList.size() > 0);
+ Preconditions.checkNotNull(this.connectionManager,
+ "Missing scm connection manager.");
+ Preconditions.checkNotNull(this.container, "Missing ozone container.");
+ Preconditions.checkNotNull(this.context, "Missing state context.");
+ Preconditions.checkArgument(this.handlerList.size() > 0,
+ "The number of command handlers must be greater than 0.");
return new CommandDispatcher(this.container, this.connectionManager,
this.context, handlerList.toArray(
new CommandHandler[handlerList.size()]));
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/CreatePipelineCommandHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/CreatePipelineCommandHandler.java
index 4a36a1987de..62fc8a919d8 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/CreatePipelineCommandHandler.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/CreatePipelineCommandHandler.java
@@ -30,6 +30,8 @@
import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.SCMCommandProto;
import org.apache.hadoop.hdds.ratis.RatisHelper;
import org.apache.hadoop.hdds.scm.pipeline.PipelineID;
+import org.apache.hadoop.metrics2.lib.MetricsRegistry;
+import org.apache.hadoop.metrics2.lib.MutableRate;
import org.apache.hadoop.ozone.container.common.statemachine.SCMConnectionManager;
import org.apache.hadoop.ozone.container.common.statemachine.StateContext;
import org.apache.hadoop.ozone.container.common.transport.server.XceiverServerSpi;
@@ -59,8 +61,8 @@ public class CreatePipelineCommandHandler implements CommandHandler {
private final AtomicInteger queuedCount = new AtomicInteger(0);
private final BiFunction newRaftClient;
- private long totalTime;
private final Executor executor;
+ private final MutableRate opsLatencyMs;
/**
* Constructs a createPipelineCommand handler.
@@ -75,6 +77,9 @@ public CreatePipelineCommandHandler(ConfigurationSource conf,
Executor executor) {
this.newRaftClient = newRaftClient;
this.executor = executor;
+ MetricsRegistry registry = new MetricsRegistry(
+ CreatePipelineCommandHandler.class.getSimpleName());
+ this.opsLatencyMs = registry.newRate(SCMCommandProto.Type.createPipelineCommand + "Ms");
}
/**
@@ -135,7 +140,7 @@ public void handle(SCMCommand command, OzoneContainer ozoneContainer,
}
} finally {
long endTime = Time.monotonicNow();
- totalTime += endTime - startTime;
+ this.opsLatencyMs.add(endTime - startTime);
}
}, executor).whenComplete((v, e) -> queuedCount.decrementAndGet());
}
@@ -167,15 +172,12 @@ public int getInvocationCount() {
*/
@Override
public long getAverageRunTime() {
- if (invocationCount.get() > 0) {
- return totalTime / invocationCount.get();
- }
- return 0;
+ return (long) this.opsLatencyMs.lastStat().mean();
}
@Override
public long getTotalRunTime() {
- return totalTime;
+ return (long) this.opsLatencyMs.lastStat().total();
}
@Override
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/DeleteBlocksCommandHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/DeleteBlocksCommandHandler.java
index 747749066e3..6a158f51023 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/DeleteBlocksCommandHandler.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/DeleteBlocksCommandHandler.java
@@ -32,6 +32,8 @@
import org.apache.hadoop.hdds.protocol.proto
.StorageContainerDatanodeProtocolProtos.DeletedBlocksTransaction;
import org.apache.hadoop.hdds.utils.db.Table;
+import org.apache.hadoop.metrics2.lib.MetricsRegistry;
+import org.apache.hadoop.metrics2.lib.MutableRate;
import org.apache.hadoop.ozone.container.common.helpers.BlockData;
import org.apache.hadoop.ozone.container.common.helpers.BlockDeletingServiceMetrics;
import org.apache.hadoop.ozone.container.common.helpers.ChunkInfoList;
@@ -91,7 +93,6 @@ public class DeleteBlocksCommandHandler implements CommandHandler {
private final ContainerSet containerSet;
private final ConfigurationSource conf;
private int invocationCount;
- private long totalTime;
private final ThreadPoolExecutor executor;
private final LinkedBlockingQueue deleteCommandQueues;
private final Daemon handlerThread;
@@ -99,6 +100,7 @@ public class DeleteBlocksCommandHandler implements CommandHandler {
private final BlockDeletingServiceMetrics blockDeleteMetrics;
private final long tryLockTimeoutMs;
private final Map schemaHandlers;
+ private final MutableRate opsLatencyMs;
public DeleteBlocksCommandHandler(OzoneContainer container,
ConfigurationSource conf, DatanodeConfiguration dnConf,
@@ -121,6 +123,9 @@ public DeleteBlocksCommandHandler(OzoneContainer container,
dnConf.getBlockDeleteThreads(), threadFactory);
this.deleteCommandQueues =
new LinkedBlockingQueue<>(dnConf.getBlockDeleteQueueLimit());
+ MetricsRegistry registry = new MetricsRegistry(
+ DeleteBlocksCommandHandler.class.getSimpleName());
+ this.opsLatencyMs = registry.newRate(SCMCommandProto.Type.deleteBlocksCommand + "Ms");
long interval = dnConf.getBlockDeleteCommandWorkerInterval().toMillis();
handlerThread = new Daemon(new DeleteCmdWorker(interval));
handlerThread.start();
@@ -168,12 +173,12 @@ public int getQueuedCount() {
@Override
public int getThreadPoolMaxPoolSize() {
- return ((ThreadPoolExecutor)executor).getMaximumPoolSize();
+ return executor.getMaximumPoolSize();
}
@Override
public int getThreadPoolActivePoolSize() {
- return ((ThreadPoolExecutor)executor).getActiveCount();
+ return executor.getActiveCount();
}
/**
@@ -354,10 +359,11 @@ private void processCmd(DeleteCmdInfo cmd) {
DeletedContainerBlocksSummary summary =
DeletedContainerBlocksSummary.getFrom(containerBlocks);
LOG.info("Summary of deleting container blocks, numOfTransactions={}, "
- + "numOfContainers={}, numOfBlocks={}",
+ + "numOfContainers={}, numOfBlocks={}, commandId={}.",
summary.getNumOfTxs(),
summary.getNumOfContainers(),
- summary.getNumOfBlocks());
+ summary.getNumOfBlocks(),
+ cmd.getCmd().getId());
if (LOG.isDebugEnabled()) {
LOG.debug("Start to delete container blocks, TXIDs={}",
summary.getTxIDSummary());
@@ -384,7 +390,14 @@ private void processCmd(DeleteCmdInfo cmd) {
LOG.debug("Sending following block deletion ACK to SCM");
for (DeleteBlockTransactionResult result : blockDeletionACK
.getResultsList()) {
- LOG.debug("{} : {}", result.getTxID(), result.getSuccess());
+ boolean success = result.getSuccess();
+ LOG.debug("TxId = {} : ContainerId = {} : {}",
+ result.getTxID(), result.getContainerID(), success);
+ if (success) {
+ blockDeleteMetrics.incrProcessedTransactionSuccessCount(1);
+ } else {
+ blockDeleteMetrics.incrProcessedTransactionFailCount(1);
+ }
}
}
}
@@ -403,7 +416,7 @@ private void processCmd(DeleteCmdInfo cmd) {
};
updateCommandStatus(cmd.getContext(), cmd.getCmd(), statusUpdater, LOG);
long endTime = Time.monotonicNow();
- totalTime += endTime - startTime;
+ this.opsLatencyMs.add(endTime - startTime);
invocationCount++;
}
}
@@ -666,15 +679,12 @@ public int getInvocationCount() {
@Override
public long getAverageRunTime() {
- if (invocationCount > 0) {
- return totalTime / invocationCount;
- }
- return 0;
+ return (long) this.opsLatencyMs.lastStat().mean();
}
@Override
public long getTotalRunTime() {
- return totalTime;
+ return (long) this.opsLatencyMs.lastStat().total();
}
@Override
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/DeleteContainerCommandHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/DeleteContainerCommandHandler.java
index ead81c32e5b..59aaacc1c80 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/DeleteContainerCommandHandler.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/DeleteContainerCommandHandler.java
@@ -22,6 +22,8 @@
import java.util.concurrent.RejectedExecutionException;
import org.apache.hadoop.hdds.protocol.proto
.StorageContainerDatanodeProtocolProtos.SCMCommandProto;
+import org.apache.hadoop.metrics2.lib.MetricsRegistry;
+import org.apache.hadoop.metrics2.lib.MutableRate;
import org.apache.hadoop.ozone.container.common.statemachine
.SCMConnectionManager;
import org.apache.hadoop.ozone.container.common.statemachine.StateContext;
@@ -36,11 +38,9 @@
import java.io.IOException;
import java.time.Clock;
import java.util.OptionalLong;
-import java.util.concurrent.ExecutorService;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
-import java.util.concurrent.atomic.AtomicLong;
/**
* Handler to process the DeleteContainerCommand from SCM.
@@ -52,10 +52,10 @@ public class DeleteContainerCommandHandler implements CommandHandler {
private final AtomicInteger invocationCount = new AtomicInteger(0);
private final AtomicInteger timeoutCount = new AtomicInteger(0);
- private final AtomicLong totalTime = new AtomicLong(0);
- private final ExecutorService executor;
+ private final ThreadPoolExecutor executor;
private final Clock clock;
private int maxQueueSize;
+ private final MutableRate opsLatencyMs;
public DeleteContainerCommandHandler(
int threadPoolSize, Clock clock, int queueSize, String threadNamePrefix) {
@@ -70,10 +70,13 @@ public DeleteContainerCommandHandler(
}
protected DeleteContainerCommandHandler(Clock clock,
- ExecutorService executor, int queueSize) {
+ ThreadPoolExecutor executor, int queueSize) {
this.executor = executor;
this.clock = clock;
maxQueueSize = queueSize;
+ MetricsRegistry registry = new MetricsRegistry(
+ DeleteContainerCommandHandler.class.getSimpleName());
+ this.opsLatencyMs = registry.newRate(SCMCommandProto.Type.deleteContainerCommand + "Ms");
}
@Override
public void handle(final SCMCommand command,
@@ -125,13 +128,13 @@ private void handleInternal(SCMCommand command, StateContext context,
} catch (IOException e) {
LOG.error("Exception occurred while deleting the container.", e);
} finally {
- totalTime.getAndAdd(Time.monotonicNow() - startTime);
+ this.opsLatencyMs.add(Time.monotonicNow() - startTime);
}
}
@Override
public int getQueuedCount() {
- return ((ThreadPoolExecutor)executor).getQueue().size();
+ return executor.getQueue().size();
}
@Override
@@ -150,14 +153,22 @@ public int getTimeoutCount() {
@Override
public long getAverageRunTime() {
- final int invocations = invocationCount.get();
- return invocations == 0 ?
- 0 : totalTime.get() / invocations;
+ return (long) this.opsLatencyMs.lastStat().mean();
}
@Override
public long getTotalRunTime() {
- return totalTime.get();
+ return (long) this.opsLatencyMs.lastStat().total();
+ }
+
+ @Override
+ public int getThreadPoolMaxPoolSize() {
+ return executor.getMaximumPoolSize();
+ }
+
+ @Override
+ public int getThreadPoolActivePoolSize() {
+ return executor.getActiveCount();
}
@Override
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/FinalizeNewLayoutVersionCommandHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/FinalizeNewLayoutVersionCommandHandler.java
index bd7ec5710d9..77e152447b9 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/FinalizeNewLayoutVersionCommandHandler.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/FinalizeNewLayoutVersionCommandHandler.java
@@ -20,6 +20,8 @@
import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.FinalizeNewLayoutVersionCommandProto;
import org.apache.hadoop.hdds.protocol.proto
.StorageContainerDatanodeProtocolProtos.SCMCommandProto;
+import org.apache.hadoop.metrics2.lib.MetricsRegistry;
+import org.apache.hadoop.metrics2.lib.MutableRate;
import org.apache.hadoop.ozone.container.common.statemachine.DatanodeStateMachine;
import org.apache.hadoop.ozone.container.common.statemachine
.SCMConnectionManager;
@@ -42,12 +44,15 @@ public class FinalizeNewLayoutVersionCommandHandler implements CommandHandler {
LoggerFactory.getLogger(FinalizeNewLayoutVersionCommandHandler.class);
private AtomicLong invocationCount = new AtomicLong(0);
- private long totalTime;
+ private final MutableRate opsLatencyMs;
/**
* Constructs a FinalizeNewLayoutVersionCommandHandler.
*/
public FinalizeNewLayoutVersionCommandHandler() {
+ MetricsRegistry registry = new MetricsRegistry(
+ FinalizeNewLayoutVersionCommandHandler.class.getSimpleName());
+ this.opsLatencyMs = registry.newRate(SCMCommandProto.Type.finalizeNewLayoutVersionCommand + "Ms");
}
/**
@@ -82,7 +87,7 @@ public void handle(SCMCommand command, OzoneContainer ozoneContainer,
LOG.error("Exception during finalization.", e);
} finally {
long endTime = Time.monotonicNow();
- totalTime += endTime - startTime;
+ this.opsLatencyMs.add(endTime - startTime);
}
}
@@ -113,15 +118,12 @@ public int getInvocationCount() {
*/
@Override
public long getAverageRunTime() {
- if (invocationCount.get() > 0) {
- return totalTime / invocationCount.get();
- }
- return 0;
+ return (long) this.opsLatencyMs.lastStat().mean();
}
@Override
public long getTotalRunTime() {
- return totalTime;
+ return (long) this.opsLatencyMs.lastStat().total();
}
@Override
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/ReconstructECContainersCommandHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/ReconstructECContainersCommandHandler.java
index 602687d7a00..030d169e9b8 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/ReconstructECContainersCommandHandler.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/ReconstructECContainersCommandHandler.java
@@ -36,6 +36,7 @@ public class ReconstructECContainersCommandHandler implements CommandHandler {
private final ReplicationSupervisor supervisor;
private final ECReconstructionCoordinator coordinator;
private final ConfigurationSource conf;
+ private String metricsName;
public ReconstructECContainersCommandHandler(ConfigurationSource conf,
ReplicationSupervisor supervisor,
@@ -52,8 +53,16 @@ public void handle(SCMCommand command, OzoneContainer container,
(ReconstructECContainersCommand) command;
ECReconstructionCommandInfo reconstructionCommandInfo =
new ECReconstructionCommandInfo(ecContainersCommand);
- this.supervisor.addTask(new ECReconstructionCoordinatorTask(
- coordinator, reconstructionCommandInfo));
+ ECReconstructionCoordinatorTask task = new ECReconstructionCoordinatorTask(
+ coordinator, reconstructionCommandInfo);
+ if (this.metricsName == null) {
+ this.metricsName = task.getMetricName();
+ }
+ this.supervisor.addTask(task);
+ }
+
+ public String getMetricsName() {
+ return this.metricsName;
}
@Override
@@ -63,23 +72,26 @@ public Type getCommandType() {
@Override
public int getInvocationCount() {
- return 0;
+ return this.metricsName == null ? 0 : (int) this.supervisor
+ .getReplicationRequestCount(metricsName);
}
@Override
public long getAverageRunTime() {
- return 0;
+ return this.metricsName == null ? 0 : (int) this.supervisor
+ .getReplicationRequestAvgTime(metricsName);
}
@Override
public long getTotalRunTime() {
- return 0;
+ return this.metricsName == null ? 0 : this.supervisor
+ .getReplicationRequestTotalTime(metricsName);
}
@Override
public int getQueuedCount() {
- return supervisor
- .getInFlightReplications(ECReconstructionCoordinatorTask.class);
+ return this.metricsName == null ? 0 : (int) this.supervisor
+ .getReplicationQueuedCount(metricsName);
}
public ConfigurationSource getConf() {
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/RefreshVolumeUsageCommandHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/RefreshVolumeUsageCommandHandler.java
index 3c14b2fb161..1ab31ba1c41 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/RefreshVolumeUsageCommandHandler.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/RefreshVolumeUsageCommandHandler.java
@@ -18,6 +18,8 @@
import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.SCMCommandProto.Type;
+import org.apache.hadoop.metrics2.lib.MetricsRegistry;
+import org.apache.hadoop.metrics2.lib.MutableRate;
import org.apache.hadoop.ozone.container.common.statemachine.SCMConnectionManager;
import org.apache.hadoop.ozone.container.common.statemachine.StateContext;
import org.apache.hadoop.ozone.container.ozoneimpl.OzoneContainer;
@@ -27,7 +29,6 @@
import org.slf4j.LoggerFactory;
import java.util.concurrent.atomic.AtomicInteger;
-import java.util.concurrent.atomic.AtomicLong;
/**
* Command handler to refresh usage info of all volumes.
@@ -38,9 +39,12 @@ public class RefreshVolumeUsageCommandHandler implements CommandHandler {
LoggerFactory.getLogger(RefreshVolumeUsageCommandHandler.class);
private final AtomicInteger invocationCount = new AtomicInteger(0);
- private final AtomicLong totalTime = new AtomicLong(0);
+ private final MutableRate opsLatencyMs;
public RefreshVolumeUsageCommandHandler() {
+ MetricsRegistry registry = new MetricsRegistry(
+ RefreshVolumeUsageCommandHandler.class.getSimpleName());
+ this.opsLatencyMs = registry.newRate(Type.refreshVolumeUsageInfo + "Ms");
}
@Override
@@ -50,7 +54,7 @@ public void handle(SCMCommand command, OzoneContainer container,
invocationCount.incrementAndGet();
final long startTime = Time.monotonicNow();
container.getVolumeSet().refreshAllVolumeUsage();
- totalTime.getAndAdd(Time.monotonicNow() - startTime);
+ this.opsLatencyMs.add(Time.monotonicNow() - startTime);
}
@Override
@@ -66,14 +70,12 @@ public int getInvocationCount() {
@Override
public long getAverageRunTime() {
- final int invocations = invocationCount.get();
- return invocations == 0 ?
- 0 : totalTime.get() / invocations;
+ return (long) this.opsLatencyMs.lastStat().mean();
}
@Override
public long getTotalRunTime() {
- return totalTime.get();
+ return (long) this.opsLatencyMs.lastStat().total();
}
@Override
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/ReplicateContainerCommandHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/ReplicateContainerCommandHandler.java
index 21b26339e23..242a4eb74be 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/ReplicateContainerCommandHandler.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/ReplicateContainerCommandHandler.java
@@ -43,29 +43,28 @@ public class ReplicateContainerCommandHandler implements CommandHandler {
static final Logger LOG =
LoggerFactory.getLogger(ReplicateContainerCommandHandler.class);
- private int invocationCount;
-
- private long totalTime;
-
- private ConfigurationSource conf;
-
private ReplicationSupervisor supervisor;
private ContainerReplicator downloadReplicator;
private ContainerReplicator pushReplicator;
+ private String metricsName;
+
public ReplicateContainerCommandHandler(
ConfigurationSource conf,
ReplicationSupervisor supervisor,
ContainerReplicator downloadReplicator,
ContainerReplicator pushReplicator) {
- this.conf = conf;
this.supervisor = supervisor;
this.downloadReplicator = downloadReplicator;
this.pushReplicator = pushReplicator;
}
+ public String getMetricsName() {
+ return this.metricsName;
+ }
+
@Override
public void handle(SCMCommand command, OzoneContainer container,
StateContext context, SCMConnectionManager connectionManager) {
@@ -86,12 +85,16 @@ public void handle(SCMCommand command, OzoneContainer container,
downloadReplicator : pushReplicator;
ReplicationTask task = new ReplicationTask(replicateCommand, replicator);
+ if (metricsName == null) {
+ metricsName = task.getMetricName();
+ }
supervisor.addTask(task);
}
@Override
public int getQueuedCount() {
- return supervisor.getInFlightReplications(ReplicationTask.class);
+ return this.metricsName == null ? 0 : (int) this.supervisor
+ .getReplicationQueuedCount(metricsName);
}
@Override
@@ -101,19 +104,19 @@ public SCMCommandProto.Type getCommandType() {
@Override
public int getInvocationCount() {
- return this.invocationCount;
+ return this.metricsName == null ? 0 : (int) this.supervisor
+ .getReplicationRequestCount(metricsName);
}
@Override
public long getAverageRunTime() {
- if (invocationCount > 0) {
- return totalTime / invocationCount;
- }
- return 0;
+ return this.metricsName == null ? 0 : (int) this.supervisor
+ .getReplicationRequestAvgTime(metricsName);
}
@Override
public long getTotalRunTime() {
- return totalTime;
+ return this.metricsName == null ? 0 : this.supervisor
+ .getReplicationRequestTotalTime(metricsName);
}
}
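Both replication-style handlers above now defer their counters to the ReplicationSupervisor, keyed by the metric name of the first task they enqueue. A compact sketch of that pattern; the wrapper class is illustrative and it assumes the supervisor/task APIs referenced by this patch:

import org.apache.hadoop.ozone.container.replication.AbstractReplicationTask;
import org.apache.hadoop.ozone.container.replication.ReplicationSupervisor;

// Illustrative only: capture the task's metric name once, then delegate
// invocation/queue counters to the shared ReplicationSupervisor.
final class SupervisorMetricsSketch {
  private final ReplicationSupervisor supervisor;
  private volatile String metricsName;

  SupervisorMetricsSketch(ReplicationSupervisor supervisor) {
    this.supervisor = supervisor;
  }

  void enqueue(AbstractReplicationTask task) {
    if (metricsName == null) {
      metricsName = task.getMetricName();
    }
    supervisor.addTask(task);
  }

  long invocationCount() {
    return metricsName == null ? 0 : supervisor.getReplicationRequestCount(metricsName);
  }

  long queuedCount() {
    return metricsName == null ? 0 : supervisor.getReplicationQueuedCount(metricsName);
  }
}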
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/SetNodeOperationalStateCommandHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/SetNodeOperationalStateCommandHandler.java
index 6f7f4414eeb..33563624795 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/SetNodeOperationalStateCommandHandler.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/SetNodeOperationalStateCommandHandler.java
@@ -21,8 +21,10 @@
import org.apache.hadoop.hdds.conf.ConfigurationSource;
import org.apache.hadoop.hdds.protocol.DatanodeDetails;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
-import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos;
+import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.SetNodeOperationalStateCommandProto;
import org.apache.hadoop.hdds.utils.HddsServerUtil;
+import org.apache.hadoop.metrics2.lib.MetricsRegistry;
+import org.apache.hadoop.metrics2.lib.MutableRate;
import org.apache.hadoop.ozone.container.common.helpers.ContainerUtils;
import org.apache.hadoop.ozone.container.common.statemachine.SCMConnectionManager;
import org.apache.hadoop.ozone.container.common.statemachine.StateContext;
@@ -39,7 +41,6 @@
import java.io.File;
import java.io.IOException;
import java.util.concurrent.atomic.AtomicInteger;
-import java.util.concurrent.atomic.AtomicLong;
import java.util.function.Consumer;
@@ -54,7 +55,7 @@ public class SetNodeOperationalStateCommandHandler implements CommandHandler {
private final ConfigurationSource conf;
private final Consumer replicationSupervisor;
private final AtomicInteger invocationCount = new AtomicInteger(0);
- private final AtomicLong totalTime = new AtomicLong(0);
+ private final MutableRate opsLatencyMs;
/**
* Set Node State command handler.
@@ -65,6 +66,9 @@ public SetNodeOperationalStateCommandHandler(ConfigurationSource conf,
Consumer replicationSupervisor) {
this.conf = conf;
this.replicationSupervisor = replicationSupervisor;
+ MetricsRegistry registry = new MetricsRegistry(
+ SetNodeOperationalStateCommandHandler.class.getSimpleName());
+ this.opsLatencyMs = registry.newRate(Type.setNodeOperationalStateCommand + "Ms");
}
/**
@@ -80,9 +84,6 @@ public void handle(SCMCommand command, OzoneContainer container,
StateContext context, SCMConnectionManager connectionManager) {
long startTime = Time.monotonicNow();
invocationCount.incrementAndGet();
- StorageContainerDatanodeProtocolProtos.SetNodeOperationalStateCommandProto
- setNodeCmdProto = null;
-
if (command.getType() != Type.setNodeOperationalStateCommand) {
LOG.warn("Skipping handling command, expected command "
+ "type {} but found {}",
@@ -91,7 +92,7 @@ public void handle(SCMCommand command, OzoneContainer container,
}
SetNodeOperationalStateCommand setNodeCmd =
(SetNodeOperationalStateCommand) command;
- setNodeCmdProto = setNodeCmd.getProto();
+ SetNodeOperationalStateCommandProto setNodeCmdProto = setNodeCmd.getProto();
DatanodeDetails dni = context.getParent().getDatanodeDetails();
HddsProtos.NodeOperationalState state =
setNodeCmdProto.getNodeOperationalState();
@@ -106,7 +107,7 @@ public void handle(SCMCommand command, OzoneContainer container,
// handler interface.
}
replicationSupervisor.accept(state);
- totalTime.addAndGet(Time.monotonicNow() - startTime);
+ this.opsLatencyMs.add(Time.monotonicNow() - startTime);
}
// TODO - this duplicates code in HddsDatanodeService and InitDatanodeState
@@ -125,8 +126,7 @@ private void persistDatanodeDetails(DatanodeDetails dnDetails)
* @return Type
*/
@Override
- public StorageContainerDatanodeProtocolProtos.SCMCommandProto.Type
- getCommandType() {
+ public Type getCommandType() {
return Type.setNodeOperationalStateCommand;
}
@@ -147,14 +147,12 @@ public int getInvocationCount() {
*/
@Override
public long getAverageRunTime() {
- final int invocations = invocationCount.get();
- return invocations == 0 ?
- 0 : totalTime.get() / invocations;
+ return (long) this.opsLatencyMs.lastStat().mean();
}
@Override
public long getTotalRunTime() {
- return totalTime.get();
+ return (long) this.opsLatencyMs.lastStat().total();
}
@Override
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/HeartbeatEndpointTask.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/HeartbeatEndpointTask.java
index b6ab4748fe3..caa6b9df121 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/HeartbeatEndpointTask.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/HeartbeatEndpointTask.java
@@ -495,7 +495,7 @@ public Builder setEndpointStateMachine(EndpointStateMachine rpcEndPoint) {
/**
* Sets the LayoutVersionManager.
*
- * @param versionMgr - config
+ * @param lvm the layout version manager
* @return Builder
*/
public Builder setLayoutVersionManager(HDDSLayoutVersionManager lvm) {
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/RegisterEndpointTask.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/RegisterEndpointTask.java
index 71f95cc4d32..969756b40f8 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/RegisterEndpointTask.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/RegisterEndpointTask.java
@@ -244,7 +244,7 @@ public Builder setConfig(ConfigurationSource config) {
/**
* Sets the LayoutVersionManager.
*
- * @param versionMgr - config
+ * @param lvm the layout version manager
* @return Builder.
*/
public Builder setLayoutVersionManager(HDDSLayoutVersionManager lvm) {
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/VersionEndpointTask.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/VersionEndpointTask.java
index e702b1e6e15..968c9b9a6e6 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/VersionEndpointTask.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/VersionEndpointTask.java
@@ -17,6 +17,7 @@
package org.apache.hadoop.ozone.container.common.states.endpoint;
import java.io.IOException;
+import java.net.BindException;
import java.util.concurrent.Callable;
import org.apache.hadoop.hdds.conf.ConfigurationSource;
@@ -104,7 +105,7 @@ public EndpointStateMachine.EndPointStates call() throws Exception {
LOG.debug("Cannot execute GetVersion task as endpoint state machine " +
"is in {} state", rpcEndPoint.getState());
}
- } catch (DiskOutOfSpaceException ex) {
+ } catch (DiskOutOfSpaceException | BindException ex) {
rpcEndPoint.setState(EndpointStateMachine.EndPointStates.SHUTDOWN);
} catch (IOException ex) {
rpcEndPoint.logIfNeeded(ex);
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/GrpcXceiverService.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/GrpcXceiverService.java
index 9c3f29d0f0c..5f1914402d0 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/GrpcXceiverService.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/GrpcXceiverService.java
@@ -20,7 +20,6 @@
import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerCommandRequestProto;
import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerCommandResponseProto;
import org.apache.hadoop.hdds.protocol.datanode.proto.XceiverClientProtocolServiceGrpc;
-import org.apache.hadoop.hdds.utils.IOUtils;
import org.apache.hadoop.ozone.container.common.interfaces.ContainerDispatcher;
import org.apache.ratis.grpc.util.ZeroCopyMessageMarshaller;
import org.apache.ratis.thirdparty.com.google.protobuf.MessageLite;
@@ -31,7 +30,6 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import java.io.InputStream;
import java.util.concurrent.atomic.AtomicBoolean;
import static org.apache.hadoop.hdds.protocol.datanode.proto.XceiverClientProtocolServiceGrpc.getSendMethod;
@@ -45,28 +43,20 @@ public class GrpcXceiverService extends
LOG = LoggerFactory.getLogger(GrpcXceiverService.class);
private final ContainerDispatcher dispatcher;
- private final boolean zeroCopyEnabled;
private final ZeroCopyMessageMarshaller
zeroCopyMessageMarshaller = new ZeroCopyMessageMarshaller<>(
ContainerCommandRequestProto.getDefaultInstance());
- public GrpcXceiverService(ContainerDispatcher dispatcher,
- boolean zeroCopyEnabled) {
+ public GrpcXceiverService(ContainerDispatcher dispatcher) {
this.dispatcher = dispatcher;
- this.zeroCopyEnabled = zeroCopyEnabled;
}
/**
- * Bind service with zerocopy marshaller equipped for the `send` API if
- * zerocopy is enabled.
+ * Bind service with zerocopy marshaller equipped for the `send` API.
* @return service definition.
*/
public ServerServiceDefinition bindServiceWithZeroCopy() {
ServerServiceDefinition orig = super.bindService();
- if (!zeroCopyEnabled) {
- LOG.info("Zerocopy is not enabled.");
- return orig;
- }
ServerServiceDefinition.Builder builder =
ServerServiceDefinition.builder(orig.getServiceDescriptor().getName());
@@ -117,10 +107,7 @@ public void onNext(ContainerCommandRequestProto request) {
isClosed.set(true);
responseObserver.onError(e);
} finally {
- InputStream popStream = zeroCopyMessageMarshaller.popStream(request);
- if (popStream != null) {
- IOUtils.close(LOG, popStream);
- }
+ zeroCopyMessageMarshaller.release(request);
}
}
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/XceiverServerGrpc.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/XceiverServerGrpc.java
index ad9c5c9d9ca..0d95ac25eda 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/XceiverServerGrpc.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/XceiverServerGrpc.java
@@ -19,6 +19,7 @@
package org.apache.hadoop.ozone.container.common.transport.server;
import java.io.IOException;
+import java.net.BindException;
import java.util.Collections;
import java.util.List;
import java.util.UUID;
@@ -29,7 +30,6 @@
import org.apache.hadoop.hdds.conf.ConfigurationSource;
import org.apache.hadoop.hdds.protocol.DatanodeDetails;
-import org.apache.hadoop.hdds.protocol.DatanodeDetails.Port.Name;
import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos;
import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerCommandRequestProto;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
@@ -66,9 +66,6 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_EC_GRPC_ZERO_COPY_ENABLED;
-import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_EC_GRPC_ZERO_COPY_ENABLED_DEFAULT;
-
/**
* Creates a Grpc server endpoint that acts as the communication layer for
* Ozone containers.
@@ -134,13 +131,9 @@ public XceiverServerGrpc(DatanodeDetails datanodeDetails,
eventLoopGroup = new NioEventLoopGroup(poolSize / 10, factory);
channelType = NioServerSocketChannel.class;
}
- final boolean zeroCopyEnabled = conf.getBoolean(
- OZONE_EC_GRPC_ZERO_COPY_ENABLED,
- OZONE_EC_GRPC_ZERO_COPY_ENABLED_DEFAULT);
LOG.info("GrpcServer channel type {}", channelType.getSimpleName());
- GrpcXceiverService xceiverService = new GrpcXceiverService(dispatcher,
- zeroCopyEnabled);
+ GrpcXceiverService xceiverService = new GrpcXceiverService(dispatcher);
NettyServerBuilder nettyServerBuilder = NettyServerBuilder.forPort(port)
.maxInboundMessageSize(OzoneConsts.OZONE_SCM_CHUNK_MAX_SIZE)
.bossEventLoopGroup(eventLoopGroup)
@@ -185,7 +178,16 @@ public HddsProtos.ReplicationType getServerType() {
@Override
public void start() throws IOException {
if (!isStarted) {
- server.start();
+ try {
+ server.start();
+ } catch (IOException e) {
+ LOG.error("Error while starting the server", e);
+ if (e.getMessage().contains("Failed to bind to address")) {
+ throw new BindException(e.getMessage());
+ } else {
+ throw e;
+ }
+ }
int realPort = server.getPort();
if (port == 0) {
@@ -195,9 +197,7 @@ public void start() throws IOException {
}
//register the real port to the datanode details.
- datanodeDetails.setPort(DatanodeDetails
- .newPort(Name.STANDALONE,
- realPort));
+ datanodeDetails.setPort(DatanodeDetails.newStandalonePort(realPort));
isStarted = true;
}
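Taken together with the VersionEndpointTask change above, a port-bind failure now moves the endpoint to SHUTDOWN instead of being retried indefinitely. A minimal self-contained sketch of the translation; the helper class and interface are illustrative, not part of this patch:

import java.io.IOException;
import java.net.BindException;

// Illustrative only: translate a gRPC "Failed to bind" IOException into a
// BindException so the caller (VersionEndpointTask) can shut the endpoint down.
final class BindFailureSketch {
  interface Startable {
    void start() throws IOException;
  }

  static void startOrFail(Startable server) throws IOException {
    try {
      server.start();
    } catch (IOException e) {
      String msg = e.getMessage();
      if (msg != null && msg.contains("Failed to bind to address")) {
        throw new BindException(msg);
      }
      throw e;
    }
  }
}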
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java
index b3398de07ad..23be4138b60 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java
@@ -41,8 +41,9 @@
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.function.Consumer;
-import java.util.stream.Collectors;
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Preconditions;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
import org.apache.hadoop.hdds.HddsUtils;
import org.apache.hadoop.hdds.conf.ConfigurationSource;
@@ -64,24 +65,24 @@
import org.apache.hadoop.ozone.HddsDatanodeService;
import org.apache.hadoop.ozone.OzoneConfigKeys;
import org.apache.hadoop.ozone.common.utils.BufferUtils;
+import org.apache.hadoop.ozone.container.common.helpers.ContainerUtils;
import org.apache.hadoop.ozone.container.common.interfaces.ContainerDispatcher;
import org.apache.hadoop.ozone.container.common.statemachine.DatanodeConfiguration;
import org.apache.hadoop.ozone.container.keyvalue.impl.KeyValueStreamDataChannel;
import org.apache.hadoop.ozone.container.ozoneimpl.ContainerController;
import org.apache.hadoop.util.Time;
-
-import com.google.common.annotations.VisibleForTesting;
-import com.google.common.base.Preconditions;
import org.apache.ratis.proto.RaftProtos;
-import org.apache.ratis.proto.RaftProtos.StateMachineEntryProto;
import org.apache.ratis.proto.RaftProtos.LogEntryProto;
import org.apache.ratis.proto.RaftProtos.RaftPeerRole;
import org.apache.ratis.proto.RaftProtos.RoleInfoProto;
+import org.apache.ratis.proto.RaftProtos.StateMachineEntryProto;
import org.apache.ratis.proto.RaftProtos.StateMachineLogEntryProto;
import org.apache.ratis.protocol.Message;
import org.apache.ratis.protocol.RaftClientRequest;
+import org.apache.ratis.protocol.RaftGroup;
import org.apache.ratis.protocol.RaftGroupId;
import org.apache.ratis.protocol.RaftGroupMemberId;
+import org.apache.ratis.protocol.RaftPeer;
import org.apache.ratis.protocol.RaftPeerId;
import org.apache.ratis.protocol.exceptions.StateMachineException;
import org.apache.ratis.server.RaftServer;
@@ -97,10 +98,10 @@
import org.apache.ratis.thirdparty.com.google.protobuf.ByteString;
import org.apache.ratis.thirdparty.com.google.protobuf.InvalidProtocolBufferException;
import org.apache.ratis.thirdparty.com.google.protobuf.TextFormat;
+import org.apache.ratis.util.JavaUtils;
import org.apache.ratis.util.LifeCycle;
import org.apache.ratis.util.TaskQueue;
import org.apache.ratis.util.function.CheckedSupplier;
-import org.apache.ratis.util.JavaUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -184,7 +185,6 @@ long getStartTime() {
private final SimpleStateMachineStorage storage =
new SimpleStateMachineStorage();
- private final RaftGroupId gid;
private final ContainerDispatcher dispatcher;
private final ContainerController containerController;
private final XceiverServerRatis ratisServer;
@@ -204,6 +204,7 @@ long getStartTime() {
private final boolean waitOnBothFollowers;
private final HddsDatanodeService datanodeService;
private static Semaphore semaphore = new Semaphore(1);
+ private final AtomicBoolean peersValidated;
/**
* CSM metrics.
@@ -219,7 +220,6 @@ public ContainerStateMachine(HddsDatanodeService hddsDatanodeService, RaftGroupI
ConfigurationSource conf,
String threadNamePrefix) {
this.datanodeService = hddsDatanodeService;
- this.gid = gid;
this.dispatcher = dispatcher;
this.containerController = containerController;
this.ratisServer = ratisServer;
@@ -233,7 +233,7 @@ public ContainerStateMachine(HddsDatanodeService hddsDatanodeService, RaftGroupI
// cache with FIFO eviction, and if element not found, this needs
// to be obtained from disk for slow follower
stateMachineDataCache = new ResourceCache<>(
- (index, data) -> ((ByteString)data).size(),
+ (index, data) -> data.size(),
pendingRequestsBytesLimit,
(p) -> {
if (p.wasEvicted()) {
@@ -255,6 +255,7 @@ public ContainerStateMachine(HddsDatanodeService hddsDatanodeService, RaftGroupI
HDDS_CONTAINER_RATIS_STATEMACHINE_MAX_PENDING_APPLY_TXNS_DEFAULT);
applyTransactionSemaphore = new Semaphore(maxPendingApplyTransactions);
stateMachineHealthy = new AtomicBoolean(true);
+ this.peersValidated = new AtomicBoolean(false);
ThreadFactory threadFactory = new ThreadFactoryBuilder()
.setNameFormat(
@@ -268,6 +269,19 @@ public ContainerStateMachine(HddsDatanodeService hddsDatanodeService, RaftGroupI
}
+ private void validatePeers() throws IOException {
+ if (this.peersValidated.get()) {
+ return;
+ }
+ final RaftGroup group = ratisServer.getServerDivision(getGroupId()).getGroup();
+ final RaftPeerId selfId = ratisServer.getServer().getId();
+ if (group.getPeer(selfId) == null) {
+ throw new StorageContainerException("Current datanode " + selfId + " is not a member of " + group,
+ ContainerProtos.Result.INVALID_CONFIG);
+ }
+ peersValidated.set(true);
+ }
+
@Override
public StateMachineStorage getStateMachineStorage() {
return storage;
@@ -283,8 +297,9 @@ public void initialize(
throws IOException {
super.initialize(server, id, raftStorage);
storage.init(raftStorage);
- ratisServer.notifyGroupAdd(gid);
+ ratisServer.notifyGroupAdd(id);
+ LOG.info("{}: initialize {}", server.getId(), id);
loadSnapshot(storage.getLatestSnapshot());
}
@@ -293,7 +308,7 @@ private long loadSnapshot(SingleFileSnapshotInfo snapshot)
if (snapshot == null) {
TermIndex empty = TermIndex.valueOf(0, RaftLog.INVALID_LOG_INDEX);
LOG.info("{}: The snapshot info is null. Setting the last applied index " +
- "to:{}", gid, empty);
+ "to:{}", getGroupId(), empty);
setLastAppliedTermIndex(empty);
return empty.getIndex();
}
@@ -301,7 +316,7 @@ private long loadSnapshot(SingleFileSnapshotInfo snapshot)
final File snapshotFile = snapshot.getFile().getPath().toFile();
final TermIndex last =
SimpleStateMachineStorage.getTermIndexFromSnapshotFile(snapshotFile);
- LOG.info("{}: Setting the last applied index to {}", gid, last);
+ LOG.info("{}: Setting the last applied index to {}", getGroupId(), last);
setLastAppliedTermIndex(last);
// initialize the dispatcher with snapshot so that it build the missing
@@ -351,7 +366,7 @@ public long takeSnapshot() throws IOException {
long startTime = Time.monotonicNow();
if (!isStateMachineHealthy()) {
String msg =
- "Failed to take snapshot " + " for " + gid + " as the stateMachine"
+ "Failed to take snapshot " + " for " + getGroupId() + " as the stateMachine"
+ " is unhealthy. The last applied index is at " + ti;
StateMachineException sme = new StateMachineException(msg);
LOG.error(msg);
@@ -360,19 +375,19 @@ public long takeSnapshot() throws IOException {
if (ti != null && ti.getIndex() != RaftLog.INVALID_LOG_INDEX) {
final File snapshotFile =
storage.getSnapshotFile(ti.getTerm(), ti.getIndex());
- LOG.info("{}: Taking a snapshot at:{} file {}", gid, ti, snapshotFile);
+ LOG.info("{}: Taking a snapshot at:{} file {}", getGroupId(), ti, snapshotFile);
try (FileOutputStream fos = new FileOutputStream(snapshotFile)) {
persistContainerSet(fos);
fos.flush();
// make sure the snapshot file is synced
fos.getFD().sync();
} catch (IOException ioe) {
- LOG.error("{}: Failed to write snapshot at:{} file {}", gid, ti,
+ LOG.error("{}: Failed to write snapshot at:{} file {}", getGroupId(), ti,
snapshotFile);
throw ioe;
}
LOG.info("{}: Finished taking a snapshot at:{} file:{} took: {} ms",
- gid, ti, snapshotFile, (Time.monotonicNow() - startTime));
+ getGroupId(), ti, snapshotFile, (Time.monotonicNow() - startTime));
return ti.getIndex();
}
return -1;
@@ -386,7 +401,7 @@ public TransactionContext startTransaction(LogEntryProto entry, RaftPeerRole rol
final StateMachineLogEntryProto stateMachineLogEntry = entry.getStateMachineLogEntry();
final ContainerCommandRequestProto logProto;
try {
- logProto = getContainerCommandRequestProto(gid, stateMachineLogEntry.getLogData());
+ logProto = getContainerCommandRequestProto(getGroupId(), stateMachineLogEntry.getLogData());
} catch (InvalidProtocolBufferException e) {
trx.setException(e);
return trx;
@@ -413,7 +428,7 @@ public TransactionContext startTransaction(RaftClientRequest request)
long startTime = Time.monotonicNowNanos();
final ContainerCommandRequestProto proto =
message2ContainerCommandRequestProto(request.getMessage());
- Preconditions.checkArgument(request.getRaftGroupId().equals(gid));
+ Preconditions.checkArgument(request.getRaftGroupId().equals(getGroupId()));
final TransactionContext.Builder builder = TransactionContext.newBuilder()
.setClientRequest(request)
@@ -449,7 +464,7 @@ public TransactionContext startTransaction(RaftClientRequest request)
final WriteChunkRequestProto.Builder commitWriteChunkProto = WriteChunkRequestProto.newBuilder(write)
.clearData();
protoBuilder.setWriteChunk(commitWriteChunkProto)
- .setPipelineID(gid.getUuid().toString())
+ .setPipelineID(getGroupId().getUuid().toString())
.setTraceID(proto.getTraceID());
builder.setStateMachineData(write.getData());
@@ -491,20 +506,20 @@ private static ContainerCommandRequestProto getContainerCommandRequestProto(
private ContainerCommandRequestProto message2ContainerCommandRequestProto(
Message message) throws InvalidProtocolBufferException {
- return ContainerCommandRequestMessage.toProto(message.getContent(), gid);
+ return ContainerCommandRequestMessage.toProto(message.getContent(), getGroupId());
}
private ContainerCommandResponseProto dispatchCommand(
ContainerCommandRequestProto requestProto, DispatcherContext context) {
if (LOG.isTraceEnabled()) {
- LOG.trace("{}: dispatch {} containerID={} pipelineID={} traceID={}", gid,
+ LOG.trace("{}: dispatch {} containerID={} pipelineID={} traceID={}", getGroupId(),
requestProto.getCmdType(), requestProto.getContainerID(),
requestProto.getPipelineID(), requestProto.getTraceID());
}
ContainerCommandResponseProto response =
dispatcher.dispatch(requestProto, context);
if (LOG.isTraceEnabled()) {
- LOG.trace("{}: response {}", gid, response);
+ LOG.trace("{}: response {}", getGroupId(), response);
}
return response;
}
@@ -531,7 +546,7 @@ private CompletableFuture writeStateMachineData(
RaftServer server = ratisServer.getServer();
Preconditions.checkArgument(!write.getData().isEmpty());
try {
- if (server.getDivision(gid).getInfo().isLeader()) {
+ if (server.getDivision(getGroupId()).getInfo().isLeader()) {
stateMachineDataCache.put(entryIndex, write.getData());
}
} catch (InterruptedException ioe) {
@@ -559,7 +574,7 @@ private CompletableFuture writeStateMachineData(
return dispatchCommand(requestProto, context);
} catch (Exception e) {
LOG.error("{}: writeChunk writeStateMachineData failed: blockId" +
- "{} logIndex {} chunkName {}", gid, write.getBlockID(),
+ "{} logIndex {} chunkName {}", getGroupId(), write.getBlockID(),
entryIndex, write.getChunkData().getChunkName(), e);
metrics.incNumWriteDataFails();
// write chunks go in parallel. It's possible that one write chunk
@@ -573,7 +588,7 @@ private CompletableFuture writeStateMachineData(
writeChunkFutureMap.put(entryIndex, writeChunkFuture);
if (LOG.isDebugEnabled()) {
LOG.debug("{}: writeChunk writeStateMachineData : blockId" +
- "{} logIndex {} chunkName {}", gid, write.getBlockID(),
+ "{} logIndex {} chunkName {}", getGroupId(), write.getBlockID(),
entryIndex, write.getChunkData().getChunkName());
}
// Remove the future once it finishes execution from the
@@ -587,7 +602,7 @@ private CompletableFuture writeStateMachineData(
&& r.getResult() != ContainerProtos.Result.CHUNK_FILE_INCONSISTENCY) {
StorageContainerException sce =
new StorageContainerException(r.getMessage(), r.getResult());
- LOG.error(gid + ": writeChunk writeStateMachineData failed: blockId" +
+ LOG.error(getGroupId() + ": writeChunk writeStateMachineData failed: blockId" +
write.getBlockID() + " logIndex " + entryIndex + " chunkName " +
write.getChunkData().getChunkName() + " Error message: " +
r.getMessage() + " Container Result: " + r.getResult());
@@ -601,7 +616,7 @@ private CompletableFuture writeStateMachineData(
metrics.incNumBytesWrittenCount(
requestProto.getWriteChunk().getChunkData().getLen());
if (LOG.isDebugEnabled()) {
- LOG.debug(gid +
+ LOG.debug(getGroupId() +
": writeChunk writeStateMachineData completed: blockId" +
write.getBlockID() + " logIndex " + entryIndex + " chunkName " +
write.getChunkData().getChunkName());
@@ -622,7 +637,7 @@ private StateMachine.DataChannel getStreamDataChannel(
DispatcherContext context) throws StorageContainerException {
if (LOG.isDebugEnabled()) {
LOG.debug("{}: getStreamDataChannel {} containerID={} pipelineID={} " +
- "traceID={}", gid, requestProto.getCmdType(),
+ "traceID={}", getGroupId(), requestProto.getCmdType(),
requestProto.getContainerID(), requestProto.getPipelineID(),
requestProto.getTraceID());
}
@@ -704,9 +719,10 @@ private ExecutorService getChunkExecutor(WriteChunkRequestProto req) {
return chunkExecutors.get(i);
}
- /*
- * writeStateMachineData calls are not synchronized with each other
- * and also with applyTransaction.
+ /**
+ * Calls to {@link #writeStateMachineData}
+ * are not synchronized with each other,
+ * nor with {@code applyTransaction(TransactionContext)}.
*/
@Override
public CompletableFuture write(LogEntryProto entry, TransactionContext trx) {
@@ -780,7 +796,7 @@ private ByteString readStateMachineData(
new StorageContainerException(response.getMessage(),
response.getResult());
LOG.error("gid {} : ReadStateMachine failed. cmd {} logIndex {} msg : "
- + "{} Container Result: {}", gid, response.getCmdType(), index,
+ + "{} Container Result: {}", getGroupId(), response.getCmdType(), index,
response.getMessage(), response.getResult());
stateMachineHealthy.set(false);
throw sce;
@@ -816,15 +832,13 @@ private ByteString readStateMachineData(
*/
@Override
public CompletableFuture flush(long index) {
- List> futureList =
- writeChunkFutureMap.entrySet().stream().filter(x -> x.getKey() <= index)
- .map(Map.Entry::getValue).collect(Collectors.toList());
return CompletableFuture.allOf(
- futureList.toArray(new CompletableFuture[futureList.size()]));
+ writeChunkFutureMap.entrySet().stream().filter(x -> x.getKey() <= index)
+ .map(Map.Entry::getValue).toArray(CompletableFuture[]::new));
}
/**
- * This method is used by the Leader to read state machine date for sending appendEntries to followers.
+ * This method is used by the Leader to read state machine data for sending appendEntries to followers.
* It will first get the data from {@link #stateMachineDataCache}.
* If the data is not in the cache, it will read from the file by dispatching a command
*
@@ -857,7 +871,7 @@ public CompletableFuture read(LogEntryProto entry, TransactionContex
.map(TransactionContext::getStateMachineContext)
.orElse(null);
final ContainerCommandRequestProto requestProto = context != null ? context.getLogProto()
- : getContainerCommandRequestProto(gid, entry.getStateMachineLogEntry().getLogData());
+ : getContainerCommandRequestProto(getGroupId(), entry.getStateMachineLogEntry().getLogData());
if (requestProto.getCmdType() != Type.WriteChunk) {
throw new IllegalStateException("Cmd type:" + requestProto.getCmdType()
@@ -875,7 +889,7 @@ public CompletableFuture read(LogEntryProto entry, TransactionContex
return future;
} catch (Exception e) {
metrics.incNumReadStateMachineFails();
- LOG.error("{} unable to read stateMachineData:", gid, e);
+ LOG.error("{} unable to read stateMachineData:", getGroupId(), e);
return completeExceptionally(e);
}
}
@@ -921,7 +935,7 @@ public void notifyServerShutdown(RaftProtos.RoleInfoProto roleInfo, boolean allS
// from `HddsDatanodeService.stop()`, otherwise, it indicates this `close` originates from ratis.
if (allServer) {
if (datanodeService != null && !datanodeService.isStopped()) {
- LOG.info("{} is closed by ratis", gid);
+ LOG.info("{} is closed by ratis", getGroupId());
if (semaphore.tryAcquire()) {
// run with a different thread, so this raft group can be closed
Runnable runnable = () -> {
@@ -953,7 +967,7 @@ public void notifyServerShutdown(RaftProtos.RoleInfoProto roleInfo, boolean allS
CompletableFuture.runAsync(runnable);
}
} else {
- LOG.info("{} is closed by HddsDatanodeService", gid);
+ LOG.info("{} is closed by HddsDatanodeService", getGroupId());
}
}
}
@@ -965,6 +979,11 @@ private CompletableFuture applyTransaction(
final CheckedSupplier task
= () -> {
try {
+ try {
+ this.validatePeers();
+ } catch (StorageContainerException e) {
+ return ContainerUtils.logAndReturnError(LOG, e, request);
+ }
long timeNow = Time.monotonicNowNanos();
long queueingDelay = timeNow - context.getStartTime();
metrics.recordQueueingDelay(request.getCmdType(), queueingDelay);
@@ -984,14 +1003,17 @@ private CompletableFuture applyTransaction(
private void removeStateMachineDataIfNeeded(long index) {
if (waitOnBothFollowers) {
try {
- RaftServer.Division division = ratisServer.getServer().getDivision(gid);
+ RaftServer.Division division = ratisServer.getServer().getDivision(getGroupId());
if (division.getInfo().isLeader()) {
- long minIndex = Arrays.stream(division.getInfo()
- .getFollowerNextIndices()).min().getAsLong();
- LOG.debug("Removing data corresponding to log index {} min index {} "
- + "from cache", index, minIndex);
- removeCacheDataUpTo(Math.min(minIndex, index));
+ Arrays.stream(division.getInfo()
+ .getFollowerNextIndices()).min().ifPresent(minIndex -> {
+ removeCacheDataUpTo(Math.min(minIndex, index));
+ LOG.debug("Removing data corresponding to log index {} min index {} "
+ + "from cache", index, minIndex);
+ });
}
+ } catch (RuntimeException e) {
+ throw e;
} catch (Exception e) {
throw new RuntimeException(e);
}
@@ -1042,7 +1064,7 @@ public CompletableFuture applyTransaction(TransactionContext trx) {
CompletableFuture applyTransactionFuture =
new CompletableFuture<>();
final Consumer exceptionHandler = e -> {
- LOG.error(gid + ": failed to applyTransaction at logIndex " + index
+ LOG.error(getGroupId() + ": failed to applyTransaction at logIndex " + index
+ " for " + requestProto.getCmdType(), e);
stateMachineHealthy.compareAndSet(true, false);
metrics.incNumApplyTransactionsFails();
@@ -1070,7 +1092,7 @@ public CompletableFuture applyTransaction(TransactionContext trx) {
new StorageContainerException(r.getMessage(), r.getResult());
LOG.error(
"gid {} : ApplyTransaction failed. cmd {} logIndex {} msg : "
- + "{} Container Result: {}", gid, r.getCmdType(), index,
+ + "{} Container Result: {}", getGroupId(), r.getCmdType(), index,
r.getMessage(), r.getResult());
metrics.incNumApplyTransactionsFails();
// Since the applyTransaction now is completed exceptionally,
@@ -1079,12 +1101,12 @@ public CompletableFuture applyTransaction(TransactionContext trx) {
// shutdown.
applyTransactionFuture.completeExceptionally(sce);
stateMachineHealthy.compareAndSet(true, false);
- ratisServer.handleApplyTransactionFailure(gid, trx.getServerRole());
+ ratisServer.handleApplyTransactionFailure(getGroupId(), trx.getServerRole());
} else {
if (LOG.isDebugEnabled()) {
LOG.debug(
"gid {} : ApplyTransaction completed. cmd {} logIndex {} msg : "
- + "{} Container Result: {}", gid, r.getCmdType(), index,
+ + "{} Container Result: {}", getGroupId(), r.getCmdType(), index,
r.getMessage(), r.getResult());
}
if (cmdType == Type.WriteChunk || cmdType == Type.PutSmallFile) {
@@ -1161,26 +1183,26 @@ public void evictStateMachineCache() {
}
@Override
- public void notifyFollowerSlowness(RoleInfoProto roleInfoProto) {
- ratisServer.handleNodeSlowness(gid, roleInfoProto);
+ public void notifyFollowerSlowness(RoleInfoProto roleInfoProto, RaftPeer follower) {
+ ratisServer.handleFollowerSlowness(getGroupId(), roleInfoProto, follower);
}
@Override
public void notifyExtendedNoLeader(RoleInfoProto roleInfoProto) {
- ratisServer.handleNoLeader(gid, roleInfoProto);
+ ratisServer.handleNoLeader(getGroupId(), roleInfoProto);
}
@Override
public void notifyLogFailed(Throwable t, LogEntryProto failedEntry) {
- LOG.error("{}: {} {}", gid, TermIndex.valueOf(failedEntry),
+ LOG.error("{}: {} {}", getGroupId(), TermIndex.valueOf(failedEntry),
toStateMachineLogEntryString(failedEntry.getStateMachineLogEntry()), t);
- ratisServer.handleNodeLogFailure(gid, t);
+ ratisServer.handleNodeLogFailure(getGroupId(), t);
}
@Override
public CompletableFuture notifyInstallSnapshotFromLeader(
RoleInfoProto roleInfoProto, TermIndex firstTermIndexInLog) {
- ratisServer.handleInstallSnapshotFromLeader(gid, roleInfoProto,
+ ratisServer.handleInstallSnapshotFromLeader(getGroupId(), roleInfoProto,
firstTermIndexInLog);
final CompletableFuture future = new CompletableFuture<>();
future.complete(firstTermIndexInLog);
@@ -1189,7 +1211,7 @@ public CompletableFuture notifyInstallSnapshotFromLeader(
@Override
public void notifyGroupRemove() {
- ratisServer.notifyGroupRemove(gid);
+ ratisServer.notifyGroupRemove(getGroupId());
// Make best effort to quasi-close all the containers on group removal.
// Containers already in terminal state like CLOSED or UNHEALTHY will not
// be affected.
@@ -1197,7 +1219,7 @@ public void notifyGroupRemove() {
try {
containerController.markContainerForClose(cid);
containerController.quasiCloseContainer(cid,
- "Ratis group removed");
+ "Ratis group removed. Group id: " + getGroupId());
} catch (IOException e) {
LOG.debug("Failed to quasi-close container {}", cid);
}
@@ -1219,7 +1241,7 @@ public void notifyLeaderChanged(RaftGroupMemberId groupMemberId,
@Override
public String toStateMachineLogEntryString(StateMachineLogEntryProto proto) {
- return smProtoToString(gid, containerController, proto);
+ return smProtoToString(getGroupId(), containerController, proto);
}
public static String smProtoToString(RaftGroupId gid,
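Review note: the new validatePeers() guard runs at most once per state machine (tracked by the peersValidated AtomicBoolean) and fails applyTransaction on a datanode that is not a member of the Raft group. A minimal, self-contained sketch of the same membership check, using plain collections instead of the Ratis RaftGroup/RaftPeerId types (all names below are illustrative, not part of this patch):

import java.util.Set;

public final class PeerMembershipCheckSketch {
  // Mirrors validatePeers(): reject work on a node that is not part of the replication group.
  static void validate(Set<String> groupPeerIds, String selfId) {
    if (!groupPeerIds.contains(selfId)) {
      throw new IllegalStateException(
          "Current datanode " + selfId + " is not a member of " + groupPeerIds);
    }
  }

  public static void main(String[] args) {
    Set<String> peers = Set.of("dn-1", "dn-2", "dn-3");
    validate(peers, "dn-2");        // member: passes
    try {
      validate(peers, "dn-9");      // not a member: rejected, as applyTransaction now is
    } catch (IllegalStateException e) {
      System.out.println(e.getMessage());
    }
  }
}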
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java
index 7899cdcc0e6..a4c14343985 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java
@@ -104,6 +104,7 @@
import org.apache.ratis.server.RaftServerRpc;
import org.apache.ratis.server.protocol.TermIndex;
import org.apache.ratis.server.storage.RaftStorage;
+import org.apache.ratis.util.Preconditions;
import org.apache.ratis.util.SizeInBytes;
import org.apache.ratis.util.TimeDuration;
import org.apache.ratis.util.TraditionalBinaryPrefix;
@@ -161,19 +162,18 @@ private static long nextCallId() {
private int clientPort;
private int dataStreamPort;
private final RaftServer server;
+ private final String name;
private final List chunkExecutors;
private final ContainerDispatcher dispatcher;
private final ContainerController containerController;
private final ClientId clientId = ClientId.randomId();
private final StateContext context;
- private final long nodeFailureTimeoutMs;
private boolean isStarted = false;
private final DatanodeDetails datanodeDetails;
private final ConfigurationSource conf;
// TODO: Remove the gids set when Ratis supports an api to query active
// pipelines
private final ConcurrentMap activePipelines = new ConcurrentHashMap<>();
- private final RaftPeerId raftPeerId;
// Timeout used while calling submitRequest directly.
private final long requestTimeout;
private final boolean shouldDeleteRatisLogDirectory;
@@ -197,14 +197,14 @@ private XceiverServerRatis(HddsDatanodeService hddsDatanodeService, DatanodeDeta
this.context = context;
this.dispatcher = dispatcher;
this.containerController = containerController;
- this.raftPeerId = RatisHelper.toRaftPeerId(dd);
String threadNamePrefix = datanodeDetails.threadNamePrefix();
chunkExecutors = createChunkExecutors(conf, threadNamePrefix);
- nodeFailureTimeoutMs = ratisServerConfig.getFollowerSlownessTimeout();
shouldDeleteRatisLogDirectory =
ratisServerConfig.shouldDeleteRatisLogDirectory();
RaftProperties serverProperties = newRaftProperties();
+ final RaftPeerId raftPeerId = RatisHelper.toRaftPeerId(dd);
+ this.name = getClass().getSimpleName() + "(" + raftPeerId + ")";
this.server =
RaftServer.newBuilder().setServerId(raftPeerId)
.setProperties(serverProperties)
@@ -474,7 +474,7 @@ private void setStateMachineDataConfigurations(RaftProperties properties) {
// NOTE : the default value for the retry count in ratis is -1,
// which means retry indefinitely.
- int syncTimeoutRetryDefault = (int) nodeFailureTimeoutMs /
+ final int syncTimeoutRetryDefault = (int) ratisServerConfig.getFollowerSlownessTimeout() /
dataSyncTimeout.toIntExact(TimeUnit.MILLISECONDS);
int numSyncRetries = conf.getInt(
OzoneConfigKeys.HDDS_CONTAINER_RATIS_STATEMACHINEDATA_SYNC_RETRIES,
@@ -558,7 +558,7 @@ private static Parameters createTlsParameters(SecurityConfig conf,
@Override
public void start() throws IOException {
if (!isStarted) {
- LOG.info("Starting {} {}", getClass().getSimpleName(), server.getId());
+ LOG.info("Starting {}", name);
for (ThreadPoolExecutor executor : chunkExecutors) {
executor.prestartAllCoreThreads();
}
@@ -581,11 +581,11 @@ public void start() throws IOException {
}
}
- private int getRealPort(InetSocketAddress address, Port.Name name) {
+ private int getRealPort(InetSocketAddress address, Port.Name portName) {
int realPort = address.getPort();
- datanodeDetails.setPort(DatanodeDetails.newPort(name, realPort));
- LOG.info("{} {} is started using port {} for {}",
- getClass().getSimpleName(), server.getId(), realPort, name);
+ final Port port = DatanodeDetails.newPort(portName, realPort);
+ datanodeDetails.setPort(port);
+ LOG.info("{} is started using port {}", name, port);
return realPort;
}
@@ -593,7 +593,7 @@ private int getRealPort(InetSocketAddress address, Port.Name name) {
public void stop() {
if (isStarted) {
try {
- LOG.info("Stopping {} {}", getClass().getSimpleName(), server.getId());
+ LOG.info("Closing {}", name);
// shutdown server before the executors as while shutting down,
// some of the tasks would be executed using the executors.
server.close();
@@ -602,7 +602,7 @@ public void stop() {
}
isStarted = false;
} catch (IOException e) {
- LOG.error("XceiverServerRatis Could not be stopped gracefully.", e);
+ LOG.error("Failed to close {}.", name, e);
}
}
}
@@ -706,45 +706,40 @@ private GroupInfoRequest createGroupInfoRequest(
nextCallId());
}
- private void handlePipelineFailure(RaftGroupId groupId,
- RoleInfoProto roleInfoProto) {
- String msg;
- UUID datanode = RatisHelper.toDatanodeId(roleInfoProto.getSelf());
- RaftPeerId id = RaftPeerId.valueOf(roleInfoProto.getSelf().getId());
+ private void handlePipelineFailure(RaftGroupId groupId, RoleInfoProto roleInfoProto, String reason) {
+ final RaftPeerId raftPeerId = RaftPeerId.valueOf(roleInfoProto.getSelf().getId());
+ Preconditions.assertEquals(getServer().getId(), raftPeerId, "raftPeerId");
+ final StringBuilder b = new StringBuilder()
+ .append(name).append(" with datanodeId ").append(RatisHelper.toDatanodeId(raftPeerId))
+ .append("handlePipelineFailure ").append(" for ").append(reason)
+ .append(": ").append(roleInfoProto.getRole())
+ .append(" elapsed time=").append(roleInfoProto.getRoleElapsedTimeMs()).append("ms");
+
switch (roleInfoProto.getRole()) {
case CANDIDATE:
- msg = datanode + " is in candidate state for " +
- roleInfoProto.getCandidateInfo().getLastLeaderElapsedTimeMs() + "ms";
+ final long lastLeaderElapsedTime = roleInfoProto.getCandidateInfo().getLastLeaderElapsedTimeMs();
+ b.append(", lastLeaderElapsedTime=").append(lastLeaderElapsedTime).append("ms");
break;
case FOLLOWER:
- msg = datanode + " closes pipeline when installSnapshot from leader " +
- "because leader snapshot doesn't contain any data to replay, " +
- "all the log entries prior to the snapshot might have been purged." +
- "So follower should not try to install snapshot from leader but" +
- "can close the pipeline here. It's in follower state for " +
- roleInfoProto.getRoleElapsedTimeMs() + "ms";
+ b.append(", outstandingOp=").append(roleInfoProto.getFollowerInfo().getOutstandingOp());
break;
case LEADER:
- StringBuilder sb = new StringBuilder();
- sb.append(datanode).append(" has not seen follower/s");
- for (RaftProtos.ServerRpcProto follower : roleInfoProto.getLeaderInfo()
- .getFollowerInfoList()) {
- if (follower.getLastRpcElapsedTimeMs() > nodeFailureTimeoutMs) {
- sb.append(" ").append(RatisHelper.toDatanodeId(follower.getId()))
- .append(" for ").append(follower.getLastRpcElapsedTimeMs())
- .append("ms");
- }
+ final long followerSlownessTimeoutMs = ratisServerConfig.getFollowerSlownessTimeout();
+ for (RaftProtos.ServerRpcProto follower : roleInfoProto.getLeaderInfo().getFollowerInfoList()) {
+ final long lastRpcElapsedTimeMs = follower.getLastRpcElapsedTimeMs();
+ final boolean slow = lastRpcElapsedTimeMs > followerSlownessTimeoutMs;
+ final RaftPeerId followerId = RaftPeerId.valueOf(follower.getId().getId());
+ b.append("\n Follower ").append(followerId)
+ .append(" with datanodeId ").append(RatisHelper.toDatanodeId(followerId))
+ .append(" is ").append(slow ? "slow" : " responding")
+ .append(" with lastRpcElapsedTime=").append(lastRpcElapsedTimeMs).append("ms");
}
- msg = sb.toString();
break;
default:
- LOG.error("unknown state: {}", roleInfoProto.getRole());
- throw new IllegalStateException("node" + id + " is in illegal role "
- + roleInfoProto.getRole());
+ throw new IllegalStateException("Unexpected role " + roleInfoProto.getRole());
}
- triggerPipelineClose(groupId, msg,
- ClosePipelineInfo.Reason.PIPELINE_FAILED);
+ triggerPipelineClose(groupId, b.toString(), ClosePipelineInfo.Reason.PIPELINE_FAILED);
}
private void triggerPipelineClose(RaftGroupId groupId, String detail,
@@ -869,12 +864,12 @@ public void removeGroup(HddsProtos.PipelineID pipelineId)
processReply(reply);
}
- void handleNodeSlowness(RaftGroupId groupId, RoleInfoProto roleInfoProto) {
- handlePipelineFailure(groupId, roleInfoProto);
+ void handleFollowerSlowness(RaftGroupId groupId, RoleInfoProto roleInfoProto, RaftPeer follower) {
+ handlePipelineFailure(groupId, roleInfoProto, "slow follower " + follower.getId());
}
void handleNoLeader(RaftGroupId groupId, RoleInfoProto roleInfoProto) {
- handlePipelineFailure(groupId, roleInfoProto);
+ handlePipelineFailure(groupId, roleInfoProto, "no leader");
}
void handleApplyTransactionFailure(RaftGroupId groupId,
@@ -901,10 +896,9 @@ void handleApplyTransactionFailure(RaftGroupId groupId,
void handleInstallSnapshotFromLeader(RaftGroupId groupId,
RoleInfoProto roleInfoProto,
TermIndex firstTermIndexInLog) {
- LOG.warn("Install snapshot notification received from Leader with " +
- "termIndex: {}, terminating pipeline: {}",
+ LOG.warn("handleInstallSnapshotFromLeader for firstTermIndexInLog={}, terminating pipeline: {}",
firstTermIndexInLog, groupId);
- handlePipelineFailure(groupId, roleInfoProto);
+ handlePipelineFailure(groupId, roleInfoProto, "install snapshot notification");
}
/**
@@ -950,7 +944,7 @@ void handleLeaderChangedNotification(RaftGroupMemberId groupMemberId,
LOG.info("Leader change notification received for group: {} with new " +
"leaderId: {}", groupMemberId.getGroupId(), raftPeerId1);
// Save the reported leader to be sent with the report to SCM
- boolean leaderForGroup = this.raftPeerId.equals(raftPeerId1);
+ final boolean leaderForGroup = server.getId().equals(raftPeerId1);
activePipelines.compute(groupMemberId.getGroupId(),
(key, value) -> value == null ? new ActivePipelineContext(leaderForGroup, false) :
new ActivePipelineContext(leaderForGroup, value.isPendingClose()));
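Review note: with the nodeFailureTimeoutMs field removed, the LEADER branch of handlePipelineFailure reads the threshold from ratisServerConfig.getFollowerSlownessTimeout() and now reports every follower, marking the ones whose last RPC is older than that threshold. A simplified, self-contained sketch of that classification (names and values are illustrative):

import java.util.Map;

public final class SlowFollowerReportSketch {
  static String build(Map<String, Long> lastRpcElapsedMsByFollower, long followerSlownessTimeoutMs) {
    StringBuilder b = new StringBuilder("Follower status:");
    lastRpcElapsedMsByFollower.forEach((follower, elapsedMs) -> {
      boolean slow = elapsedMs > followerSlownessTimeoutMs;
      b.append("\n  Follower ").append(follower)
          .append(" is ").append(slow ? "slow" : "responding")
          .append(" with lastRpcElapsedTime=").append(elapsedMs).append("ms");
    });
    return b.toString();
  }

  public static void main(String[] args) {
    // dn-3 has not responded for 45s against a 30s slowness timeout.
    System.out.println(build(Map.of("dn-2", 120L, "dn-3", 45_000L), 30_000L));
  }
}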
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java
index b22b9148bb1..5fced0e39b3 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java
@@ -29,6 +29,7 @@
import org.apache.commons.io.FileUtils;
import org.apache.hadoop.hdds.annotation.InterfaceAudience;
import org.apache.hadoop.hdds.annotation.InterfaceStability;
+import org.apache.hadoop.hdds.conf.ConfigurationSource;
import org.apache.hadoop.hdds.upgrade.HDDSLayoutFeature;
import org.apache.hadoop.hdfs.server.datanode.checker.VolumeCheckResult;
import org.apache.hadoop.ozone.container.common.statemachine.DatanodeConfiguration;
@@ -36,6 +37,7 @@
import org.apache.hadoop.ozone.container.common.utils.HddsVolumeUtil;
import org.apache.hadoop.ozone.container.common.utils.RawDB;
import org.apache.hadoop.ozone.container.common.utils.StorageVolumeUtil;
+import org.apache.hadoop.ozone.container.ozoneimpl.ContainerController;
import org.apache.hadoop.ozone.container.upgrade.VersionedDatanodeFeatures;
import org.apache.hadoop.ozone.container.upgrade.VersionedDatanodeFeatures.SchemaV3;
import org.apache.hadoop.util.Time;
@@ -44,6 +46,7 @@
import jakarta.annotation.Nullable;
+import static org.apache.hadoop.hdds.HddsConfigKeys.OZONE_DATANODE_IO_METRICS_PERCENTILES_INTERVALS_SECONDS_KEY;
import static org.apache.hadoop.ozone.OzoneConsts.CONTAINER_DB_NAME;
import static org.apache.hadoop.ozone.container.common.utils.HddsVolumeUtil.initPerDiskDBStore;
@@ -80,6 +83,8 @@ public class HddsVolume extends StorageVolume {
private final VolumeIOStats volumeIOStats;
private final VolumeInfoMetrics volumeInfoMetrics;
+ private ContainerController controller;
+
private final AtomicLong committedBytes = new AtomicLong(); // till Open containers become full
// Mentions the type of volume
@@ -119,8 +124,10 @@ private HddsVolume(Builder b) throws IOException {
if (!b.getFailedVolume() && getVolumeInfo().isPresent()) {
this.setState(VolumeState.NOT_INITIALIZED);
+ ConfigurationSource conf = getConf();
+ int[] intervals = conf.getInts(OZONE_DATANODE_IO_METRICS_PERCENTILES_INTERVALS_SECONDS_KEY);
this.volumeIOStats = new VolumeIOStats(b.getVolumeRootStr(),
- this.getStorageDir().toString());
+ this.getStorageDir().toString(), intervals);
this.volumeInfoMetrics =
new VolumeInfoMetrics(b.getVolumeRootStr(), this);
@@ -199,7 +206,7 @@ public void shutdown() {
/**
* Delete all files under
- * /hdds//tmp/deleted-containers.
+ * volume/hdds/cluster-id/tmp/deleted-containers.
* This is the directory where containers are moved when they are deleted
* from the system, but before being removed from the filesystem. This
* makes the deletion atomic.
@@ -382,6 +389,17 @@ public void loadDbStore(boolean readOnly) throws IOException {
getStorageID());
}
+ public void setController(ContainerController controller) {
+ this.controller = controller;
+ }
+
+ public long getContainers() {
+ if (controller != null) {
+ return controller.getContainerCount(this);
+ }
+ return 0;
+ }
+
/**
* Pick a DbVolume for HddsVolume and init db instance.
* Use the HddsVolume directly if no DbVolume found.
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/MutableVolumeSet.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/MutableVolumeSet.java
index e195b127d49..9afea8e6b0c 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/MutableVolumeSet.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/MutableVolumeSet.java
@@ -44,6 +44,7 @@
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
+import org.apache.ratis.util.function.CheckedRunnable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -84,7 +85,7 @@ public class MutableVolumeSet implements VolumeSet {
private String clusterID;
private final StorageVolumeChecker volumeChecker;
- private Runnable failedVolumeListener;
+ private CheckedRunnable failedVolumeListener;
private StateContext context;
private final StorageVolumeFactory volumeFactory;
private final StorageVolume.VolumeType volumeType;
@@ -132,7 +133,7 @@ public MutableVolumeSet(String dnUuid, String clusterID,
initializeVolumeSet();
}
- public void setFailedVolumeListener(Runnable runnable) {
+ public void setFailedVolumeListener(CheckedRunnable runnable) {
failedVolumeListener = runnable;
}
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/VolumeIOStats.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/VolumeIOStats.java
index e22addd354f..2ce19c3bf19 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/VolumeIOStats.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/VolumeIOStats.java
@@ -21,7 +21,10 @@
import org.apache.hadoop.metrics2.MetricsSystem;
import org.apache.hadoop.metrics2.annotation.Metric;
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
+import org.apache.hadoop.metrics2.lib.MetricsRegistry;
import org.apache.hadoop.metrics2.lib.MutableCounterLong;
+import org.apache.hadoop.metrics2.lib.MutableQuantiles;
+import org.apache.hadoop.metrics2.lib.MutableRate;
/**
* This class is used to track Volume IO stats for each HDDS Volume.
@@ -29,12 +32,23 @@
public class VolumeIOStats {
private String metricsSourceName = VolumeIOStats.class.getSimpleName();
private String storageDirectory;
- private @Metric MutableCounterLong readBytes;
- private @Metric MutableCounterLong readOpCount;
- private @Metric MutableCounterLong writeBytes;
- private @Metric MutableCounterLong writeOpCount;
- private @Metric MutableCounterLong readTime;
- private @Metric MutableCounterLong writeTime;
+ private final MetricsRegistry registry = new MetricsRegistry("VolumeIOStats");
+ @Metric
+ private MutableCounterLong readBytes;
+ @Metric
+ private MutableCounterLong readOpCount;
+ @Metric
+ private MutableCounterLong writeBytes;
+ @Metric
+ private MutableCounterLong writeOpCount;
+ @Metric
+ private MutableRate readTime;
+ @Metric
+ private MutableQuantiles[] readLatencyQuantiles;
+ @Metric
+ private MutableRate writeTime;
+ @Metric
+ private MutableQuantiles[] writeLatencyQuantiles;
@Deprecated
public VolumeIOStats() {
@@ -44,9 +58,24 @@ public VolumeIOStats() {
/**
* @param identifier Typically, path to volume root. e.g. /data/hdds
*/
- public VolumeIOStats(String identifier, String storageDirectory) {
+ public VolumeIOStats(String identifier, String storageDirectory, int[] intervals) {
this.metricsSourceName += '-' + identifier;
this.storageDirectory = storageDirectory;
+
+ // Initialize read/write latency quantiles only when percentile intervals are configured.
+ if (intervals != null && intervals.length > 0) {
+ final int length = intervals.length;
+ readLatencyQuantiles = new MutableQuantiles[intervals.length];
+ writeLatencyQuantiles = new MutableQuantiles[intervals.length];
+ for (int i = 0; i < length; i++) {
+ readLatencyQuantiles[i] = registry.newQuantiles(
+ "readLatency" + intervals[i] + "s",
+ "Read Data File Io Latency in ms", "ops", "latency", intervals[i]);
+ writeLatencyQuantiles[i] = registry.newQuantiles(
+ "writeLatency" + intervals[i] + "s",
+ "Write Data File Io Latency in ms", "ops", "latency", intervals[i]);
+ }
+ }
init();
}
@@ -99,7 +128,10 @@ public void incWriteOpCount() {
* @param time
*/
public void incReadTime(long time) {
- readTime.incr(time);
+ readTime.add(time);
+ if (readLatencyQuantiles != null) {
+ for (MutableQuantiles q : readLatencyQuantiles) {
+ q.add(time);
+ }
+ }
}
/**
@@ -107,7 +139,10 @@ public void incReadTime(long time) {
* @param time
*/
public void incWriteTime(long time) {
- writeTime.incr(time);
+ writeTime.add(time);
+ if (writeLatencyQuantiles != null) {
+ for (MutableQuantiles q : writeLatencyQuantiles) {
+ q.add(time);
+ }
+ }
}
/**
@@ -147,7 +182,7 @@ public long getWriteOpCount() {
* @return long
*/
public long getReadTime() {
- return readTime.value();
+ return (long) readTime.lastStat().total();
}
/**
@@ -155,7 +190,7 @@ public long getReadTime() {
* @return long
*/
public long getWriteTime() {
- return writeTime.value();
+ return (long) writeTime.lastStat().total();
}
@Metric
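Review note: HddsVolume now passes the configured percentile intervals into VolumeIOStats, which registers MutableQuantiles per interval next to the MutableRate timers. A hedged usage fragment (the sample intervals and paths are illustrative, and the surrounding imports are assumed; only the constant name, conf.getInts() and the constructor signature come from this patch):

int[] intervals = conf.getInts(OZONE_DATANODE_IO_METRICS_PERCENTILES_INTERVALS_SECONDS_KEY);
// e.g. intervals = {60, 300} registers readLatency60s/readLatency300s (and the write equivalents)
VolumeIOStats ioStats = new VolumeIOStats("/data/hdds", "/data/hdds/hdds", intervals);
ioStats.incReadTime(5);   // feeds both the readTime MutableRate and each readLatency<interval>s quantile
ioStats.incWriteTime(7);  // same for the write-side metrics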
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/VolumeInfo.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/VolumeInfo.java
index af890269255..3d1be9791ec 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/VolumeInfo.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/VolumeInfo.java
@@ -46,16 +46,18 @@
* - fsCapacity: reported total capacity from local fs.
* - minVolumeFreeSpace (mvfs) : determines the free space for closing
containers.This is like adding a few reserved bytes to reserved space.
- Dn's will send close container action to SCM at this limit & it is
+ DNs will send a close container action to SCM at this limit, and it is
configurable.
*
- *
+ *
+ * {@code
* |----used----| (avail) |++mvfs++|++++reserved+++++++|
* |<- capacity ->|
* | fsAvail |-------other-----------|
* |<- fsCapacity ->|
- *
+ * }
+ *
* What we could directly get from local fs:
* fsCapacity, fsAvail, (fsUsed = fsCapacity - fsAvail)
* We could get from config:
@@ -78,11 +80,13 @@
* then we should use DedicatedDiskSpaceUsage for
* `hdds.datanode.du.factory.classname`,
* Then it is much simpler, since we don't care about other usage:
- *
+ * {@code
* |----used----| (avail)/fsAvail |
* |<- capacity/fsCapacity ->|
+ * }
*
* We have avail == fsAvail.
+ *
*/
public final class VolumeInfo {
@@ -153,11 +157,14 @@ public long getCapacity() {
}
/**
+ *
+ * {@code
* Calculate available space use method A.
* |----used----| (avail) |++++++++reserved++++++++|
* |<- capacity ->|
- *
* A) avail = capacity - used
+ * }
+ *
*/
public long getAvailable() {
return usage.getAvailable();
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/VolumeInfoMetrics.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/VolumeInfoMetrics.java
index 68140600db9..cd31b8063d3 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/VolumeInfoMetrics.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/VolumeInfoMetrics.java
@@ -37,6 +37,7 @@ public class VolumeInfoMetrics {
private final HddsVolume volume;
@Metric("Returns the RocksDB compact times of the Volume")
private MutableRate dbCompactLatency;
+ private long containers;
/**
* @param identifier Typically, path to volume root. E.g. /data/hdds
@@ -153,4 +154,11 @@ public void dbCompactTimesNanoSecondsIncr(long time) {
dbCompactLatency.add(time);
}
+ /**
+ * Return the Container Count of the Volume.
+ */
+ @Metric("Returns the Container Count of the Volume")
+ public long getContainers() {
+ return volume.getContainers();
+ }
}
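Review note: the per-volume container count is exposed as a @Metric getter on VolumeInfoMetrics, which delegates to HddsVolume#getContainers(), which in turn asks the ContainerController injected via setController(). A simplified, self-contained sketch of that delegation (stand-in types; the real code calls ContainerController.getContainerCount(volume)):

import java.util.function.LongSupplier;

final class VolumeContainerCountSketch {
  private LongSupplier controller;   // stands in for the injected ContainerController

  void setController(LongSupplier controller) {
    this.controller = controller;
  }

  // Mirrors HddsVolume#getContainers(): 0 until a controller is attached.
  long getContainers() {
    return controller != null ? controller.getAsLong() : 0;
  }

  public static void main(String[] args) {
    VolumeContainerCountSketch volume = new VolumeContainerCountSketch();
    System.out.println(volume.getContainers());   // 0: controller not wired yet
    volume.setController(() -> 42L);               // e.g. wired during datanode startup
    System.out.println(volume.getContainers());   // 42: what the @Metric getter would report
  }
}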
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/VolumeUsage.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/VolumeUsage.java
index 7e138b05716..34ba66c91bb 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/VolumeUsage.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/VolumeUsage.java
@@ -19,6 +19,7 @@
package org.apache.hadoop.ozone.container.common.volume;
import com.google.common.annotations.VisibleForTesting;
+import org.apache.hadoop.hdds.conf.ConfigurationException;
import org.apache.hadoop.hdds.conf.ConfigurationSource;
import org.apache.hadoop.hdds.conf.StorageSize;
import org.apache.hadoop.hdds.conf.StorageUnit;
@@ -77,11 +78,15 @@ public long getUsedSpace() {
}
/**
+ *
+ * {@code
* Calculate available space use method B.
* |----used----| (avail) |++++++++reserved++++++++|
* | fsAvail |-------other-------|
- * ->|~~~~|<-
+ * ->|~~~~|<-
* remainingReserved
+ * }
+ *
* B) avail = fsAvail - Max(reserved - other, 0);
*/
public SpaceUsageSource getCurrentUsage() {
@@ -216,9 +221,8 @@ private static long getReserved(ConfigurationSource conf, String rootDir,
for (String reserve : reserveList) {
String[] words = reserve.split(":");
if (words.length < 2) {
- LOG.error("Reserved space should be configured in a pair, but current value is {}",
- reserve);
- continue;
+ throw new ConfigurationException("hdds.datanode.dir.du.reserved - " +
+ "Reserved space should be configured in a pair, but current value is " + reserve);
}
try {
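Review note: getCurrentUsage() computes availability with method B, and getReserved() now rejects malformed reserved-space entries instead of skipping them. A small worked example of the formula and of the expected pair format (numbers and paths are illustrative):

public final class AvailableSpaceExample {
  public static void main(String[] args) {
    long fsCapacity = 100L; // GB
    long fsAvail    = 40L;
    long used       = 50L;  // HDDS data
    long reserved   = 15L;

    long other = fsCapacity - fsAvail - used;                // 10 GB of non-HDDS data
    long remainingReserved = Math.max(reserved - other, 0);  // 5 GB of reserve not yet consumed by "other"
    long avail = fsAvail - remainingReserved;                // 35 GB usable, per B) avail = fsAvail - max(reserved - other, 0)

    System.out.println("avail = " + avail + " GB");

    // hdds.datanode.dir.du.reserved must be a <dir>:<size> pair, e.g. /data/hdds:10GB;
    // a value without the ':' separator now fails fast with a ConfigurationException
    // instead of being silently skipped.
  }
}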
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ec/reconstruction/ECContainerOperationClient.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ec/reconstruction/ECContainerOperationClient.java
index 9dedd65565f..95b7d06167f 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ec/reconstruction/ECContainerOperationClient.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ec/reconstruction/ECContainerOperationClient.java
@@ -18,26 +18,24 @@
package org.apache.hadoop.ozone.container.ec.reconstruction;
import com.google.common.collect.ImmutableList;
+import jakarta.annotation.Nonnull;
import org.apache.commons.collections.map.SingletonMap;
import org.apache.hadoop.hdds.client.ECReplicationConfig;
import org.apache.hadoop.hdds.conf.ConfigurationSource;
import org.apache.hadoop.hdds.protocol.DatanodeDetails;
import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos;
+import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerDataProto.State;
import org.apache.hadoop.hdds.scm.XceiverClientManager;
import org.apache.hadoop.hdds.scm.XceiverClientSpi;
import org.apache.hadoop.hdds.scm.client.ClientTrustManager;
-import org.apache.hadoop.hdds.security.x509.certificate.client.CACertificateProvider;
import org.apache.hadoop.hdds.scm.pipeline.Pipeline;
import org.apache.hadoop.hdds.scm.pipeline.PipelineID;
import org.apache.hadoop.hdds.scm.storage.ContainerProtocolCalls;
import org.apache.hadoop.hdds.security.x509.certificate.client.CertificateClient;
-import org.apache.hadoop.hdds.utils.HAUtils;
import org.apache.hadoop.ozone.OzoneSecurityUtil;
import org.apache.hadoop.ozone.container.common.helpers.BlockData;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.security.token.TokenIdentifier;
-import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerDataProto.State;
-import jakarta.annotation.Nonnull;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -46,7 +44,6 @@
import java.util.List;
import java.util.Objects;
import java.util.Set;
-import java.util.stream.Collectors;
/**
* This class wraps necessary container-level rpc calls
@@ -69,21 +66,17 @@ public ECContainerOperationClient(ConfigurationSource conf,
}
@Nonnull
- private static XceiverClientManager createClientManager(
- ConfigurationSource conf, CertificateClient certificateClient)
+ private static XceiverClientManager createClientManager(ConfigurationSource conf, CertificateClient certificateClient)
throws IOException {
ClientTrustManager trustManager = null;
if (OzoneSecurityUtil.isSecurityEnabled(conf)) {
- CACertificateProvider localCaCerts =
- () -> HAUtils.buildCAX509List(certificateClient, conf);
- CACertificateProvider remoteCacerts =
- () -> HAUtils.buildCAX509List(null, conf);
- trustManager = new ClientTrustManager(remoteCacerts, localCaCerts);
+ trustManager = certificateClient.createClientTrustManager();
}
- return new XceiverClientManager(conf,
- new XceiverClientManager.XceiverClientManagerConfigBuilder()
- .setMaxCacheSize(256).setStaleThresholdMs(10 * 1000).build(),
- trustManager);
+ XceiverClientManager.ScmClientConfig scmClientConfig = new XceiverClientManager.XceiverClientManagerConfigBuilder()
+ .setMaxCacheSize(256)
+ .setStaleThresholdMs(10 * 1000)
+ .build();
+ return new XceiverClientManager(conf, scmClientConfig, trustManager);
}
public BlockData[] listBlock(long containerId, DatanodeDetails dn,
@@ -99,14 +92,11 @@ public BlockData[] listBlock(long containerId, DatanodeDetails dn,
try {
return BlockData.getFromProtoBuf(i);
} catch (IOException e) {
- LOG.debug("Failed while converting to protobuf BlockData. Returning"
- + " null for listBlock from DN: " + dn,
- e);
+ LOG.debug("Failed while converting to protobuf BlockData. Returning null for listBlock from DN: {}", dn, e);
// TODO: revisit here.
return null;
}
- }).collect(Collectors.toList())
- .toArray(new BlockData[blockDataList.size()]);
+ }).toArray(BlockData[]::new);
} finally {
this.xceiverClientManager.releaseClient(xceiverClient, false);
}
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ec/reconstruction/ECReconstructionCoordinator.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ec/reconstruction/ECReconstructionCoordinator.java
index 7e64766b41c..f1e1d0d900b 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ec/reconstruction/ECReconstructionCoordinator.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ec/reconstruction/ECReconstructionCoordinator.java
@@ -42,7 +42,6 @@
import org.apache.hadoop.hdds.utils.IOUtils;
import org.apache.hadoop.io.ByteBufferPool;
import org.apache.hadoop.io.ElasticByteBufferPool;
-import org.apache.hadoop.ozone.OzoneConsts;
import org.apache.hadoop.ozone.client.io.BlockInputStreamFactory;
import org.apache.hadoop.ozone.client.io.BlockInputStreamFactoryImpl;
import org.apache.hadoop.ozone.client.io.ECBlockInputStreamProxy;
@@ -371,7 +370,7 @@ private void logBlockGroupDetails(BlockLocationInfo blockLocationInfo,
.append(" block length: ")
.append(data.getSize())
.append(" block group length: ")
- .append(getBlockDataLength(data))
+ .append(data.getBlockGroupLength())
.append(" chunk list: \n");
int cnt = 0;
for (ContainerProtos.ChunkInfo chunkInfo : data.getChunks()) {
@@ -573,7 +572,7 @@ private long calcEffectiveBlockGroupLen(BlockData[] blockGroup,
continue;
}
- long putBlockLen = getBlockDataLength(blockGroup[i]);
+ long putBlockLen = blockGroup[i].getBlockGroupLength();
// Use safe length is the minimum of the lengths recorded across the
// stripe
blockGroupLen = Math.min(putBlockLen, blockGroupLen);
@@ -581,16 +580,6 @@ private long calcEffectiveBlockGroupLen(BlockData[] blockGroup,
return blockGroupLen == Long.MAX_VALUE ? 0 : blockGroupLen;
}
- private long getBlockDataLength(BlockData blockData) {
- String lenStr = blockData.getMetadata()
- .get(OzoneConsts.BLOCK_GROUP_LEN_KEY_IN_PUT_BLOCK);
- // If we don't have the length, then it indicates a problem with the stripe.
- // All replica should carry the length, so if it is not there, we return 0,
- // which will cause us to set the length of the block to zero and not
- // attempt to reconstruct it.
- return (lenStr == null) ? 0 : Long.parseLong(lenStr);
- }
-
public ECReconstructionMetrics getECReconstructionMetrics() {
return this.metrics;
}
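Review note: calcEffectiveBlockGroupLen now obtains the committed group length from BlockData#getBlockGroupLength() instead of parsing the BLOCK_GROUP_LEN metadata entry by hand; the safe length is still the minimum recorded across the stripe. A simplified sketch of that min-across-stripe rule, omitting the padding/failed-index handling of the real method (names illustrative):

import java.util.stream.LongStream;

public final class EffectiveBlockGroupLengthSketch {
  static long effectiveLength(long... putBlockLengths) {
    long min = LongStream.of(putBlockLengths).min().orElse(Long.MAX_VALUE);
    return min == Long.MAX_VALUE ? 0 : min;
  }

  public static void main(String[] args) {
    // Replicas reported slightly different committed lengths; reconstruct only up to the smallest.
    System.out.println(effectiveLength(4096, 4096, 2048)); // 2048
    System.out.println(effectiveLength());                 // 0: no usable length recorded
  }
}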
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ec/reconstruction/ECReconstructionCoordinatorTask.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ec/reconstruction/ECReconstructionCoordinatorTask.java
index 6d32f3a3f3e..a50a125f6d4 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ec/reconstruction/ECReconstructionCoordinatorTask.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ec/reconstruction/ECReconstructionCoordinatorTask.java
@@ -46,6 +46,16 @@ public ECReconstructionCoordinatorTask(
debugString = reconstructionCommandInfo.toString();
}
+ @Override
+ public String getMetricName() {
+ return "ECReconstructions";
+ }
+
+ @Override
+ public String getMetricDescriptionSegment() {
+ return "EC reconstructions";
+ }
+
@Override
public void runTask() {
// Implement the coordinator logic to handle a container group
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainer.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainer.java
index cea6737c7c9..b4ff62e52d2 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainer.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainer.java
@@ -935,7 +935,6 @@ private ContainerReplicaProto.State getHddsState()
/**
* Returns container DB file.
- * @return
*/
public File getContainerDBFile() {
return KeyValueContainerLocationUtil.getContainerDBFile(containerData);
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerData.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerData.java
index ccc24dad0f9..708038bd13f 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerData.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerData.java
@@ -431,7 +431,6 @@ public KeyPrefixFilter getDeletingBlockKeyFilter() {
/**
* Schema v3 use a prefix as startKey,
* for other schemas just return null.
- * @return
*/
public String startKeyEmpty() {
if (hasSchema(SCHEMA_V3)) {
@@ -443,7 +442,6 @@ public String startKeyEmpty() {
/**
* Schema v3 use containerID as key prefix,
* for other schemas just return null.
- * @return
*/
public String containerPrefix() {
if (hasSchema(SCHEMA_V3)) {
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java
index d1028727648..06987f63561 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java
@@ -103,6 +103,8 @@
import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerDataProto.State.CLOSED;
import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerDataProto.State.QUASI_CLOSED;
import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerDataProto.State.UNHEALTHY;
+
+import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerDataProto.State.RECOVERING;
import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.CLOSED_CONTAINER_IO;
import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.CONTAINER_ALREADY_EXISTS;
import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.CONTAINER_INTERNAL_ERROR;
@@ -110,6 +112,7 @@
import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.DELETE_ON_NON_EMPTY_CONTAINER;
import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.DELETE_ON_OPEN_CONTAINER;
import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.GET_SMALL_FILE_ERROR;
+import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.INVALID_ARGUMENT;
import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.INVALID_CONTAINER_STATE;
import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.IO_EXCEPTION;
import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.PUT_SMALL_FILE_ERROR;
@@ -132,11 +135,10 @@
import static org.apache.hadoop.hdds.scm.protocolPB.ContainerCommandResponseBuilders.putBlockResponseSuccess;
import static org.apache.hadoop.hdds.scm.protocolPB.ContainerCommandResponseBuilders.unsupportedRequest;
import static org.apache.hadoop.hdds.scm.utils.ClientCommandsUtils.getReadChunkVersion;
-import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos
- .ContainerDataProto.State.RECOVERING;
+
import org.apache.hadoop.ozone.container.common.interfaces.ScanResult;
-import static org.apache.hadoop.ozone.ClientVersion.EC_REPLICA_INDEX_REQUIRED_IN_BLOCK_REQUEST;
+import static org.apache.hadoop.ozone.OzoneConsts.INCREMENTAL_CHUNK_LIST;
import org.apache.hadoop.util.Time;
import org.apache.ratis.statemachine.StateMachine;
@@ -259,6 +261,15 @@ static ContainerCommandResponseProto dispatchRequest(KeyValueHandler handler,
ContainerCommandRequestProto request, KeyValueContainer kvContainer,
DispatcherContext dispatcherContext) {
Type cmdType = request.getCmdType();
+ // Validate that the request was sent to the correct datanode by checking the node id.
+ if (kvContainer != null) {
+ try {
+ handler.validateRequestDatanodeId(kvContainer.getContainerData().getReplicaIndex(),
+ request.getDatanodeUuid());
+ } catch (StorageContainerException e) {
+ return ContainerUtils.logAndReturnError(LOG, e, request);
+ }
+ }
switch (cmdType) {
case CreateContainer:
@@ -377,7 +388,23 @@ ContainerCommandResponseProto handleCreateContainer(
" already exists", null, CONTAINER_ALREADY_EXISTS), request);
}
+ try {
+ this.validateRequestDatanodeId(request.getCreateContainer().hasReplicaIndex() ?
+ request.getCreateContainer().getReplicaIndex() : null, request.getDatanodeUuid());
+ } catch (StorageContainerException e) {
+ return ContainerUtils.logAndReturnError(LOG, e, request);
+ }
+
long containerID = request.getContainerID();
+ State containerState = request.getCreateContainer().getState();
+
+ if (containerState != RECOVERING) {
+ try {
+ containerSet.ensureContainerNotMissing(containerID, containerState);
+ } catch (StorageContainerException ex) {
+ return ContainerUtils.logAndReturnError(LOG, ex, request);
+ }
+ }
ContainerLayoutVersion layoutVersion =
ContainerLayoutVersion.getConfiguredVersion(conf);
@@ -402,7 +429,11 @@ ContainerCommandResponseProto handleCreateContainer(
try {
if (containerSet.getContainer(containerID) == null) {
newContainer.create(volumeSet, volumeChoosingPolicy, clusterId);
- created = containerSet.addContainer(newContainer);
+ if (RECOVERING == newContainer.getContainerState()) {
+ created = containerSet.addContainerByOverwriteMissingContainer(newContainer);
+ } else {
+ created = containerSet.addContainer(newContainer);
+ }
} else {
// The create container request for an already existing container can
// arrive in case the ContainerStateMachine reapplies the transaction
@@ -595,14 +626,20 @@ ContainerCommandResponseProto handlePutBlock(
boolean endOfBlock = false;
if (!request.getPutBlock().hasEof() || request.getPutBlock().getEof()) {
- // in EC, we will be doing empty put block.
- // So, let's flush only when there are any chunks
- if (!request.getPutBlock().getBlockData().getChunksList().isEmpty()) {
+ // There are two cases where the client sends an empty put block with eof.
+ // (1) An EC empty file. In this case, the block/chunk file does not exist,
+ // so no need to flush/close the file.
+ // (2) Ratis output stream in incremental chunk list mode may send empty put block
+ // to close the block, in which case we need to flush/close the file.
+ if (!request.getPutBlock().getBlockData().getChunksList().isEmpty() ||
+ blockData.getMetadata().containsKey(INCREMENTAL_CHUNK_LIST)) {
chunkManager.finishWriteChunks(kvContainer, blockData);
}
endOfBlock = true;
}
+ // Note: the checksum is carried inside blockData; handlePutBlock does no extra checksum validation here.
+
long bcsId =
dispatcherContext == null ? 0 : dispatcherContext.getLogIndex();
blockData.setBlockCommitSequenceId(bcsId);
@@ -718,15 +755,6 @@ ContainerCommandResponseProto handleGetContainerChecksumInfo(
return getGetContainerMerkleTreeResponse(request, checksumTree);
}
- /**
- * Checks if a replicaIndex needs to be checked based on the client version for a request.
- * @param request ContainerCommandRequest object.
- * @return true if the validation is required for the client version else false.
- */
- private boolean replicaIndexCheckRequired(ContainerCommandRequestProto request) {
- return request.hasVersion() && request.getVersion() >= EC_REPLICA_INDEX_REQUIRED_IN_BLOCK_REQUEST.toProtoValue();
- }
-
/**
* Handle Get Block operation. Calls BlockManager to process the request.
*/
@@ -745,9 +773,7 @@ ContainerCommandResponseProto handleGetBlock(
try {
BlockID blockID = BlockID.getFromProtobuf(
request.getGetBlock().getBlockID());
- if (replicaIndexCheckRequired(request)) {
- BlockUtils.verifyReplicaIdx(kvContainer, blockID);
- }
+ BlockUtils.verifyReplicaIdx(kvContainer, blockID);
responseData = blockManager.getBlock(kvContainer, blockID).getProtoBufMessage();
final long numBytes = responseData.getSerializedSize();
metrics.incContainerBytesStats(Type.GetBlock, numBytes);
@@ -870,9 +896,7 @@ ContainerCommandResponseProto handleReadChunk(
ChunkInfo chunkInfo = ChunkInfo.getFromProtoBuf(request.getReadChunk()
.getChunkData());
Preconditions.checkNotNull(chunkInfo);
- if (replicaIndexCheckRequired(request)) {
- BlockUtils.verifyReplicaIdx(kvContainer, blockID);
- }
+ BlockUtils.verifyReplicaIdx(kvContainer, blockID);
BlockUtils.verifyBCSId(kvContainer, blockID);
if (dispatcherContext == null) {
@@ -972,6 +996,7 @@ ContainerCommandResponseProto handleWriteChunk(
if (isWrite) {
data =
ChunkBuffer.wrap(writeChunk.getData().asReadOnlyByteBufferList());
+        // TODO: Checksum validation can be improved here; make it one-shot after the protocol change.
validateChunkChecksumData(data, chunkInfo);
}
chunkManager
@@ -992,6 +1017,9 @@ ContainerCommandResponseProto handleWriteChunk(
// of order.
blockData.setBlockCommitSequenceId(dispatcherContext.getLogIndex());
boolean eob = writeChunk.getBlock().getEof();
+ if (eob) {
+ chunkManager.finishWriteChunks(kvContainer, blockData);
+ }
blockManager.putBlock(kvContainer, blockData, eob);
blockDataProto = blockData.getProtoBufMessage();
final long numBytes = blockDataProto.getSerializedSize();
@@ -1217,7 +1245,7 @@ private void checkContainerOpen(KeyValueContainer kvContainer)
* might already be in closing state here.
*/
if (containerState == State.OPEN || containerState == State.CLOSING
- || containerState == State.RECOVERING) {
+ || containerState == RECOVERING) {
return;
}
@@ -1698,4 +1726,22 @@ public static FaultInjector getInjector() {
public static void setInjector(FaultInjector instance) {
injector = instance;
}
+
+  /**
+   * Verifies that the datanode UUID in the request matches this datanode. The check applies only to
+   * EC containers, i.e. when containerReplicaIdx is greater than 0.
+   *
+   * @param containerReplicaIdx replicaIndex of the container targeted by the command.
+   * @param requestDatanodeUUID datanode UUID carried in the request.
+   * @return true if the validation passes.
+   * @throws StorageContainerException if the request was routed to the wrong datanode.
+   */
+ private boolean validateRequestDatanodeId(Integer containerReplicaIdx, String requestDatanodeUUID)
+ throws StorageContainerException {
+ if (containerReplicaIdx != null && containerReplicaIdx > 0 && !requestDatanodeUUID.equals(this.getDatanodeId())) {
+ throw new StorageContainerException(
+ String.format("Request is trying to write to node with uuid : %s but the current nodeId is: %s .",
+ requestDatanodeUUID, this.getDatanodeId()), INVALID_ARGUMENT);
+ }
+ return true;
+ }
}
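
To make the effect of the new guard concrete, here is a small, self-contained sketch (hypothetical class and values, not part of the patch) of the decision it implements: only EC containers (replica index > 0) are validated, and the request is rejected only when it was routed to a datanode other than the local one.

// Hypothetical, standalone illustration of the datanode-UUID guard in validateRequestDatanodeId.
import java.io.IOException;

final class DatanodeIdGuardSketch {
  // Assumed local datanode UUID; in the handler this comes from getDatanodeId().
  private static final String LOCAL_DATANODE_UUID = "dn-1";

  static void validate(Integer containerReplicaIdx, String requestDatanodeUUID) throws IOException {
    if (containerReplicaIdx != null && containerReplicaIdx > 0
        && !requestDatanodeUUID.equals(LOCAL_DATANODE_UUID)) {
      throw new IOException("Request is trying to write to node " + requestDatanodeUUID
          + " but the current node is " + LOCAL_DATANODE_UUID);
    }
  }

  public static void main(String[] args) throws IOException {
    validate(0, "dn-2"); // non-EC container: never checked, passes
    validate(3, "dn-1"); // EC container routed to the right node: passes
    // validate(3, "dn-2") would throw, mirroring the StorageContainerException above
  }
}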
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/BlockUtils.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/BlockUtils.java
index 7773b54f794..8bbc2478004 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/BlockUtils.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/BlockUtils.java
@@ -99,7 +99,6 @@ public static DatanodeStore getUncachedDatanodeStore(
* opened by this thread, the other thread will get a RocksDB exception.
* @param containerData The container data
* @param conf Configuration
- * @return
* @throws IOException
*/
public static DatanodeStore getUncachedDatanodeStore(
@@ -248,7 +247,9 @@ public static void verifyBCSId(Container container, BlockID blockID)
public static void verifyReplicaIdx(Container container, BlockID blockID)
throws IOException {
Integer containerReplicaIndex = container.getContainerData().getReplicaIndex();
-    if (containerReplicaIndex > 0 && !containerReplicaIndex.equals(blockID.getReplicaIndex())) {
+    Integer blockReplicaIndex = blockID.getReplicaIndex();
+    if (containerReplicaIndex > 0 && blockReplicaIndex != null && blockReplicaIndex != 0 &&
+        !containerReplicaIndex.equals(blockReplicaIndex)) {
throw new StorageContainerException(
"Unable to find the Container with replicaIdx " + blockID.getReplicaIndex() + ". Container "
+ container.getContainerData().getContainerID() + " replicaIdx is "
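
The widened condition above means a block request that carries no replica index (null or 0) is now tolerated on an EC container; only a conflicting positive index is rejected. A hedged truth-table sketch follows (illustrative helper, not the actual BlockUtils method):

// Illustrative only: when does the relaxed replica-index check reject a request?
final class ReplicaIdxCheckSketch {
  static boolean rejects(Integer containerIdx, Integer blockIdx) {
    return containerIdx != null && containerIdx > 0
        && blockIdx != null && blockIdx != 0
        && !containerIdx.equals(blockIdx);
  }
  // rejects(2, 1)    -> true  (EC container, conflicting index)
  // rejects(2, 2)    -> false (match)
  // rejects(2, 0)    -> false (request carries no index; newly tolerated)
  // rejects(2, null) -> false (same)
  // rejects(0, 5)    -> false (non-EC container is never checked)
}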
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/ChunkUtils.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/ChunkUtils.java
index 0fac45571c7..dc048ac16aa 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/ChunkUtils.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/ChunkUtils.java
@@ -39,6 +39,7 @@
import java.util.EnumSet;
import java.util.List;
import java.util.Set;
+import java.util.concurrent.atomic.AtomicReference;
import java.util.concurrent.locks.ReadWriteLock;
import java.util.function.ToLongFunction;
@@ -50,6 +51,7 @@
import org.apache.hadoop.ozone.common.utils.BufferUtils;
import org.apache.hadoop.ozone.container.common.helpers.ChunkInfo;
import org.apache.hadoop.ozone.container.common.volume.HddsVolume;
+import org.apache.hadoop.ozone.container.keyvalue.impl.MappedBufferManager;
import org.apache.hadoop.util.Time;
import com.google.common.annotations.VisibleForTesting;
@@ -200,11 +202,12 @@ private static long writeDataToChannel(FileChannel channel, ChunkBuffer data,
}
}
+ @SuppressWarnings("checkstyle:parameternumber")
public static ChunkBuffer readData(long len, int bufferCapacity,
- File file, long off, HddsVolume volume, int readMappedBufferThreshold)
- throws StorageContainerException {
- if (len > readMappedBufferThreshold) {
- return readData(file, bufferCapacity, off, len, volume);
+ File file, long off, HddsVolume volume, int readMappedBufferThreshold, boolean mmapEnabled,
+ MappedBufferManager mappedBufferManager) throws StorageContainerException {
+ if (mmapEnabled && len > readMappedBufferThreshold && bufferCapacity > readMappedBufferThreshold) {
+ return readData(file, bufferCapacity, off, len, volume, mappedBufferManager);
} else if (len == 0) {
return ChunkBuffer.wrap(Collections.emptyList());
}
@@ -256,25 +259,52 @@ private static void readData(File file, long offset, long len,
* @return a list of {@link MappedByteBuffer} containing the data.
*/
private static ChunkBuffer readData(File file, int chunkSize,
- long offset, long length, HddsVolume volume)
+ long offset, long length, HddsVolume volume, MappedBufferManager mappedBufferManager)
throws StorageContainerException {
-    final List<ByteBuffer> buffers = new ArrayList<>(
-        Math.toIntExact((length - 1) / chunkSize) + 1);
- readData(file, offset, length, channel -> {
- long readLen = 0;
- while (readLen < length) {
- final int n = Math.toIntExact(Math.min(length - readLen, chunkSize));
- final ByteBuffer mapped = channel.map(
- FileChannel.MapMode.READ_ONLY, offset + readLen, n);
- LOG.debug("mapped: offset={}, readLen={}, n={}, {}",
- offset, readLen, n, mapped.getClass());
- readLen += mapped.remaining();
- buffers.add(mapped);
+ final int bufferNum = Math.toIntExact((length - 1) / chunkSize) + 1;
+ if (!mappedBufferManager.getQuota(bufferNum)) {
+ // proceed with normal buffer
+ final ByteBuffer[] buffers = BufferUtils.assignByteBuffers(length,
+ chunkSize);
+ readData(file, offset, length, c -> c.position(offset).read(buffers), volume);
+ Arrays.stream(buffers).forEach(ByteBuffer::flip);
+ return ChunkBuffer.wrap(Arrays.asList(buffers));
+ } else {
+ try {
+ // proceed with mapped buffer
+        final List<ByteBuffer> buffers = new ArrayList<>(bufferNum);
+ readData(file, offset, length, channel -> {
+ long readLen = 0;
+ while (readLen < length) {
+ final int n = Math.toIntExact(Math.min(length - readLen, chunkSize));
+ final long finalOffset = offset + readLen;
+            final AtomicReference<IOException> exception = new AtomicReference<>();
+ ByteBuffer mapped = mappedBufferManager.computeIfAbsent(file.getAbsolutePath(), finalOffset, n,
+ () -> {
+ try {
+ return channel.map(FileChannel.MapMode.READ_ONLY, finalOffset, n);
+ } catch (IOException e) {
+ LOG.error("Failed to map file {} with offset {} and length {}", file, finalOffset, n);
+ exception.set(e);
+ return null;
+ }
+ });
+ if (mapped == null) {
+ throw exception.get();
+ }
+ LOG.debug("mapped: offset={}, readLen={}, n={}, {}", finalOffset, readLen, n, mapped.getClass());
+ readLen += mapped.remaining();
+ buffers.add(mapped);
+ }
+ return readLen;
+ }, volume);
+ return ChunkBuffer.wrap(buffers);
+ } catch (Throwable e) {
+ mappedBufferManager.releaseQuota(bufferNum);
+ throw e;
}
- return readLen;
- }, volume);
- return ChunkBuffer.wrap(buffers);
+ }
}
/**
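
With the new signature, memory-mapped reads are used only when mmap is enabled and both the requested length and the buffer capacity exceed the threshold; otherwise the code falls back to regular buffers. A hedged caller fragment (values are illustrative, and len, bufferCapacity, chunkFile, offset and volume are assumed to be in scope, as in the read strategies below):

// Sketch of invoking the extended ChunkUtils.readData; threshold and quota values are illustrative.
int readMappedBufferThreshold = 32 * 1024 * 1024;                       // assumed threshold
MappedBufferManager mappedBufferManager = new MappedBufferManager(100); // assumed max mapped buffers
boolean mmapEnabled = true;                                             // readMappedBufferMaxCount > 0
ChunkBuffer data = ChunkUtils.readData(len, bufferCapacity, chunkFile, offset, volume,
    readMappedBufferThreshold, mmapEnabled, mappedBufferManager);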
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/KeyValueContainerUtil.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/KeyValueContainerUtil.java
index b287d9ac133..dd719a81fb3 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/KeyValueContainerUtil.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/KeyValueContainerUtil.java
@@ -439,13 +439,13 @@ public static boolean isSameSchemaVersion(String schema, String other) {
/**
* Moves container directory to a new location
- * under "/hdds//tmp/deleted-containers"
+ * under "volume/hdds/cluster-id/tmp/deleted-containers"
* and updates metadata and chunks path.
* Containers will be moved under it before getting deleted
* to avoid, in case of failure, having artifact leftovers
* on the default container path on the disk.
*
- * Delete operation for Schema < V3
+ * Delete operation for Schema < V3
* 1. Container is marked DELETED
* 2. Container is removed from memory container set
* 3. Container DB handler from cache is removed and closed
@@ -460,7 +460,6 @@ public static boolean isSameSchemaVersion(String schema, String other) {
* 5. Container is deleted from tmp directory.
*
* @param keyValueContainerData
- * @return true if renaming was successful
*/
public static void moveToDeletedContainerDir(
KeyValueContainerData keyValueContainerData,
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/BlockManagerImpl.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/BlockManagerImpl.java
index 7b3852011d3..6232b843567 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/BlockManagerImpl.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/BlockManagerImpl.java
@@ -64,6 +64,7 @@ public class BlockManagerImpl implements BlockManager {
// Default Read Buffer capacity when Checksum is not present
private final int defaultReadBufferCapacity;
private final int readMappedBufferThreshold;
+ private final int readMappedBufferMaxCount;
/**
* Constructs a Block Manager.
@@ -79,6 +80,9 @@ public BlockManagerImpl(ConfigurationSource conf) {
this.readMappedBufferThreshold = config.getBufferSize(
ScmConfigKeys.OZONE_CHUNK_READ_MAPPED_BUFFER_THRESHOLD_KEY,
ScmConfigKeys.OZONE_CHUNK_READ_MAPPED_BUFFER_THRESHOLD_DEFAULT);
+ this.readMappedBufferMaxCount = config.getInt(
+ ScmConfigKeys.OZONE_CHUNK_READ_MAPPED_BUFFER_MAX_COUNT_KEY,
+ ScmConfigKeys.OZONE_CHUNK_READ_MAPPED_BUFFER_MAX_COUNT_DEFAULT);
}
@Override
@@ -304,6 +308,11 @@ public int getReadMappedBufferThreshold() {
return readMappedBufferThreshold;
}
+ /** @return the max count of memory mapped buffers for read. */
+ public int getReadMappedBufferMaxCount() {
+ return readMappedBufferMaxCount;
+ }
+
/**
* Deletes an existing block.
* As Deletion is handled by BlockDeletingService,
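
The new limit is read through ScmConfigKeys.OZONE_CHUNK_READ_MAPPED_BUFFER_MAX_COUNT_KEY; judging by the log statements in the read strategies below, the property name is ozone.chunk.read.mapped.buffer.max.count. A hedged configuration sketch (the value 100 is illustrative, not the shipped default):

// Illustrative configuration of the mapped-buffer quota; a value of 0 or less disables mmap reads.
OzoneConfiguration conf = new OzoneConfiguration();
conf.setInt("ozone.chunk.read.mapped.buffer.max.count", 100);
BlockManager blockManager = new BlockManagerImpl(conf);
assert blockManager.getReadMappedBufferMaxCount() == 100;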
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/ChunkManagerFactory.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/ChunkManagerFactory.java
index 288a2d3e331..aa5d52f3cee 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/ChunkManagerFactory.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/ChunkManagerFactory.java
@@ -46,7 +46,6 @@ private ChunkManagerFactory() {
* @param conf Configuration
* @param manager This parameter will be used only for read data of
* FILE_PER_CHUNK layout file. Can be null for other cases.
- * @return
*/
public static ChunkManager createChunkManager(ConfigurationSource conf,
BlockManager manager, VolumeSet volSet) {
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/FilePerBlockStrategy.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/FilePerBlockStrategy.java
index a87b184ccec..4ca578d7717 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/FilePerBlockStrategy.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/FilePerBlockStrategy.java
@@ -75,6 +75,8 @@ public class FilePerBlockStrategy implements ChunkManager {
private final OpenFiles files = new OpenFiles();
private final int defaultReadBufferCapacity;
private final int readMappedBufferThreshold;
+ private final int readMappedBufferMaxCount;
+ private final MappedBufferManager mappedBufferManager;
private final VolumeSet volumeSet;
public FilePerBlockStrategy(boolean sync, BlockManager manager,
@@ -84,7 +86,15 @@ public FilePerBlockStrategy(boolean sync, BlockManager manager,
manager.getDefaultReadBufferCapacity();
this.readMappedBufferThreshold = manager == null ? 0
: manager.getReadMappedBufferThreshold();
+ this.readMappedBufferMaxCount = manager == null ? 0
+ : manager.getReadMappedBufferMaxCount();
+    LOG.info("ozone.chunk.read.mapped.buffer.max.count is loaded with {}", readMappedBufferMaxCount);
this.volumeSet = volSet;
+ if (this.readMappedBufferMaxCount > 0) {
+ mappedBufferManager = new MappedBufferManager(this.readMappedBufferMaxCount);
+ } else {
+ mappedBufferManager = null;
+ }
}
private static void checkLayoutVersion(Container container) {
@@ -192,10 +202,10 @@ public ChunkBuffer readChunk(Container container, BlockID blockID,
final long len = info.getLen();
long offset = info.getOffset();
- int bufferCapacity = ChunkManager.getBufferCapacityForChunkRead(info,
+ int bufferCapacity = ChunkManager.getBufferCapacityForChunkRead(info,
defaultReadBufferCapacity);
return ChunkUtils.readData(len, bufferCapacity, chunkFile, offset, volume,
- readMappedBufferThreshold);
+ readMappedBufferThreshold, readMappedBufferMaxCount > 0, mappedBufferManager);
}
@Override
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/FilePerChunkStrategy.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/FilePerChunkStrategy.java
index a649f573bf0..6ac88cad7f5 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/FilePerChunkStrategy.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/FilePerChunkStrategy.java
@@ -67,6 +67,8 @@ public class FilePerChunkStrategy implements ChunkManager {
private final BlockManager blockManager;
private final int defaultReadBufferCapacity;
private final int readMappedBufferThreshold;
+ private final int readMappedBufferMaxCount;
+ private final MappedBufferManager mappedBufferManager;
private final VolumeSet volumeSet;
public FilePerChunkStrategy(boolean sync, BlockManager manager,
@@ -77,7 +79,15 @@ public FilePerChunkStrategy(boolean sync, BlockManager manager,
manager.getDefaultReadBufferCapacity();
this.readMappedBufferThreshold = manager == null ? 0
: manager.getReadMappedBufferThreshold();
+ this.readMappedBufferMaxCount = manager == null ? 0
+ : manager.getReadMappedBufferMaxCount();
+    LOG.info("ozone.chunk.read.mapped.buffer.max.count is loaded with {}", readMappedBufferMaxCount);
this.volumeSet = volSet;
+ if (this.readMappedBufferMaxCount > 0) {
+ mappedBufferManager = new MappedBufferManager(this.readMappedBufferMaxCount);
+ } else {
+ mappedBufferManager = null;
+ }
}
private static void checkLayoutVersion(Container container) {
@@ -265,7 +275,7 @@ public ChunkBuffer readChunk(Container container, BlockID blockID,
long offset = info.getOffset() - chunkFileOffset;
Preconditions.checkState(offset >= 0);
return ChunkUtils.readData(len, bufferCapacity, file, offset, volume,
- readMappedBufferThreshold);
+ readMappedBufferThreshold, readMappedBufferMaxCount > 0, mappedBufferManager);
}
} catch (StorageContainerException ex) {
//UNABLE TO FIND chunk is not a problem as we will try with the
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/MappedBufferManager.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/MappedBufferManager.java
new file mode 100644
index 00000000000..be2751925c7
--- /dev/null
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/MappedBufferManager.java
@@ -0,0 +1,122 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.ozone.container.keyvalue.impl;
+
+import com.google.common.util.concurrent.Striped;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.lang.ref.WeakReference;
+import java.nio.ByteBuffer;
+import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.Semaphore;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.locks.Lock;
+import java.util.function.Supplier;
+
+/**
+ * Manages mapped buffers so that their total count stays under a predefined limit, and supports reusing
+ * mapped buffers.
+ */
+public class MappedBufferManager {
+
+  private static ConcurrentHashMap<String, WeakReference<ByteBuffer>> mappedBuffers =
+      new ConcurrentHashMap<>();
+ private static final Logger LOG = LoggerFactory.getLogger(MappedBufferManager.class);
+ private final Semaphore semaphore;
+ private final int capacity;
+ private final AtomicBoolean cleanupInProgress = new AtomicBoolean(false);
+  private final Striped<Lock> lock;
+
+ public MappedBufferManager(int capacity) {
+ this.capacity = capacity;
+ this.semaphore = new Semaphore(capacity);
+ this.lock = Striped.lazyWeakLock(1024);
+ }
+
+ public boolean getQuota(int permits) {
+ boolean ret = semaphore.tryAcquire(permits);
+ if (ret) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("quota is decreased by {} to total {}", permits, semaphore.availablePermits());
+ }
+ } else {
+ if (cleanupInProgress.compareAndSet(false, true)) {
+ CompletableFuture.runAsync(() -> {
+ int p = 0;
+ try {
+ for (String key : mappedBuffers.keySet()) {
+ ByteBuffer buf = mappedBuffers.get(key).get();
+ if (buf == null) {
+ mappedBuffers.remove(key);
+ p++;
+ }
+ }
+ if (p > 0) {
+ releaseQuota(p);
+ }
+ } finally {
+ cleanupInProgress.set(false);
+ }
+ });
+ }
+ }
+ return ret;
+ }
+
+ public void releaseQuota(int permits) {
+ semaphore.release(permits);
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("quota is increased by {} to total {}", permits, semaphore.availablePermits());
+ }
+ }
+
+ public int availableQuota() {
+ return semaphore.availablePermits();
+ }
+
+ public ByteBuffer computeIfAbsent(String file, long position, long size,
+      Supplier<ByteBuffer> supplier) {
+ String key = file + "-" + position + "-" + size;
+ Lock fileLock = lock.get(key);
+ fileLock.lock();
+ try {
+      WeakReference<ByteBuffer> refer = mappedBuffers.get(key);
+ if (refer != null && refer.get() != null) {
+ // reuse the mapped buffer
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("find buffer for key {}", key);
+ }
+ releaseQuota(1);
+ return refer.get();
+ }
+
+ ByteBuffer buffer = supplier.get();
+ if (buffer != null) {
+ mappedBuffers.put(key, new WeakReference<>(buffer));
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("add buffer for key {}", key);
+ }
+ }
+ return buffer;
+ } finally {
+ fileLock.unlock();
+ }
+ }
+}
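
For reference, the expected call pattern, mirroring how ChunkUtils drives this class above, is: acquire quota, map (or reuse) buffers under computeIfAbsent, and release the quota if anything fails. A hedged fragment (the mapRegion supplier is hypothetical and stands in for FileChannel.map):

// Hedged usage sketch of MappedBufferManager; quota value and mapRegion() are illustrative.
MappedBufferManager manager = new MappedBufferManager(100); // allow at most 100 mapped buffers
int bufferNum = 1;
if (!manager.getQuota(bufferNum)) {
  // Quota exhausted: fall back to regular heap buffers, as readData does above.
} else {
  try {
    ByteBuffer mapped = manager.computeIfAbsent("/path/to/chunk", 0L, 4096L, () -> mapRegion());
    // If an existing mapping was reused, computeIfAbsent already released one permit for us.
    if (mapped == null) {
      throw new IOException("mapping failed");
    }
  } catch (Throwable t) {
    manager.releaseQuota(bufferNum); // give the permits back on failure
    // rethrow or handle the error
  }
}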
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/StreamDataChannelBase.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/StreamDataChannelBase.java
index 8df856d4b93..601e7b2712c 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/StreamDataChannelBase.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/StreamDataChannelBase.java
@@ -99,7 +99,9 @@ public void setLinked() {
linked.set(true);
}
- /** @return true iff {@link StateMachine.DataChannel} is already linked. */
+ /**
+ * @return true if {@link org.apache.ratis.statemachine.StateMachine.DataChannel} is already linked.
+ */
public boolean cleanUp() {
if (linked.get()) {
// already linked, nothing to do.
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/interfaces/BlockManager.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/interfaces/BlockManager.java
index 6dd8590bdf3..256d357a31d 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/interfaces/BlockManager.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/interfaces/BlockManager.java
@@ -99,6 +99,9 @@ void finalizeBlock(Container container, BlockID blockId)
/** @return the threshold to read using memory mapped buffers. */
int getReadMappedBufferThreshold();
+ /** @return the max count of memory mapped buffers to read. */
+ int getReadMappedBufferMaxCount();
+
/**
* Shutdown ContainerManager.
*/
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/AbstractDatanodeStore.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/AbstractDatanodeStore.java
index 26719d7f035..d9edd6d4cb0 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/AbstractDatanodeStore.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/AbstractDatanodeStore.java
@@ -17,27 +17,22 @@
*/
package org.apache.hadoop.ozone.container.metadata;
-import com.google.common.annotations.VisibleForTesting;
import org.apache.hadoop.conf.StorageUnit;
import org.apache.hadoop.hdds.StringUtils;
import org.apache.hadoop.hdds.annotation.InterfaceAudience;
import org.apache.hadoop.hdds.conf.ConfigurationSource;
import org.apache.hadoop.hdds.utils.MetadataKeyFilters;
import org.apache.hadoop.hdds.utils.MetadataKeyFilters.KeyPrefixFilter;
-import org.apache.hadoop.hdds.utils.db.BatchOperationHandler;
import org.apache.hadoop.hdds.utils.db.DBProfile;
import org.apache.hadoop.hdds.utils.db.DBStore;
import org.apache.hadoop.hdds.utils.db.DBStoreBuilder;
import org.apache.hadoop.hdds.utils.db.Table;
import org.apache.hadoop.hdds.utils.db.TableIterator;
-import org.apache.hadoop.hdds.utils.db.managed.ManagedColumnFamilyOptions;
import org.apache.hadoop.hdds.utils.db.managed.ManagedDBOptions;
import org.apache.hadoop.ozone.container.common.helpers.BlockData;
import org.apache.hadoop.ozone.container.common.helpers.ChunkInfoList;
import org.apache.hadoop.ozone.container.common.interfaces.BlockIterator;
import org.apache.hadoop.ozone.container.common.statemachine.DatanodeConfiguration;
-import org.apache.hadoop.ozone.container.common.utils.db.DatanodeDBProfile;
-import org.rocksdb.InfoLogLevel;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -45,14 +40,11 @@
import java.io.IOException;
import java.util.NoSuchElementException;
-import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_DB_PROFILE;
-import static org.apache.hadoop.hdds.utils.db.DBStoreBuilder.HDDS_DEFAULT_DB_PROFILE;
-
/**
* Implementation of the {@link DatanodeStore} interface that contains
* functionality common to all more derived datanode store implementations.
*/
-public abstract class AbstractDatanodeStore implements DatanodeStore {
+public class AbstractDatanodeStore extends AbstractRDBStore<AbstractDatanodeDBDefinition> implements DatanodeStore {
  private Table<String, Long> metadataTable;
@@ -68,12 +60,6 @@ public abstract class AbstractDatanodeStore implements DatanodeStore {
public static final Logger LOG =
LoggerFactory.getLogger(AbstractDatanodeStore.class);
- private volatile DBStore store;
- private final AbstractDatanodeDBDefinition dbDef;
- private final ManagedColumnFamilyOptions cfOptions;
-
- private static DatanodeDBProfile dbProfile;
- private final boolean openReadOnly;
/**
* Constructs the metadata store and starts the DB services.
@@ -84,114 +70,64 @@ public abstract class AbstractDatanodeStore implements DatanodeStore {
protected AbstractDatanodeStore(ConfigurationSource config,
AbstractDatanodeDBDefinition dbDef, boolean openReadOnly)
throws IOException {
-
- dbProfile = DatanodeDBProfile
- .getProfile(config.getEnum(HDDS_DB_PROFILE, HDDS_DEFAULT_DB_PROFILE));
-
- // The same config instance is used on each datanode, so we can share the
- // corresponding column family options, providing a single shared cache
- // for all containers on a datanode.
- cfOptions = dbProfile.getColumnFamilyOptions(config);
-
- this.dbDef = dbDef;
- this.openReadOnly = openReadOnly;
- start(config);
+ super(dbDef, config, openReadOnly);
}
@Override
- public void start(ConfigurationSource config)
+ protected DBStore initDBStore(DBStoreBuilder dbStoreBuilder, ManagedDBOptions options, ConfigurationSource config)
throws IOException {
- if (this.store == null) {
- ManagedDBOptions options = dbProfile.getDBOptions();
- options.setCreateIfMissing(true);
- options.setCreateMissingColumnFamilies(true);
-
- if (this.dbDef instanceof DatanodeSchemaOneDBDefinition ||
- this.dbDef instanceof DatanodeSchemaTwoDBDefinition) {
- long maxWalSize = DBProfile.toLong(StorageUnit.MB.toBytes(2));
- options.setMaxTotalWalSize(maxWalSize);
- }
-
- DatanodeConfiguration dc =
- config.getObject(DatanodeConfiguration.class);
- // Config user log files
- InfoLogLevel level = InfoLogLevel.valueOf(
- dc.getRocksdbLogLevel() + "_LEVEL");
- options.setInfoLogLevel(level);
- options.setMaxLogFileSize(dc.getRocksdbLogMaxFileSize());
- options.setKeepLogFileNum(dc.getRocksdbLogMaxFileNum());
-
- if (this.dbDef instanceof DatanodeSchemaThreeDBDefinition) {
- options.setDeleteObsoleteFilesPeriodMicros(
- dc.getRocksdbDeleteObsoleteFilesPeriod());
-
- // For V3, all Rocksdb dir has the same "container.db" name. So use
- // parentDirName(storage UUID)-dbDirName as db metrics name
- this.store = DBStoreBuilder.newBuilder(config, dbDef)
- .setDBOptions(options)
- .setDefaultCFOptions(cfOptions)
- .setOpenReadOnly(openReadOnly)
- .setDBJmxBeanNameName(dbDef.getDBLocation(config).getName() + "-" +
- dbDef.getName())
- .build();
- } else {
- this.store = DBStoreBuilder.newBuilder(config, dbDef)
- .setDBOptions(options)
- .setDefaultCFOptions(cfOptions)
- .setOpenReadOnly(openReadOnly)
- .build();
- }
+ AbstractDatanodeDBDefinition dbDefinition = this.getDbDef();
+ if (dbDefinition instanceof DatanodeSchemaOneDBDefinition ||
+ dbDefinition instanceof DatanodeSchemaTwoDBDefinition) {
+ long maxWalSize = DBProfile.toLong(StorageUnit.MB.toBytes(2));
+ options.setMaxTotalWalSize(maxWalSize);
+ }
+ DatanodeConfiguration dc =
+ config.getObject(DatanodeConfiguration.class);
- // Use the DatanodeTable wrapper to disable the table iterator on
- // existing Table implementations retrieved from the DBDefinition.
- // See the DatanodeTable's Javadoc for an explanation of why this is
- // necessary.
- metadataTable = new DatanodeTable<>(
- dbDef.getMetadataColumnFamily().getTable(this.store));
- checkTableStatus(metadataTable, metadataTable.getName());
-
- // The block iterator this class returns will need to use the table
- // iterator internally, so construct a block data table instance
- // that does not have the iterator disabled by DatanodeTable.
- blockDataTableWithIterator =
- dbDef.getBlockDataColumnFamily().getTable(this.store);
-
- blockDataTable = new DatanodeTable<>(blockDataTableWithIterator);
- checkTableStatus(blockDataTable, blockDataTable.getName());
-
- if (dbDef.getFinalizeBlocksColumnFamily() != null) {
- finalizeBlocksTableWithIterator =
- dbDef.getFinalizeBlocksColumnFamily().getTable(this.store);
-
- finalizeBlocksTable = new DatanodeTable<>(
- finalizeBlocksTableWithIterator);
- checkTableStatus(finalizeBlocksTable, finalizeBlocksTable.getName());
- }
+ if (dbDefinition instanceof DatanodeSchemaThreeDBDefinition) {
+ options.setDeleteObsoleteFilesPeriodMicros(
+ dc.getRocksdbDeleteObsoleteFilesPeriod());
- if (dbDef.getLastChunkInfoColumnFamily() != null) {
- lastChunkInfoTable = new DatanodeTable<>(
- dbDef.getLastChunkInfoColumnFamily().getTable(this.store));
- checkTableStatus(lastChunkInfoTable, lastChunkInfoTable.getName());
- }
+ // For V3, all Rocksdb dir has the same "container.db" name. So use
+ // parentDirName(storage UUID)-dbDirName as db metrics name
+ dbStoreBuilder.setDBJmxBeanNameName(dbDefinition.getDBLocation(config).getName() + "-" +
+ dbDefinition.getName());
}
- }
-
- @Override
- public synchronized void stop() throws Exception {
- if (store != null) {
- store.close();
- store = null;
+ DBStore dbStore = dbStoreBuilder.setDBOptions(options).build();
+
+ // Use the DatanodeTable wrapper to disable the table iterator on
+ // existing Table implementations retrieved from the DBDefinition.
+ // See the DatanodeTable's Javadoc for an explanation of why this is
+ // necessary.
+ metadataTable = new DatanodeTable<>(
+ dbDefinition.getMetadataColumnFamily().getTable(dbStore));
+ checkTableStatus(metadataTable, metadataTable.getName());
+
+ // The block iterator this class returns will need to use the table
+ // iterator internally, so construct a block data table instance
+ // that does not have the iterator disabled by DatanodeTable.
+ blockDataTableWithIterator =
+ dbDefinition.getBlockDataColumnFamily().getTable(dbStore);
+
+ blockDataTable = new DatanodeTable<>(blockDataTableWithIterator);
+ checkTableStatus(blockDataTable, blockDataTable.getName());
+
+ if (dbDefinition.getFinalizeBlocksColumnFamily() != null) {
+ finalizeBlocksTableWithIterator =
+ dbDefinition.getFinalizeBlocksColumnFamily().getTable(dbStore);
+
+ finalizeBlocksTable = new DatanodeTable<>(
+ finalizeBlocksTableWithIterator);
+ checkTableStatus(finalizeBlocksTable, finalizeBlocksTable.getName());
}
- }
- @Override
- public DBStore getStore() {
- return this.store;
- }
-
- @Override
- public BatchOperationHandler getBatchHandler() {
- return this.store;
+ if (dbDefinition.getLastChunkInfoColumnFamily() != null) {
+ lastChunkInfoTable = new DatanodeTable<>(
+ dbDefinition.getLastChunkInfoColumnFamily().getTable(dbStore));
+ checkTableStatus(lastChunkInfoTable, lastChunkInfoTable.getName());
+ }
+ return dbStore;
}
@Override
@@ -240,44 +176,6 @@ public BlockIterator<Long> getFinalizeBlockIterator(long containerID,
finalizeBlocksTableWithIterator.iterator(), filter);
}
- @Override
- public synchronized boolean isClosed() {
- if (this.store == null) {
- return true;
- }
- return this.store.isClosed();
- }
-
- @Override
- public void close() throws IOException {
- this.store.close();
- this.cfOptions.close();
- }
-
- @Override
- public void flushDB() throws IOException {
- store.flushDB();
- }
-
- @Override
- public void flushLog(boolean sync) throws IOException {
- store.flushLog(sync);
- }
-
- @Override
- public void compactDB() throws IOException {
- store.compactDB();
- }
-
- @VisibleForTesting
- public DatanodeDBProfile getDbProfile() {
- return dbProfile;
- }
-
- protected AbstractDatanodeDBDefinition getDbDef() {
- return this.dbDef;
- }
-
  protected Table<String, BlockData> getBlockDataTableWithIterator() {
return this.blockDataTableWithIterator;
}
@@ -300,9 +198,9 @@ protected static void checkTableStatus(Table<?, ?> table, String name)
/**
* Block Iterator for KeyValue Container. This block iterator returns blocks
- * which match with the {@link MetadataKeyFilters.KeyPrefixFilter}. If no
+ * which match with the {@link org.apache.hadoop.hdds.utils.MetadataKeyFilters.KeyPrefixFilter}. If no
* filter is specified, then default filter used is
- * {@link MetadataKeyFilters#getUnprefixedKeyFilter()}
+ * {@link org.apache.hadoop.hdds.utils.MetadataKeyFilters#getUnprefixedKeyFilter()}
*/
@InterfaceAudience.Public
public static class KeyValueBlockIterator implements
@@ -405,9 +303,9 @@ public void close() throws IOException {
/**
* Block localId Iterator for KeyValue Container.
* This Block localId iterator returns localIds
- * which match with the {@link MetadataKeyFilters.KeyPrefixFilter}. If no
+ * which match with the {@link org.apache.hadoop.hdds.utils.MetadataKeyFilters.KeyPrefixFilter}. If no
* filter is specified, then default filter used is
- * {@link MetadataKeyFilters#getUnprefixedKeyFilter()}
+ * {@link org.apache.hadoop.hdds.utils.MetadataKeyFilters#getUnprefixedKeyFilter()}
*/
@InterfaceAudience.Public
public static class KeyValueBlockLocalIdIterator implements
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/AbstractRDBStore.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/AbstractRDBStore.java
new file mode 100644
index 00000000000..5ce1a85b388
--- /dev/null
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/AbstractRDBStore.java
@@ -0,0 +1,135 @@
+package org.apache.hadoop.ozone.container.metadata;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+import com.google.common.annotations.VisibleForTesting;
+import org.apache.hadoop.hdds.conf.ConfigurationSource;
+import org.apache.hadoop.hdds.utils.db.BatchOperationHandler;
+import org.apache.hadoop.hdds.utils.db.DBDefinition;
+import org.apache.hadoop.hdds.utils.db.DBStore;
+import org.apache.hadoop.hdds.utils.db.DBStoreBuilder;
+import org.apache.hadoop.hdds.utils.db.managed.ManagedColumnFamilyOptions;
+import org.apache.hadoop.hdds.utils.db.managed.ManagedDBOptions;
+import org.apache.hadoop.ozone.container.common.statemachine.DatanodeConfiguration;
+import org.apache.hadoop.ozone.container.common.utils.db.DatanodeDBProfile;
+import org.rocksdb.InfoLogLevel;
+
+import java.io.IOException;
+
+import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_DB_PROFILE;
+import static org.apache.hadoop.hdds.utils.db.DBStoreBuilder.HDDS_DEFAULT_DB_PROFILE;
+
+/**
+ * Abstract Interface defining the way to interact with any rocksDB in the datanode.
+ * @param <DEF> Generic parameter defining the schema for the DB.
+ */
+public abstract class AbstractRDBStore<DEF extends DBDefinition> implements DBStoreManager {
+ private final DEF dbDef;
+ private final ManagedColumnFamilyOptions cfOptions;
+ private static DatanodeDBProfile dbProfile;
+ private final boolean openReadOnly;
+ private volatile DBStore store;
+
+ protected AbstractRDBStore(DEF dbDef, ConfigurationSource config, boolean openReadOnly) throws IOException {
+ dbProfile = DatanodeDBProfile.getProfile(config.getEnum(HDDS_DB_PROFILE, HDDS_DEFAULT_DB_PROFILE));
+
+ // The same config instance is used on each datanode, so we can share the
+ // corresponding column family options, providing a single shared cache
+ // for all containers on a datanode.
+ cfOptions = dbProfile.getColumnFamilyOptions(config);
+ this.dbDef = dbDef;
+ this.openReadOnly = openReadOnly;
+ start(config);
+ }
+
+ public void start(ConfigurationSource config)
+ throws IOException {
+ if (this.store == null) {
+ ManagedDBOptions options = dbProfile.getDBOptions();
+ options.setCreateIfMissing(true);
+ options.setCreateMissingColumnFamilies(true);
+
+ DatanodeConfiguration dc =
+ config.getObject(DatanodeConfiguration.class);
+ // Config user log files
+ InfoLogLevel level = InfoLogLevel.valueOf(
+ dc.getRocksdbLogLevel() + "_LEVEL");
+ options.setInfoLogLevel(level);
+ options.setMaxLogFileSize(dc.getRocksdbLogMaxFileSize());
+ options.setKeepLogFileNum(dc.getRocksdbLogMaxFileNum());
+ this.store = initDBStore(DBStoreBuilder.newBuilder(config, dbDef)
+ .setDBOptions(options)
+ .setDefaultCFOptions(cfOptions)
+ .setOpenReadOnly(openReadOnly), options, config);
+ }
+ }
+
+ protected abstract DBStore initDBStore(DBStoreBuilder dbStoreBuilder, ManagedDBOptions options,
+ ConfigurationSource config) throws IOException;
+
+ public synchronized void stop() throws Exception {
+ if (store != null) {
+ store.close();
+ store = null;
+ }
+ }
+
+ public DBStore getStore() {
+ return this.store;
+ }
+
+ public synchronized boolean isClosed() {
+ if (this.store == null) {
+ return true;
+ }
+ return this.store.isClosed();
+ }
+
+ public BatchOperationHandler getBatchHandler() {
+ return this.store;
+ }
+
+ public void close() throws IOException {
+ this.store.close();
+ this.cfOptions.close();
+ }
+
+ public void flushDB() throws IOException {
+ store.flushDB();
+ }
+
+ public void flushLog(boolean sync) throws IOException {
+ store.flushLog(sync);
+ }
+
+ public void compactDB() throws IOException {
+ store.compactDB();
+ }
+
+ @VisibleForTesting
+ public DatanodeDBProfile getDbProfile() {
+ return dbProfile;
+ }
+
+ protected DEF getDbDef() {
+ return this.dbDef;
+ }
+
+}
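
Concrete stores now only provide schema-specific wiring via initDBStore, while option defaults, lifecycle and flush/close plumbing stay in this base class. A minimal hypothetical subclass (illustrative name, no table wiring; see AbstractDatanodeStore above for the real pattern):

// Hypothetical minimal AbstractRDBStore subclass; real stores also wire their tables in initDBStore.
public class MinimalDatanodeStore extends AbstractRDBStore<AbstractDatanodeDBDefinition> {

  protected MinimalDatanodeStore(ConfigurationSource config, AbstractDatanodeDBDefinition dbDef,
      boolean openReadOnly) throws IOException {
    super(dbDef, config, openReadOnly);
  }

  @Override
  protected DBStore initDBStore(DBStoreBuilder dbStoreBuilder, ManagedDBOptions options,
      ConfigurationSource config) throws IOException {
    // Schema-specific tuning (WAL size, JMX bean name, column families) would go here.
    return dbStoreBuilder.setDBOptions(options).build();
  }
}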
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/DBStoreManager.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/DBStoreManager.java
new file mode 100644
index 00000000000..ec9849950a0
--- /dev/null
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/DBStoreManager.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.ozone.container.metadata;
+
+import org.apache.hadoop.hdds.conf.ConfigurationSource;
+import org.apache.hadoop.hdds.utils.db.BatchOperationHandler;
+import org.apache.hadoop.hdds.utils.db.DBStore;
+
+import java.io.Closeable;
+import java.io.IOException;
+
+/**
+ * Interface for interacting with datanode databases.
+ */
+public interface DBStoreManager extends Closeable {
+
+ /**
+ * Start datanode manager.
+ *
+ * @param configuration - Configuration
+ * @throws IOException - Unable to start datanode store.
+ */
+ void start(ConfigurationSource configuration) throws IOException;
+
+ /**
+ * Stop datanode manager.
+ */
+ void stop() throws Exception;
+
+ /**
+ * Get datanode store.
+ *
+ * @return datanode store.
+ */
+ DBStore getStore();
+
+ /**
+ * Helper to create and write batch transactions.
+ */
+ BatchOperationHandler getBatchHandler();
+
+ void flushLog(boolean sync) throws IOException;
+
+ void flushDB() throws IOException;
+
+ void compactDB() throws IOException;
+
+ /**
+ * Returns if the underlying DB is closed. This call is thread safe.
+ * @return true if the DB is closed.
+ */
+ boolean isClosed();
+
+ default void compactionIfNeeded() throws Exception {
+ }
+}
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/DatanodeSchemaOneDBDefinition.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/DatanodeSchemaOneDBDefinition.java
index 4f54e85da2b..bd1c0fb368a 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/DatanodeSchemaOneDBDefinition.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/DatanodeSchemaOneDBDefinition.java
@@ -51,27 +51,21 @@ public class DatanodeSchemaOneDBDefinition
BLOCK_DATA =
new DBColumnFamilyDefinition<>(
StringUtils.bytes2String(DEFAULT_COLUMN_FAMILY),
- String.class,
SchemaOneKeyCodec.get(),
- BlockData.class,
BlockData.getCodec());
public static final DBColumnFamilyDefinition