From 7903a5942f3ee59b458d2073686547ec06b12d63 Mon Sep 17 00:00:00 2001 From: Michal Vala Date: Mon, 24 Jul 2017 11:59:42 +0200 Subject: [PATCH 1/3] checksum calculation optimization to lower memory consumption --- src/main/java/cz/sparko/boxitory/App.java | 2 +- .../sparko/boxitory/conf/AppProperties.java | 9 ++++ .../boxitory/factory/HashServiceFactory.java | 13 +++--- ....java => FilesystemDigestHashService.java} | 46 +++++++++---------- src/main/resources/logback.xml | 16 +++++++ ...a => FilesystemDigestHashServiceTest.java} | 27 ++++++----- .../service/HashServiceFactoryTest.java | 15 ++++-- 7 files changed, 81 insertions(+), 47 deletions(-) rename src/main/java/cz/sparko/boxitory/service/{DigestHashService.java => FilesystemDigestHashService.java} (51%) create mode 100644 src/main/resources/logback.xml rename src/test/java/cz/sparko/boxitory/service/{DigestHashServiceTest.java => FilesystemDigestHashServiceTest.java} (69%) diff --git a/src/main/java/cz/sparko/boxitory/App.java b/src/main/java/cz/sparko/boxitory/App.java index 487f72d..db4acec 100644 --- a/src/main/java/cz/sparko/boxitory/App.java +++ b/src/main/java/cz/sparko/boxitory/App.java @@ -22,7 +22,7 @@ public static void main(String[] args) { @Bean @Autowired public BoxRepository boxRepository(AppProperties appProperties) throws NoSuchAlgorithmException { - HashService hashService = HashServiceFactory.createHashService(appProperties.getChecksum()); + HashService hashService = HashServiceFactory.createHashService(appProperties); return new FilesystemBoxRepository(appProperties, hashService); } } diff --git a/src/main/java/cz/sparko/boxitory/conf/AppProperties.java b/src/main/java/cz/sparko/boxitory/conf/AppProperties.java index fe56782..5e0f01d 100644 --- a/src/main/java/cz/sparko/boxitory/conf/AppProperties.java +++ b/src/main/java/cz/sparko/boxitory/conf/AppProperties.java @@ -10,6 +10,7 @@ public class AppProperties { private String host_prefix = ""; private String checksum = "disabled"; private 
boolean sort_desc = false; + private int checksum_buffer_size = 1024; public String getHome() { return home; @@ -27,6 +28,10 @@ public String getChecksum() { return checksum; } + public int getChecksum_buffer_size() { + return checksum_buffer_size; + } + public void setSort_desc(boolean sort_desc) { this.sort_desc = sort_desc; } @@ -42,4 +47,8 @@ public void setHost_prefix(String host_prefix) { public void setChecksum(String checksum) { this.checksum = checksum; } + + public void setChecksum_buffer_size(int checksum_buffer_size) { + this.checksum_buffer_size = checksum_buffer_size; + } } diff --git a/src/main/java/cz/sparko/boxitory/factory/HashServiceFactory.java b/src/main/java/cz/sparko/boxitory/factory/HashServiceFactory.java index d6eec2a..a1e7e84 100644 --- a/src/main/java/cz/sparko/boxitory/factory/HashServiceFactory.java +++ b/src/main/java/cz/sparko/boxitory/factory/HashServiceFactory.java @@ -1,7 +1,8 @@ package cz.sparko.boxitory.factory; +import cz.sparko.boxitory.conf.AppProperties; +import cz.sparko.boxitory.service.FilesystemDigestHashService; import cz.sparko.boxitory.service.NoopHashService; -import cz.sparko.boxitory.service.DigestHashService; import cz.sparko.boxitory.service.HashService; import java.security.MessageDigest; @@ -9,16 +10,16 @@ public class HashServiceFactory { - public static HashService createHashService(String algorithm) throws NoSuchAlgorithmException { - algorithm = algorithm.toUpperCase(); + public static HashService createHashService(AppProperties appProperties) throws NoSuchAlgorithmException { + String algorithm = appProperties.getChecksum().toUpperCase(); switch (algorithm) { case "MD5": - return new DigestHashService(MessageDigest.getInstance(algorithm)); + return new FilesystemDigestHashService(MessageDigest.getInstance(algorithm), appProperties); case "SHA1": - return new DigestHashService(MessageDigest.getInstance("SHA-1")); + return new FilesystemDigestHashService(MessageDigest.getInstance("SHA-1"), appProperties); 
case "SHA256": - return new DigestHashService(MessageDigest.getInstance("SHA-256")); + return new FilesystemDigestHashService(MessageDigest.getInstance("SHA-256"), appProperties); case "DISABLED": return new NoopHashService(); default: diff --git a/src/main/java/cz/sparko/boxitory/service/DigestHashService.java b/src/main/java/cz/sparko/boxitory/service/FilesystemDigestHashService.java similarity index 51% rename from src/main/java/cz/sparko/boxitory/service/DigestHashService.java rename to src/main/java/cz/sparko/boxitory/service/FilesystemDigestHashService.java index 3a003b5..ebcfb04 100644 --- a/src/main/java/cz/sparko/boxitory/service/DigestHashService.java +++ b/src/main/java/cz/sparko/boxitory/service/FilesystemDigestHashService.java @@ -1,22 +1,26 @@ package cz.sparko.boxitory.service; +import cz.sparko.boxitory.conf.AppProperties; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import javax.xml.bind.DatatypeConverter; import java.io.File; import java.io.IOException; +import java.io.InputStream; import java.nio.file.Files; import java.security.MessageDigest; import java.util.Objects; -public class DigestHashService implements HashService { +public class FilesystemDigestHashService implements HashService { - private static final Logger LOG = LoggerFactory.getLogger(DigestHashService.class); + private static final Logger LOG = LoggerFactory.getLogger(FilesystemDigestHashService.class); private MessageDigest messageDigest; + private int streamBufferLength; - public DigestHashService(MessageDigest messageDigest) { + public FilesystemDigestHashService(MessageDigest messageDigest, AppProperties appProperties) { this.messageDigest = messageDigest; + streamBufferLength = appProperties.getChecksum_buffer_size(); } @Override @@ -26,29 +30,23 @@ public String getHashType() { @Override public String getChecksum(String string) { - byte[] bytes; - File file = new File(string); - try { - bytes = getByteArrayFromFile(file); + try (InputStream boxDataStream = 
Files.newInputStream(new File(string).toPath())) { + LOG.trace("buffering box data (buffer size [{}]b) ...", streamBufferLength); + final byte[] buffer = new byte[streamBufferLength]; + int read = boxDataStream.read(buffer, 0, streamBufferLength); + + while (read > -1) { + messageDigest.update(buffer, 0, read); + read = boxDataStream.read(buffer, 0, streamBufferLength); + } } catch (IOException e) { - LOG.error("Error during processing file [{}], message: [{}]", file, e.getMessage()); + LOG.error("Error during processing file [{}], message: [{}]", string, e.getMessage()); throw new RuntimeException( "Error while getting checksum for file " + string + " reason: " + e.getMessage(), e ); } - return getHash( - getDigestBytes(bytes) - ); - } - - private byte[] getByteArrayFromFile(File file) throws IOException { - return Files.readAllBytes(file.toPath()); - } - - private byte[] getDigestBytes(byte[] bytes) { - messageDigest.update(bytes); - return messageDigest.digest(); + return getHash(messageDigest.digest()); } private String getHash(byte[] diggestBytes) { @@ -57,16 +55,16 @@ private String getHash(byte[] diggestBytes) { @Override public String toString() { - return "DigestHashService{" + + return "FilesystemDigestHashService{" + "messageDigest=" + messageDigest + '}'; } @Override public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - DigestHashService that = (DigestHashService) o; + if (this == o) { return true; } + if (o == null || getClass() != o.getClass()) { return false; } + FilesystemDigestHashService that = (FilesystemDigestHashService) o; return messageDigest.getAlgorithm().equals(that.messageDigest.getAlgorithm()); } diff --git a/src/main/resources/logback.xml b/src/main/resources/logback.xml new file mode 100644 index 0000000..98bd618 --- /dev/null +++ b/src/main/resources/logback.xml @@ -0,0 +1,16 @@ + + + + + + + + + + + + + + + + \ No newline at end of file diff --git 
a/src/test/java/cz/sparko/boxitory/service/DigestHashServiceTest.java b/src/test/java/cz/sparko/boxitory/service/FilesystemDigestHashServiceTest.java similarity index 69% rename from src/test/java/cz/sparko/boxitory/service/DigestHashServiceTest.java rename to src/test/java/cz/sparko/boxitory/service/FilesystemDigestHashServiceTest.java index 27e6b94..c4c274d 100644 --- a/src/test/java/cz/sparko/boxitory/service/DigestHashServiceTest.java +++ b/src/test/java/cz/sparko/boxitory/service/FilesystemDigestHashServiceTest.java @@ -1,5 +1,6 @@ package cz.sparko.boxitory.service; +import cz.sparko.boxitory.conf.AppProperties; import org.apache.commons.io.FileUtils; import org.springframework.boot.test.context.SpringBootTest; import org.testng.annotations.AfterClass; @@ -8,6 +9,7 @@ import org.testng.annotations.Test; import java.io.File; +import java.io.FileWriter; import java.io.IOException; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; @@ -15,7 +17,7 @@ import static org.testng.Assert.assertEquals; @SpringBootTest -public class DigestHashServiceTest { +public class FilesystemDigestHashServiceTest { private final String TEST_HOME = "test_repository"; private File testHomeDir; @@ -37,9 +39,11 @@ private void createTestFolderStructure() throws IOException { f26.mkdir(); f27.mkdir(); - new File(f25.getAbsolutePath() + "/f25_1_virtualbox.box").createNewFile(); - new File(f25.getAbsolutePath() + "/f25_2_virtualbox.box").createNewFile(); - new File(f25.getAbsolutePath() + "/f25_3_virtualbox.box").createNewFile(); + File f25box1 = new File(f25.getAbsolutePath() + "/f25_1_virtualbox.box"); + f25box1.createNewFile(); + FileWriter fileWriter = new FileWriter(f25box1); + fileWriter.write("123456789\n987654321\nabcdefghi"); + fileWriter.close(); } @AfterClass @@ -53,24 +57,25 @@ public Object[][] filesAndHashes() { { "MD5", new File(testHomeDir.getAbsolutePath() + "/f25/f25_1_virtualbox.box"), - "d41d8cd98f00b204e9800998ecf8427e" + 
"86462c346f1358ddbf4f137fb5da43cf" }, { "SHA-1", - new File(testHomeDir.getAbsolutePath() + "/f25/f25_2_virtualbox.box"), - "da39a3ee5e6b4b0d3255bfef95601890afd80709" + new File(testHomeDir.getAbsolutePath() + "/f25/f25_1_virtualbox.box"), + "6efeafd3d3304cf5d7fd37db2a7ddbaac09f425d" }, { "SHA-256", - new File(testHomeDir.getAbsolutePath() + "/f25/f25_3_virtualbox.box"), - "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855" + new File(testHomeDir.getAbsolutePath() + "/f25/f25_1_virtualbox.box"), + "ae4fe7f29f683d3901d4c620ef2e3c7ed17ebb6813158efd6a16f81b71a0aa43" } }; } @Test(dataProvider = "filesAndHashes") - public void givenHashService_whenGetChecksum_thenChecksumsAreEquals(String algorithm, File file, String expectedChecksum) throws NoSuchAlgorithmException { - HashService hashService = new DigestHashService(MessageDigest.getInstance(algorithm)); + public void givenHashService_whenGetChecksum_thenChecksumsAreEquals(String algorithm, File file, String + expectedChecksum) throws NoSuchAlgorithmException { + HashService hashService = new FilesystemDigestHashService(MessageDigest.getInstance(algorithm), new AppProperties()); String checksum = hashService.getChecksum(file.getAbsolutePath()); diff --git a/src/test/java/cz/sparko/boxitory/service/HashServiceFactoryTest.java b/src/test/java/cz/sparko/boxitory/service/HashServiceFactoryTest.java index da3b91c..aa0bd20 100644 --- a/src/test/java/cz/sparko/boxitory/service/HashServiceFactoryTest.java +++ b/src/test/java/cz/sparko/boxitory/service/HashServiceFactoryTest.java @@ -1,5 +1,6 @@ package cz.sparko.boxitory.service; +import cz.sparko.boxitory.conf.AppProperties; import cz.sparko.boxitory.factory.HashServiceFactory; import org.springframework.boot.test.context.SpringBootTest; import org.testng.annotations.DataProvider; @@ -17,22 +18,26 @@ public class HashServiceFactoryTest { @DataProvider public Object[][] hashServiceTypes() throws NoSuchAlgorithmException { return new Object[][]{ - {"md5", new 
DigestHashService(MessageDigest.getInstance("MD5"))}, - {"sha1", new DigestHashService(MessageDigest.getInstance("SHA-1"))}, - {"sha256", new DigestHashService(MessageDigest.getInstance("SHA-256"))}, + {"md5", new FilesystemDigestHashService(MessageDigest.getInstance("MD5"), new AppProperties())}, + {"sha1", new FilesystemDigestHashService(MessageDigest.getInstance("SHA-1"), new AppProperties())}, + {"sha256", new FilesystemDigestHashService(MessageDigest.getInstance("SHA-256"), new AppProperties())}, {"disabled", new NoopHashService()} }; } @Test(dataProvider = "hashServiceTypes") public void givenFactory_whenCreateHashService_thenGetExpectedInstance(String type, HashService expectedService) throws NoSuchAlgorithmException { - HashService hashService = HashServiceFactory.createHashService(type); + AppProperties appProperties = new AppProperties(); + appProperties.setChecksum(type); + HashService hashService = HashServiceFactory.createHashService(appProperties); assertEquals(hashService, expectedService); } @Test(expectedExceptions = IllegalArgumentException.class) public void givenFactory_whenCreateUnsupportedHashService_thenExceptionIsThrown() throws NoSuchAlgorithmException { - HashService hashService = HashServiceFactory.createHashService("foo"); + AppProperties appProperties = new AppProperties(); + appProperties.setChecksum("foo"); + HashServiceFactory.createHashService(appProperties); } } From e6eca6129264016148dbad873688059137243598 Mon Sep 17 00:00:00 2001 From: Michal Vala Date: Mon, 24 Jul 2017 12:09:00 +0200 Subject: [PATCH 2/3] added checksum_buffer_size option description --- README.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 0b3f2b3..79e9eaa 100644 --- a/README.md +++ b/README.md @@ -82,8 +82,12 @@ $ curl http://localhost:8083/f26 * default value: `disabled` * when default or `disabled` boxes output json not contains properties `checksumType` and `checksum` * when `md5|sha1|sha256` boxes 
output json contains properties `checksumType` and `checksum` with coresponding values +### Advanced Options + * `box.checksum_buffer_size` + * Size in bytes of the buffer used to read a box file while its checksum is calculated + * default value: `1024` - +### How to configure Configuration can be provided by `application.properties` file on classpath ``` # application.properties From 5699d8273fcc5a919954134992b7f94885211148 Mon Sep 17 00:00:00 2001 From: Michal Vala Date: Mon, 24 Jul 2017 12:16:29 +0200 Subject: [PATCH 3/3] log just to console for now --- src/main/resources/logback.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/resources/logback.xml b/src/main/resources/logback.xml index 98bd618..b9cbec6 100644 --- a/src/main/resources/logback.xml +++ b/src/main/resources/logback.xml @@ -1,8 +1,9 @@ + - + @@ -12,5 +13,4 @@ - \ No newline at end of file