-
Notifications
You must be signed in to change notification settings - Fork 0
/
LargeFileGenerator.java
executable file
·132 lines (108 loc) · 5.35 KB
/
LargeFileGenerator.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
///usr/bin/env jbang "$0" "$@" ; exit $?
//DEPS info.picocli:picocli:4.7.0
import picocli.CommandLine;
import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.security.SecureRandom;
import java.util.Random;
import java.util.concurrent.Callable;
/**
 * Command-line tool that writes a large text file of random alphanumeric "words" and hides a
 * fixed number of occurrences of a chosen word inside it.
 *
 * <p>The file is written as a sequence of 1 MB records, each followed by the record delimiter.
 * The delimiter is written in addition to the requested size, so the file on disk is larger than
 * {@code --size} GB by one delimiter per record.
 */
@CommandLine.Command(name = "generate-file", mixinStandardHelpOptions = true, description = "Generates a large file with random data and a specified word.")
public class LargeFileGenerator implements Callable<Integer> {

    /** Path of the file to create; an existing file is overwritten. */
    @CommandLine.Option(names = {"-o", "--output"}, description = "Output file name", defaultValue = "data-file.txt")
    private String outputFile;

    /** Requested amount of random data, in gigabytes (1 GB = 1024^3 characters). */
    @CommandLine.Option(names = {"-s", "--size"}, description = "Size of the file in GB", defaultValue = "1")
    private long fileSizeGB;

    /** Number of times the word is inserted into the random data. */
    @CommandLine.Option(names = {"-c", "--count"}, description = "Number of occurrences of the word", defaultValue = "100")
    private int wordCount;

    /** The word to insert; it is surrounded by single spaces when written. */
    @CommandLine.Option(names = {"-w", "--word"}, description = "Word to insert", defaultValue = "PEACE")
    private String word;

    /** Text written after every 1 MB record. */
    @CommandLine.Option(names = {"-d", "--delimiter"}, description = "Record delimiter", defaultValue = "\n")
    private String recordDelimiter;

    /**
     * Parses the command line, runs the generator and exits with its status code.
     *
     * @param args command-line arguments, see the option declarations on this class
     */
    public static void main(String... args) {
        int exitCode = new CommandLine(new LargeFileGenerator()).execute(args);
        System.exit(exitCode);
    }

    /**
     * Generates the file described by the parsed options.
     *
     * @return {@code 0} on success, {@code 1} if the options are invalid or the file could not be written
     */
    @Override
    public Integer call() {
        try {
            new FileGenerator(outputFile, fileSizeGB, wordCount, word, recordDelimiter).generate();
            return 0;
        } catch (IllegalArgumentException e) {
            // A bad option value is a user error: a short message is more useful than a stack trace.
            System.err.println("Invalid arguments: " + e.getMessage());
            return 1;
        } catch (Exception e) {
            e.printStackTrace();
            return 1;
        }
    }

    /** Writes the random data and distributes the word occurrences over it. */
    private static class FileGenerator {
        private static final int CHUNK_SIZE = 1024 * 1024; // 1 MB chunk size for efficiency
        // CHUNK_SIZE must divide 1 GB evenly so that every record is a full chunk.
        private static final long CHUNKS_PER_GB = (1024L * 1024L * 1024L) / CHUNK_SIZE;
        private static final Random RANDOM = new SecureRandom(); // SecureRandom for randomness
        private static final double PROBABILITY_OF_SPACE = 0.01;
        private static final String ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";

        private final String outputFile;
        private final long fileSizeGB;
        private final int wordCount;
        private final String word;
        private final String recordDelimiter;

        /**
         * @param outputFile      path of the file to create
         * @param fileSizeGB      amount of random data to write, in GB; must not be negative
         * @param wordCount       number of word occurrences to insert; must not be negative
         * @param word            word to insert; stored padded with one space on each side
         * @param recordDelimiter text written after every chunk
         * @throws IllegalArgumentException if {@code fileSizeGB} or {@code wordCount} is negative
         */
        public FileGenerator(String outputFile, long fileSizeGB, int wordCount, String word, String recordDelimiter) {
            if (fileSizeGB < 0) {
                throw new IllegalArgumentException("file size must not be negative: " + fileSizeGB);
            }
            if (wordCount < 0) {
                throw new IllegalArgumentException("word count must not be negative: " + wordCount);
            }
            this.outputFile = outputFile;
            this.fileSizeGB = fileSizeGB;
            this.wordCount = wordCount;
            this.word = " " + word + " "; // add spaces
            this.recordDelimiter = recordDelimiter;
        }

        /** Returns one random character from {@link #ALPHABET}. */
        private static char generateRandomChar() {
            return ALPHABET.charAt(RANDOM.nextInt(ALPHABET.length()));
        }

        /**
         * Writes {@code fileSizeGB} GB of random data to the output file, inserting the word
         * exactly {@code wordCount} times at random, non-overlapping positions.
         *
         * <p>All arguments are checked before the file is opened, so an invalid request does not
         * truncate an existing file. The random filler is not inspected, so it could in principle
         * contain the word by chance.
         *
         * @throws IllegalArgumentException if the size overflows or the words cannot fit into the data
         * @throws IOException              if the file cannot be created or written
         */
        public void generate() throws IOException {
            long totalChunks = computeTotalChunks();
            requireWordsFit(totalChunks);

            // One buffer is reused for every chunk instead of allocating a new 1 MB string each time.
            char[] buffer = new char[CHUNK_SIZE];
            try (BufferedWriter writer = new BufferedWriter(new FileWriter(outputFile, StandardCharsets.UTF_8))) {
                long wordsLeft = wordCount;
                for (long chunksLeft = totalChunks; chunksLeft > 0; chunksLeft--) {
                    int wordsInChunk = chooseWordsForChunk(wordsLeft, chunksLeft);
                    fillRandomChunk(buffer, wordsInChunk);
                    writer.write(buffer, 0, buffer.length);
                    writer.write(recordDelimiter);
                    wordsLeft -= wordsInChunk;
                }
            }
            // Reported only after the writer has been flushed and closed.
            System.out.println("File generated successfully: " + outputFile + " with " + wordCount + " occurrences of the word '" + word + "'.");
        }

        /** Converts the requested size to a number of chunks, rejecting sizes that overflow a long. */
        private long computeTotalChunks() {
            try {
                return Math.multiplyExact(fileSizeGB, CHUNKS_PER_GB);
            } catch (ArithmeticException e) {
                throw new IllegalArgumentException("file size is too large: " + fileSizeGB + " GB", e);
            }
        }

        /** Fails fast when the requested occurrences cannot be placed without overlapping. */
        private void requireWordsFit(long totalChunks) {
            if (wordCount == 0) {
                return;
            }
            long maxWordsPerChunk = CHUNK_SIZE / word.length();
            if (maxWordsPerChunk == 0) {
                throw new IllegalArgumentException("word is longer than one chunk of " + CHUNK_SIZE + " characters");
            }
            long chunksNeeded = (wordCount + maxWordsPerChunk - 1) / maxWordsPerChunk;
            if (chunksNeeded > totalChunks) {
                throw new IllegalArgumentException(wordCount + " occurrences of the word do not fit into " + fileSizeGB + " GB");
            }
        }

        /**
         * Decides how many words go into the next chunk.
         *
         * <p>Every chunk receives the even share {@code wordsLeft / chunksLeft}; the remainder is
         * handed out by selection sampling against the chunks that are still to be written. The
         * last chunk therefore always receives whatever is left, so the total is exact and no
         * extra chunks have to be appended.
         */
        private static int chooseWordsForChunk(long wordsLeft, long chunksLeft) {
            int evenShare = (int) (wordsLeft / chunksLeft);
            long surplus = wordsLeft % chunksLeft;
            boolean takeExtra = surplus > 0 && RANDOM.nextDouble() < (double) surplus / chunksLeft;
            return takeExtra ? evenShare + 1 : evenShare;
        }

        /**
         * Fills the buffer with random characters and overwrites part of it with the word.
         *
         * @param buffer      chunk to fill completely; its length never changes
         * @param occurrences number of times to place the word; the caller guarantees they fit
         */
        private void fillRandomChunk(char[] buffer, int occurrences) {
            for (int i = 0; i < buffer.length; i++) {
                // randomly insert a space to form words
                buffer[i] = RANDOM.nextDouble() < PROBABILITY_OF_SPACE ? ' ' : generateRandomChar();
            }
            if (occurrences == 0) {
                return;
            }
            // Split the chunk into equal segments and put one word at a random offset inside
            // each, so words never overlap and never run past the end of the chunk.
            int segment = buffer.length / occurrences;
            int slack = segment - word.length();
            for (int i = 0; i < occurrences; i++) {
                int start = i * segment + RANDOM.nextInt(slack + 1);
                word.getChars(0, word.length(), buffer, start);
            }
        }
    }
}