import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.function.Function;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

/**
 * Builds a word-frequency report over the two text files
 * {@code src/main/resources/WAP12.txt} and {@code src/main/resources/WAP34.txt}.
 */
public class WarAndPeaceExercise {

    /** Encoding used to decode both input files. */
    private static final Charset TEXT_CHARSET = Charset.forName("windows-1251");

    /**
     * One or more characters that are neither {@code a-z} nor in the Cyrillic lowercase
     * range U+0430..U+044F. The range is spelled with Unicode escapes so the class compiles
     * the same way regardless of the encoding the source file is read with.
     *
     * NOTE(review): U+0451 (Cyrillic "io") lies outside U+0430..U+044F, so it acts as a
     * separator here. That matches the behavior of the previous implementation; confirm
     * against the expected output before widening the range.
     */
    private static final Pattern WORD_SEPARATOR = Pattern.compile("[^a-z\u0430-\u044f]+");

    /** Words shorter than this are left out of the report. */
    private static final int MIN_WORD_LENGTH = 4;

    /** Words occurring fewer times than this are left out of the report. */
    private static final int MIN_FREQUENCY = 10;

    /**
     * Reads both input files, counts lower-cased words and renders the counts as text.
     *
     * <p>Each reported word produces one entry of the form {@code word - count}. Entries are
     * joined with {@code "\n"} (no trailing newline), ordered by count descending and then by
     * word ascending. Words shorter than {@value #MIN_WORD_LENGTH} characters or occurring
     * fewer than {@value #MIN_FREQUENCY} times are omitted.
     *
     * @return the formatted report; an empty string when no word qualifies
     * @throws IOException if either input file cannot be read or decoded
     */
    public static String warAndPeace() throws IOException {
        final Path tome12Path = Paths.get("src", "main", "resources", "WAP12.txt");
        final Path tome34Path = Paths.get("src", "main", "resources", "WAP34.txt");

        // Files.readAllLines does not promise a modifiable list, so copy into one we own
        // before appending the second file.
        final List<String> lines = new ArrayList<>(Files.readAllLines(tome12Path, TEXT_CHARSET));
        lines.addAll(Files.readAllLines(tome34Path, TEXT_CHARSET));

        return formatFrequencies(countWords(lines));
    }

    /** Counts occurrences of every lower-cased word of at least MIN_WORD_LENGTH characters. */
    private static Map<String, Long> countWords(List<String> lines) {
        return lines.stream()
                // Locale.ROOT keeps case mapping independent of the JVM's default locale.
                .map(line -> line.toLowerCase(Locale.ROOT))
                .flatMap(WORD_SEPARATOR::splitAsStream)
                // Also discards the empty token produced when a line starts with a separator.
                .filter(word -> word.length() >= MIN_WORD_LENGTH)
                .collect(Collectors.groupingBy(Function.identity(), Collectors.counting()));
    }

    /** Renders entries with count >= MIN_FREQUENCY, most frequent first, ties alphabetical. */
    private static String formatFrequencies(Map<String, Long> frequencies) {
        final Comparator<Map.Entry<String, Long>> byCountDescThenWord =
                Map.Entry.<String, Long>comparingByValue(Comparator.reverseOrder())
                        .thenComparing(Map.Entry.<String, Long>comparingByKey());

        return frequencies.entrySet().stream()
                .filter(entry -> entry.getValue() >= MIN_FREQUENCY)
                .sorted(byCountDescThenWord)
                .map(entry -> entry.getKey() + " - " + entry.getValue())
                .collect(Collectors.joining("\n"));
    }
}