import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Pattern;

/**
 * Word-frequency exercise over the two "War and Peace" resource files.
 */
public class WarAndPeaceExercise {

    /** Encoding used to decode both resource files. */
    private static final Charset TEXT_CHARSET = Charset.forName("windows-1251");

    /**
     * Matches every character that is not a Latin letter or a Cyrillic letter in the
     * ranges U+0410..U+042F / U+0430..U+044F; each such character separates words.
     * Compiled once instead of once per input line.
     */
    private static final Pattern NON_LETTER =
            Pattern.compile("[^A-Za-z\u0410-\u042f\u0430-\u044f]");

    /** Words shorter than this are ignored. */
    private static final int MIN_WORD_LENGTH = 4;

    /** Words occurring fewer times than this are left out of the report. */
    private static final int MIN_FREQUENCY = 10;

    /**
     * Builds a frequency report for the words found in {@code src/main/resources/WAP12.txt}
     * and {@code src/main/resources/WAP34.txt} (paths are relative to the working directory).
     *
     * <p>Lines are lowercased and split on every non-letter character. Words shorter than
     * four characters and words occurring fewer than ten times are dropped. Each remaining
     * word is rendered as {@code "word - count"}; entries are ordered by count, highest
     * first, with ties broken by the natural {@link String} ordering of the word, and are
     * joined with {@code '\n'} (no trailing newline).
     *
     * @return the report, or an empty string when no word qualifies
     * @throws IOException if either file cannot be read or decoded
     */
    public static String warAndPeace() throws IOException {
        final Path tome12Path = Paths.get("src", "main", "resources", "WAP12.txt");
        final Path tome34Path = Paths.get("src", "main", "resources", "WAP34.txt");

        final Map<String, Integer> frequencies = new HashMap<>();
        // Count each file into the shared map rather than appending to the list returned
        // by readAllLines, which is not guaranteed to be modifiable.
        countWords(Files.readAllLines(tome12Path, TEXT_CHARSET), frequencies);
        countWords(Files.readAllLines(tome34Path, TEXT_CHARSET), frequencies);

        final List<Map.Entry<String, Integer>> frequent = new ArrayList<>();
        for (Map.Entry<String, Integer> entry : frequencies.entrySet()) {
            if (entry.getValue() >= MIN_FREQUENCY) {
                frequent.add(entry);
            }
        }

        // Highest count first; equal counts fall back to alphabetical order of the word.
        frequent.sort((left, right) -> {
            final int byCount = Integer.compare(right.getValue(), left.getValue());
            return byCount != 0 ? byCount : left.getKey().compareTo(right.getKey());
        });

        final List<String> rendered = new ArrayList<>(frequent.size());
        for (Map.Entry<String, Integer> entry : frequent) {
            rendered.add(entry.getKey() + " - " + entry.getValue());
        }
        // String.join yields "" for an empty list, so an empty report no longer throws.
        return String.join("\n", rendered);
    }

    /** Adds the occurrences of every sufficiently long word in {@code lines} to {@code frequencies}. */
    private static void countWords(List<String> lines, Map<String, Integer> frequencies) {
        for (String line : lines) {
            // Locale.ROOT keeps lowercasing independent of the JVM's default locale.
            for (String word : NON_LETTER.split(line.toLowerCase(Locale.ROOT))) {
                if (word.length() >= MIN_WORD_LENGTH) {
                    frequencies.merge(word, 1, Integer::sum);
                }
            }
        }
    }
}