From 01441fc9f21fb4e8f7fa0755bfcd868de7f8e564 Mon Sep 17 00:00:00 2001 From: richet Date: Fri, 5 Feb 2021 16:21:54 +0100 Subject: [PATCH] imp. isBinary --- src/main/java/org/funz/util/Disk.java | 29 ++++++- src/test/java/org/funz/DiskTest.java | 18 +++-- src/test/resources/out.txt | 105 ++++++++++++++++++++++++++ 3 files changed, 145 insertions(+), 7 deletions(-) create mode 100644 src/test/resources/out.txt diff --git a/src/main/java/org/funz/util/Disk.java b/src/main/java/org/funz/util/Disk.java index 03dc5a1..56b2587 100644 --- a/src/main/java/org/funz/util/Disk.java +++ b/src/main/java/org/funz/util/Disk.java @@ -19,6 +19,7 @@ import java.util.List; import org.apache.commons.io.FileUtils; import org.funz.Protocol; +import java.nio.charset.Charset; public class Disk { @@ -89,6 +90,9 @@ protected static boolean isStringChar(char ch) { } switch (ch) { case ' ': + case '\t': + case '\r': + case '\n': case '/': case '+': case '*': @@ -123,24 +127,45 @@ protected static boolean isStringChar(char ch) { case '\\': return true; } + //System.out.println(""+ch+""); return false; } public static boolean isBinary(File f) { + for (Charset cs: new Charset[]{Charset.forName("UTF-8"),Charset.forName("ISO-8859-1"),Charset.forName("ISO-8859-15")}) { + // Consider as binary if no charset get it as ASCII... + if (!isBinary(f, cs)) return false; + } + return true; + } + + public static boolean isBinary(File f, Charset inputCharset) { boolean isbin = false; java.io.InputStream in = null; try { in = new FileInputStream(f); - BufferedReader r = new BufferedReader(new InputStreamReader(in)); + BufferedReader r = new BufferedReader(new InputStreamReader(in, inputCharset)); - int sample = (int) Math.min(255, f.length()); + int sample = (int) Math.min(1024, f.length()); char[] cc = new char[sample]; //do a peek r.read(cc, 0, sample); double prob_bin = 0; +// int word_size = 10; // will consider word as binary if >50% chars are binary +// for (int i = 0; i < cc.length-word_size; i=i+word_size) { +// double bins = 0; +// for (int j=0; j 0.5) { +// String b = ">"; +// for (int j=0; j