From 7adedf02854669c8ce9923dcb1d741c5554ad370 Mon Sep 17 00:00:00 2001
From: duangsuse
Date: Sun, 7 Apr 2019 01:11:35 +0800
Subject: [PATCH] Initial commit

---
 Main.java                            |  37 +++++
 helper/Strings.java                  |  23 +++
 model/ForwardedMessageHead.java      |  55 +++++++
 model/Message.java                   | 145 ++++++++++++++++++
 model/MessageBodyType.java           |  26 ++++
 model/MessageHead.java               |  71 +++++++++
 model/MessageHeaderType.java         |  38 +++++
 model/RepliedMessageHead.java        |  55 +++++++
 scanner/RegexConstants.java          | 212 +++++++++++++++++++++++++++
 sourcemanager/Identifiable.java      |  19 +++
 sourcemanager/SimpleMapDelegate.java |  76 ++++++++++
 sourcemanager/SourceLocation.java    |  84 +++++++++++
 sourcemanager/SourceManager.java     |  24 +++
 13 files changed, 865 insertions(+)
 create mode 100644 Main.java
 create mode 100644 helper/Strings.java
 create mode 100644 model/ForwardedMessageHead.java
 create mode 100644 model/Message.java
 create mode 100644 model/MessageBodyType.java
 create mode 100644 model/MessageHead.java
 create mode 100644 model/MessageHeaderType.java
 create mode 100644 model/RepliedMessageHead.java
 create mode 100644 scanner/RegexConstants.java
 create mode 100644 sourcemanager/Identifiable.java
 create mode 100644 sourcemanager/SimpleMapDelegate.java
 create mode 100644 sourcemanager/SourceLocation.java
 create mode 100644 sourcemanager/SourceManager.java

diff --git a/Main.java b/Main.java
new file mode 100644
index 0000000..abc6a38
--- /dev/null
+++ b/Main.java
@@ -0,0 +1,37 @@
+package org.duangsuse.telegramscanner;
+
+import java.io.PrintStream;
+
+/**
+ * Application main class; not instantiable (the only constructor is private)
+ *
+ * @author duangsuse
+ * @version 1.0
+ */
+public class Main {
+    private Main() {}
+
+    /**
+     * Program version name, printed by {@link #main(String...)}
+     */
+    @SuppressWarnings("WeakerAccess")
+    public static final String VERSION = "1.0";
+
+    /**
+     * Standard output stream ({@link System#out})
+     */
+    private static final PrintStream out = System.out;
+    /**
+     * Standard error stream ({@link System#err}); the version banner is written here
+     */
+    private static final PrintStream err = System.err;
+
+    /**
+     * Program entrance
+     *
+     * @param args file(path)s to be processed
+     */
+    public static void main(String... args) {
+        err.print("TelegramScanner version "); err.println(VERSION);
+    }
+}
diff --git a/helper/Strings.java b/helper/Strings.java
new file mode 100644
index 0000000..a9a28ea
--- /dev/null
+++ b/helper/Strings.java
@@ -0,0 +1,23 @@
+package org.duangsuse.telegramscanner.helper;
+
+/**
+ * String helper functions
+ */
+public final class Strings {
+    /**
+     * Take the first n characters of a string, or the entire string when it is not longer than n
+     *
+     * @param n maximum number of characters to keep; zero or a negative value yields ""
+     * @param str target string, must not be null
+     * @return the first n characters of str if str is longer than n, else str itself
+     */
+    public static String take(int n, String str) {
+        if (n <= 0 || str.length() == 0) // nothing to take; also guards substring against negative n
+            return "";
+
+        if (str.length() > n)
+            return str.substring(0, n); // end index is exclusive, so this keeps exactly n characters
+        else /* = n or < n */
+            return str;
+    }
+}
diff --git a/model/ForwardedMessageHead.java b/model/ForwardedMessageHead.java
new file mode 100644
index 0000000..6330961
--- /dev/null
+++ b/model/ForwardedMessageHead.java
@@ -0,0 +1,55 @@
+package org.duangsuse.telegramscanner.model;
+
+import org.jetbrains.annotations.Contract;
+
+import java.util.Date;
+
+/**
+ * Telegram forwarded-from message header
+ *
+ * @see MessageHead its prototype
+ */
+public class ForwardedMessageHead extends MessageHead {
+    /**
+     * forwarded from telegram id
+     */
+    private String forwardedFrom;
+
+    public ForwardedMessageHead(String name, Date date, String from) {
+        super(name, date);
+        this.forwardedFrom = from;
+    }
+
+    @Override
+    public String toString() {
+        return "ForwardedMessageHead{" + super.toString() + '}' +
+                "(origin '" + forwardedFrom + '\'' + ')';
+    }
+
+    public String getForwardedFrom() {
+        return forwardedFrom;
+    }
+
+    public void setForwardedFrom(String forwardedFrom) {
+        this.forwardedFrom = forwardedFrom;
+    }
+
+    @Contract(value = "null -> false", pure = true)
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) return true;
+        if (o == null || getClass() != o.getClass()) return false;
+        if 
(!super.equals(o)) return false;
+
+        ForwardedMessageHead that = (ForwardedMessageHead) o;
+
+        return java.util.Objects.equals(forwardedFrom, that.forwardedFrom); // null-safe: forwardedFrom may be null
+    }
+
+    @Override
+    public int hashCode() {
+        int result = super.hashCode();
+        result = 31 * result + java.util.Objects.hashCode(forwardedFrom); // 0 for null, else forwardedFrom.hashCode()
+        return result;
+    }
+}
diff --git a/model/Message.java b/model/Message.java
new file mode 100644
index 0000000..101ac8d
--- /dev/null
+++ b/model/Message.java
@@ -0,0 +1,145 @@
+package org.duangsuse.telegramscanner.model;
+
+import org.duangsuse.telegramscanner.helper.Strings;
+
+import java.util.Collection;
+import java.util.LinkedList;
+
+/**
+ * Telegram message with following fields:
+ *
+ * + */ +public class Message { + private MessageHeaderType headerType = MessageHeaderType.NORMAL; + /** + * Message header (name, publishedAt) + */ + private MessageHead header = new MessageHead(); + /** + * Extra data, like filename information included in message + */ + private T messageExtRef; + + private MessageBodyType bodyType = MessageBodyType.NORMAL; + /** + * Message body string + */ + private String messageBody = ""; + + private Collection links = new LinkedList(); + private Collection hashtags = new LinkedList(); + + /** + * messageBody toString preview length + */ + private static final int BODY_PREVIEW_LEN = 10; + + /** + * Blank constructor + */ + public Message() {} + + @Override + public String toString() { + // count links and hashtags + final StringBuilder desc = new StringBuilder(); + if (links.size() != 0) desc.append(links.size()).append(" links"); + if (hashtags.size() != 0) desc.append(hashtags.size()).append(" tags"); + + final String fmt = "Message{Hd%s, Bd%s, ext=%s}[%s](%s..., %s)"; + return String.format(fmt, headerType, bodyType, messageExtRef.toString(), header, Strings.take(BODY_PREVIEW_LEN, messageBody), desc); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + + Message message = (Message) o; + + if (headerType != message.headerType) return false; + if (!header.equals(message.header)) return false; + if (messageExtRef != null ? !messageExtRef.equals(message.messageExtRef) : message.messageExtRef != null) + return false; + if (bodyType != message.bodyType) return false; + if (!messageBody.equals(message.messageBody)) return false; + if (!links.equals(message.links)) return false; + return hashtags.equals(message.hashtags); + } + + @Override + public int hashCode() { + int result = headerType.hashCode(); + result = 31 * result + header.hashCode(); + result = 31 * result + (messageExtRef != null ? 
messageExtRef.hashCode() : 0); + result = 31 * result + bodyType.hashCode(); + result = 31 * result + messageBody.hashCode(); + result = 31 * result + links.hashCode(); + result = 31 * result + hashtags.hashCode(); + return result; + } + + public MessageHeaderType getHeaderType() { + return headerType; + } + + public void setHeaderType(MessageHeaderType headerType) { + this.headerType = headerType; + } + + public MessageHead getHeader() { + return header; + } + + public void setHeader(MessageHead header) { + this.header = header; + } + + public T getMessageExtRef() { + return messageExtRef; + } + + public void setMessageExtRef(T messageExtRef) { + this.messageExtRef = messageExtRef; + } + + public MessageBodyType getBodyType() { + return bodyType; + } + + public void setBodyType(MessageBodyType bodyType) { + this.bodyType = bodyType; + } + + public String getMessageBody() { + return messageBody; + } + + public void setMessageBody(String messageBody) { + this.messageBody = messageBody; + } + + public Collection getLinks() { + return links; + } + + public void setLinks(Collection links) { + this.links = links; + } + + public Collection getHashtags() { + return hashtags; + } + + public void setHashtags(Collection hashtags) { + this.hashtags = hashtags; + } +} diff --git a/model/MessageBodyType.java b/model/MessageBodyType.java new file mode 100644 index 0000000..19f7438 --- /dev/null +++ b/model/MessageBodyType.java @@ -0,0 +1,26 @@ +package org.duangsuse.telegramscanner.model; + +/** + * Special message body type + */ +public enum MessageBodyType { + /** + * Normal text message + */ + NORMAL, + + /** + * Has links + */ + HAS_LINKS, + + /** + * Has hashtags + */ + HAS_HASHTAGS, + + /** + * Has links and hashtags + */ + HAS_LINKS_AND_HASTAGS +} diff --git a/model/MessageHead.java b/model/MessageHead.java new file mode 100644 index 0000000..4a29dae --- /dev/null +++ b/model/MessageHead.java @@ -0,0 +1,71 @@ +package org.duangsuse.telegramscanner.model; + +import 
org.duangsuse.telegramscanner.sourcemanager.Identifiable;
+import org.jetbrains.annotations.Contract;
+
+import java.util.Date;
+
+/**
+ * Telegram message header object: the origin name and publication date of a message
+ */
+public class MessageHead implements Identifiable {
+    /**
+     * Message origin name (stays null when the no-arg constructor is used and no setter is called)
+     */
+    private String sourceName;
+    /**
+     * Message publication date (stays null when the no-arg constructor is used and no setter is called)
+     */
+    private Date publishedAt;
+
+    MessageHead() {} // package-private blank header: both fields are left null
+    public MessageHead(String name, Date date) {
+        sourceName = name;
+        publishedAt = date;
+    }
+
+    public Date getPublishedAt() {
+        return publishedAt;
+    }
+
+    public void setPublishedAt(Date publishedAt) {
+        this.publishedAt = publishedAt;
+    }
+
+    public String getSourceName() {
+        return sourceName;
+    }
+
+    public void setSourceName(String sourceName) {
+        this.sourceName = sourceName;
+    }
+
+    @Override
+    public String toString() {
+        return "MessageHead(" + sourceName + "@" + publishedAt + ')';
+    }
+
+    @Contract(value = "null -> false", pure = true)
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) return true;
+        if (o == null || getClass() != o.getClass()) return false;
+
+        MessageHead that = (MessageHead) o;
+
+        if (!java.util.Objects.equals(sourceName, that.sourceName)) return false; // null-safe: a blank header has null fields
+        return java.util.Objects.equals(publishedAt, that.publishedAt);
+    }
+
+    @Override
+    public int hashCode() {
+        int result = java.util.Objects.hashCode(sourceName); // 0 for null, else sourceName.hashCode()
+        result = 31 * result + java.util.Objects.hashCode(publishedAt);
+        return result;
+    }
+
+    @Override
+    public int getIdentity() {
+        return System.identityHashCode(this);
+    }
+}
diff --git a/model/MessageHeaderType.java b/model/MessageHeaderType.java
new file mode 100644
index 0000000..8f31445
--- /dev/null
+++ b/model/MessageHeaderType.java
@@ -0,0 +1,38 @@
+package org.duangsuse.telegramscanner.model;
+
+/**
+ * Telegram Message header type
+ */
+public enum MessageHeaderType {
+    /**
+     * Normal head
+     */
+    NORMAL,
+
+    /**
+     * Reply to message
+     */
+    RELPY,
+    /**
+     * Forwarded message
+     */
+    FORWARDED,
+
+    /**
+     * Photo
+     */
+    A_PHOTO,
+    /**
+     * Album
+     */
+    A_ALBUM,
+
+    /**
+     * Has a file
+     */
+    
HAS_FILE,
+    /**
+     * Is a sticker
+     */
+    IS_STICKER
+}
diff --git a/model/RepliedMessageHead.java b/model/RepliedMessageHead.java
new file mode 100644
index 0000000..5f1390e
--- /dev/null
+++ b/model/RepliedMessageHead.java
@@ -0,0 +1,55 @@
+package org.duangsuse.telegramscanner.model;
+
+import org.jetbrains.annotations.Contract;
+
+import java.util.Date;
+
+/**
+ * Telegram reply-to message header
+ *
+ * @see MessageHead its prototype
+ */
+public class RepliedMessageHead extends MessageHead {
+    /**
+     * Replied to telegram id
+     */
+    private String repliedTo;
+
+    public RepliedMessageHead(String name, Date date, String repliedTo) {
+        super(name, date);
+        this.repliedTo = repliedTo;
+    }
+
+    public String getRepliedTo() {
+        return repliedTo;
+    }
+
+    public void setRepliedTo(String repliedTo) {
+        this.repliedTo = repliedTo;
+    }
+
+    @Override
+    public String toString() {
+        return "RepliedMessageHead{" + super.toString() + '}' +
+                "(replies '" + repliedTo + '\'' + ')';
+    }
+
+    @Contract(value = "null -> false", pure = true)
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) return true;
+        if (o == null || getClass() != o.getClass()) return false;
+        if (!super.equals(o)) return false;
+
+        RepliedMessageHead that = (RepliedMessageHead) o;
+
+        return java.util.Objects.equals(repliedTo, that.repliedTo); // null-safe: repliedTo may be null
+    }
+
+    @Override
+    public int hashCode() {
+        int result = super.hashCode();
+        result = 31 * result + java.util.Objects.hashCode(repliedTo); // 0 for null, else repliedTo.hashCode()
+        return result;
+    }
+}
diff --git a/scanner/RegexConstants.java b/scanner/RegexConstants.java
new file mode 100644
index 0000000..de3dc5d
--- /dev/null
+++ b/scanner/RegexConstants.java
@@ -0,0 +1,212 @@
+package org.duangsuse.telegramscanner.scanner;
+
+import java.util.regex.Pattern;
+
+/**
+ * A top-level class made for RegExp constants
+ *
+ * Used by Telegram message line scanner + * + *

{@link Pattern} Examples

+ *

+ * import java.util.regex.*;
+ *
+ * Pattern pat = Pattern.compile("(^|\\s)#(?!#)((\\S(?<![\\(\\)]))+)");
+ * Matcher m = pat.matcher("#abc #dev");
+ * m.reset();
+ *
+ * while (m.find()) { println(m.group().trim()); }
+ * 
+ * + *
+ *

Regular expression usage

+ * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + *
Name | Matches
Message Head | name, [dd.MM.yy hh:mm]
Head Reply | In reply to name
Head Forward | Forwarded from name
Is Album / Photo | (The message contains a photo or an album)
Sticker | ? Sticker
File | File : filename
+ * + * @see Pattern#compile(String) Regexp API used + */ +public final class RegexConstants { + /** + * Make a new pattern using code string + * + * @param regex regex expression string, should not be null + * @return compiled regexp {@link Pattern#compile(String)} + */ + private static @org.jetbrains.annotations.NotNull Pattern $(@org.jetbrains.annotations.NotNull String regex) { return Pattern.compile(regex); } + + /** + * New message header definition
+ * in form (name), [dd.mm.yy hh:mm] + *


+ * Examples + *

    + *
  • duangsuse::Echo, [24.03.19 11:22]
  • + *
  • name, [dd.MM.yy hh:mm]
  • + *
+ */ + public static final Pattern MESSAGE_HEAD = $("^(.+), \\[(\\d{2})\\.(\\d{2})\\.(\\d{2}) (\\d{2}):(\\d{2})\\]$"); + + /** + * Reply-to message header definition + *


+ * Examples + *

    + *
  • [In reply to duangsuse::Echo]
  • + *
+ */ + public static final Pattern HEAD_REPLY = $("^\\[In reply to (.+)\\]$"); + /** + * Forwarded from (display name) + *


+ * Examples + *

    + *
  • [Forwarded from 羽毛的小白板]
  • + *
+ */ + public static final Pattern HEAD_FORWARD = $("^\\[Forwarded from (.+)\\]$"); + + + /** + * An uploaded telegram file (.+) + * + *


+ * Examples + *

    + *
  • [ File : AndroidManifest.xml ]
  • + *
+ */ + public static final Pattern HEAD_FILE = $("^\\[ File : (.+) \\]$"); + /** + * Sticker picture of character (.) + * + *


+ * Examples + *

    + *
  • [ 😋 Sticker ]
  • + *
+ */ + public static final Pattern HEAD_STICKER = $("^\\[ (.) Sticker \\]$"); + + + /** + * Indicates that this message contains a photo collection + */ + public static final Pattern HEAD_IS_ALBUM = $("^\\[ Album \\]$"); + /** + * Indicates that this message contains a photo + */ + public static final Pattern HEAD_IS_PHOTO = $("^\\[ Photo \\]$"); + + + /** + * A static class for Links and Hash-tags scanner Regex in message body + *
+ *

Regular expression usage

+ * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + *
Name | Matches
Bare link | (newline)URL ...text...
Telegram link | ...text... (URL) ...text...
Inlined links | ...text... URL ...text...
Hash-tag | ...text... #tag ...text...
+ */ + public static final class MessageBodyRegexConstants { + /** + * Telegram topic Hash-tags + *


+ * Examples + * + *

    + *
  • Telegram #hashtag + *
  • #Topic_misc a new topic + *
  • #offtopic + *
  • #java 11 released! + *
+ */ + public static final Pattern HASHTAG = $("(^|\\s)#(?!#)((\\S(? + * Form: text (url) + * + *


+ * Examples + *

    + *
  • 系统服务 (https://blog.yuuta.moe/2017/11/10/from-vibrator-to-system-service/)
  • + *
+ */ + public static final Pattern LINK_TELEGRAM = $("\\((\\w+)://(\\S+)\\)"); + /** + * User inline text URL links + *


+ * Examples + *

    + *
  • See: https://github.com/duangsuse/RandomPicture/commit/440b8a1c7d2251b0074c1571c0d07c613628fc54 <3
  • + *
+ */ + public static final Pattern LINK_INLINED = $("(?![\\(\\)]).((http|https):(\\S+))"); + /** + * Bare text links, a newline starting with http|https + *


+ * Examples + * + *

    + *
  • https://github.com/aosp-mirror/platform_frameworks_base/blob/pie-release/tools/aapt2
  • + *
  • http://localhost:8080
  • + *
+ */ + public static final Pattern LINK_BARE = $("^(http|https):(\\S+)"); + } +} diff --git a/sourcemanager/Identifiable.java b/sourcemanager/Identifiable.java new file mode 100644 index 0000000..0616e8e --- /dev/null +++ b/sourcemanager/Identifiable.java @@ -0,0 +1,19 @@ +package org.duangsuse.telegramscanner.sourcemanager; + +/** + * Identifiable by integer + */ +public interface Identifiable { + /** + * Gets identical object id that is: + *
+ * + *
    + *
  • Reflexive: objects with the same value have the same identity
  • + *
  • Consistent: the identity must be the same for a given object every time this method is called
  • + *
+ * + * @return object JVM global identity + */ + int getIdentity(); +} diff --git a/sourcemanager/SimpleMapDelegate.java b/sourcemanager/SimpleMapDelegate.java new file mode 100644 index 0000000..d5c602f --- /dev/null +++ b/sourcemanager/SimpleMapDelegate.java @@ -0,0 +1,76 @@ +package org.duangsuse.telegramscanner.sourcemanager; + +import org.jetbrains.annotations.Contract; + +import java.util.HashMap; +import java.util.Set; + +/** + * Delegates {@link HashMap}, copying some useful method + *
+ *
    + *
  • size + *
  • get/put + *
  • containsKey + *
  • remove + *
  • clone + *
  • equals/hashcode/toString + *
+ * @param Key type + * @param Value type + * @see HashMap delegated class + */ +@SuppressWarnings("SuspiciousMethodCalls") /* unnecessary for delegates */ +public class SimpleMapDelegate { + private HashMap mMap; + + @SuppressWarnings("WeakerAccess") /* should be shared */ + protected SimpleMapDelegate(HashMap receiver) { + this.mMap = receiver; + } + + public int size() { + return mMap.size(); + } + + public V get(Object key) { + return mMap.get(key); + } + + public boolean containsKey(Object key) { + return mMap.containsKey(key); + } + + public V put(K key, V value) { + return mMap.put(key, value); + } + + public V remove(Object key) { + return mMap.remove(key); + } + + public Set keySet() { + return mMap.keySet(); + } + + @Contract(value = "null -> false", pure = true) + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + + SimpleMapDelegate that = (SimpleMapDelegate) o; + + return mMap.equals(that.mMap); + } + + @Override + public int hashCode() { + return mMap.hashCode(); + } + + @Override + public String toString() { + return mMap.toString(); + } +} diff --git a/sourcemanager/SourceLocation.java b/sourcemanager/SourceLocation.java new file mode 100644 index 0000000..a58fb0c --- /dev/null +++ b/sourcemanager/SourceLocation.java @@ -0,0 +1,84 @@ +package org.duangsuse.telegramscanner.sourcemanager; + +/** + * Scanner source location + * + *
+ *
    + *
  • Text offset + *
  • Line + *
  • Message No + *
  • Message Local line count + *
+ */ +public class SourceLocation { + private int offset, line; + + /** + * Scanned message number index + */ + private int messageNo; + /** + * Scanned at line, relative to message header start + */ + private int messageLine; + + public SourceLocation() { + offset = -1; + } + + public SourceLocation(int offset, int line, int messageNo, int messageLine) { + super(); + this.offset = offset; + this.line = line; + this.messageNo = messageNo; + this.messageLine = messageLine; + } + + public int getOffset() { + return offset; + } + + public int getLine() { + return line; + } + + public int getMessageNo() { + return messageNo; + } + + public int getMessageLine() { + return messageLine; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + + SourceLocation that = (SourceLocation) o; + + if (offset != that.offset) return false; + if (line != that.line) return false; + if (messageNo != that.messageNo) return false; + return messageLine == that.messageLine; + } + + @Override + public int hashCode() { + int result = offset; + result = 31 * result + line; + result = 31 * result + messageNo; + result = 31 * result + messageLine; + return result; + } + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder("SourceLocation"); + sb.append('(').append('@').append(offset).append('L').append(line) + .append(", Message#").append(messageNo).append(':').append(messageLine).append(')'); + + return sb.toString(); + } +} diff --git a/sourcemanager/SourceManager.java b/sourcemanager/SourceManager.java new file mode 100644 index 0000000..d007a0e --- /dev/null +++ b/sourcemanager/SourceManager.java @@ -0,0 +1,24 @@ +package org.duangsuse.telegramscanner.sourcemanager; + +import java.util.HashMap; + +/** + * Source line manager + * + * @see SimpleMapDelegate api class + */ +public class SourceManager extends SimpleMapDelegate { + @SuppressWarnings("WeakerAccess") /* 
should be shared api */
+    public SourceManager() {
+        super(new HashMap<>());
+    }
+
+    /**
+     * Lazy source manager singleton instance; INSTANCE is created when this holder class is first initialized
+     */
+    private static final class LazyHolder {
+        static final SourceManager INSTANCE = new SourceManager();
+    }
+
+    public static SourceManager getInstance() { return LazyHolder.INSTANCE; } // static, so no instance is needed to reach the singleton
+}