From d5f5653665d4a6a3fc3e4fc1acd680180a78aaa2 Mon Sep 17 00:00:00 2001 From: Jon Iles Date: Fri, 2 Oct 2015 14:25:03 +0100 Subject: [PATCH 01/30] Fix default Unicode alternate skip count --- .../parser/standard/ParserState.java | 2 +- .../standard/StandardRtfParserTest.java | 6 + .../parser/standard/data/testGitHubIssue6.rtf | 1 + .../parser/standard/data/testGitHubIssue6.xml | 253 ++++++++++++++++++ 4 files changed, 261 insertions(+), 1 deletion(-) create mode 100644 RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testGitHubIssue6.rtf create mode 100644 RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testGitHubIssue6.xml diff --git a/RTF Parser Kit/src/com/rtfparserkit/parser/standard/ParserState.java b/RTF Parser Kit/src/com/rtfparserkit/parser/standard/ParserState.java index 057e4d8..4fe7b3f 100644 --- a/RTF Parser Kit/src/com/rtfparserkit/parser/standard/ParserState.java +++ b/RTF Parser Kit/src/com/rtfparserkit/parser/standard/ParserState.java @@ -37,5 +37,5 @@ public ParserState(ParserState state) public int currentFont; public String currentEncoding; public String currentFontEncoding; - public int unicodeAlternateSkipCount; + public int unicodeAlternateSkipCount = 1; } diff --git a/RTF Parser Kit/test/com/rtfparserkit/parser/standard/StandardRtfParserTest.java b/RTF Parser Kit/test/com/rtfparserkit/parser/standard/StandardRtfParserTest.java index fa1e081..c508e12 100644 --- a/RTF Parser Kit/test/com/rtfparserkit/parser/standard/StandardRtfParserTest.java +++ b/RTF Parser Kit/test/com/rtfparserkit/parser/standard/StandardRtfParserTest.java @@ -70,4 +70,10 @@ public void testSpecialChars() throws Exception { TestUtilities.assertRtfParserDumpMatches(this, new StandardRtfParser(), "testSpecialChars"); } + + @Test + public void testGitHubIssue6() throws Exception + { + TestUtilities.assertRtfParserDumpMatches(this, new StandardRtfParser(), "testGitHubIssue6"); + } } diff --git a/RTF Parser 
Kit/test/com/rtfparserkit/parser/standard/data/testGitHubIssue6.rtf b/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testGitHubIssue6.rtf new file mode 100644 index 0000000..3dccc6f --- /dev/null +++ b/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testGitHubIssue6.rtf @@ -0,0 +1 @@ +{\rtf1\ansi\ansicpg1252\deff0{\fonttbl{\f0\froman\fcharset0 Times New Roman;}{\f1\froman\fcharset0 Arial;}{\f2\froman\fcharset0 Courier;}}{\colortbl\red0\green0\blue0;\red255\green255\blue255;}{\stylesheet {\style\s0 \ql\fi0\li0\ri0\f1\fs24\cf0 Normal;}{\style\s3 \ql\fi0\li0\ri0\f1\fs26\b\cf0 heading 3;}{\style\s2 \ql\fi0\li0\ri0\f1\fs28\b\i\cf0 heading 2;}{\style\s1 \ql\fi0\li0\ri0\f1\fs32\b\cf0 heading 1;}}{\*\listtable}{\*\listoverridetable}{\*\generator iText 2.1.7 by 1T3XT}{\info}\paperw12242\paperh15842\margl1425\margr360\margt950\margb1425{\header \pard\plain\s0\qr\fi0\li0\ri0\sl320\plain\f0{\field{\*\fldinst PAGE}{\fldrslt }}\f2\fs24 . \line \par}\pgwsxn12242\pghsxn15842\marglsxn1425\margrsxn360\margtsxn950\margbsxn1425\pard\plain\s0\ql\fi-734\li734\ri0\sb480\sa240\sl240\plain\tx720\tqr\tx9580\tx9720{\f2\fs24\cf0\chcbpat1 \tab }{\f2\fs24\cf0\chcbpat1 INNEN. K\u220?CHE - TAG}\par\pard\plain\s0\qj\fi0\li734\ri864\sb240\sa240\sl240\plain\tx1920\tx3840\tx5760\tx7680\tx9600{\f2\fs24\cf0\chcbpat1 Ein Absatz mit Line-Separator:\line Der geht hier auf einer neuen Zeile weiter.}\par\pard\plain\s0\ql\fi-734\li734\ri0\sb480\sa240\sl240\plain\tx720\tqr\tx9580\tx9720{\f2\fs24\cf0\chcbpat1 \tab }{\f2\fs24\cf0\chcbpat1 INNEN. 
K\u220?CHE - TAG}\par\pard\plain\s0\qj\fi0\li734\ri864\sb240\sl240\plain\tx1920\tx3840\tx5760\tx7680\tx9600{\f2\fs24\cf0\chcbpat1 Hier ist die zweite Szene.}\par} \ No newline at end of file diff --git a/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testGitHubIssue6.xml b/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testGitHubIssue6.xml new file mode 100644 index 0000000..f6a81d6 --- /dev/null +++ b/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testGitHubIssue6.xml @@ -0,0 +1,253 @@ + + + + + + + + + + + + Times New Roman; + + + + + + Arial; + + + + + + Courier; + + + + + + + + ; + + + + ; + + + + + + + + + + + + + Normal; + + + + + + + + + + + + heading 3; + + + + + + + + + + + + + heading 2; + + + + + + + + + + + + heading 1; + + + + + + + + + + + iText 2.1.7 by 1T3XT + + + + + + + + + + + + + + + + + + + + + + + + + + + PAGE + + + + + + + + + . + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + INNEN. KÜCHE - TAG + + + + + + + + + + + + + + + + + + + + + + + + Ein Absatz mit Line-Separator: + + Der geht hier auf einer neuen Zeile weiter. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + INNEN. KÜCHE - TAG + + + + + + + + + + + + + + + + + + + + + + + Hier ist die zweite Szene. + + + + \ No newline at end of file From d67da79649b9999681a543e3e3d1aa452eb06522 Mon Sep 17 00:00:00 2001 From: Jon Iles Date: Mon, 11 Jan 2016 16:31:49 +0000 Subject: [PATCH 02/30] Added support for Cp1254. 
--- .../parser/standard/Encoding.java | 199 +++++++++--------- .../standard/StandardRtfParserTest.java | 7 + .../standard/data/testTurkishEncoding.rtf | 4 + .../standard/data/testTurkishEncoding.xml | 39 ++++ 4 files changed, 150 insertions(+), 99 deletions(-) create mode 100644 RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testTurkishEncoding.rtf create mode 100644 RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testTurkishEncoding.xml diff --git a/RTF Parser Kit/src/com/rtfparserkit/parser/standard/Encoding.java b/RTF Parser Kit/src/com/rtfparserkit/parser/standard/Encoding.java index d001d6e..f2116dc 100644 --- a/RTF Parser Kit/src/com/rtfparserkit/parser/standard/Encoding.java +++ b/RTF Parser Kit/src/com/rtfparserkit/parser/standard/Encoding.java @@ -1,99 +1,100 @@ -/* - * Copyright 2013 Jon Iles - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package com.rtfparserkit.parser.standard; - -import java.util.HashMap; -import java.util.Map; - -/** - * Represents character encodings which may be encountered in an RTF file. 
- */ -class Encoding -{ - public static final String ANSI_ENCODING = "Cp1252"; - public static final String PC_ENCODING = "Cp437"; - public static final String PCA_ENCODING = "Cp850"; - - public static final Map LOCALEID_MAPPING = new HashMap(); - static - { - LOCALEID_MAPPING.put("932", "SJIS"); // Japanese - LOCALEID_MAPPING.put("936", "Cp936"); // Simplified Chinese - LOCALEID_MAPPING.put("1025", "Cp1256"); // Arabic (Saudi Arabia) - LOCALEID_MAPPING.put("1026", "Cp1251"); // Bulgarian - LOCALEID_MAPPING.put("1028", "Cp950"); // Chinese (Taiwan) - LOCALEID_MAPPING.put("1029", "Cp1250"); // Czech - LOCALEID_MAPPING.put("1032", "Cp1253"); // Greek - LOCALEID_MAPPING.put("1037", "Cp1255"); // Hebrew - LOCALEID_MAPPING.put("1038", "Cp1250"); // Hungarian - LOCALEID_MAPPING.put("1041", "SJIS"); // Japanese - LOCALEID_MAPPING.put("1042", "Cp949"); // Korean - LOCALEID_MAPPING.put("1045", "Cp1250"); // Polish - LOCALEID_MAPPING.put("1048", "Cp1250"); // Romanian - LOCALEID_MAPPING.put("1049", "Cp1251"); // Russian - LOCALEID_MAPPING.put("1050", "Cp1250"); // Croatian - LOCALEID_MAPPING.put("1051", "Cp1250"); // Slovak - LOCALEID_MAPPING.put("1052", "Cp1250"); // Albanian - LOCALEID_MAPPING.put("1054", "Cp874"); // Thai - LOCALEID_MAPPING.put("1055", "Cp1254"); // Turkish - LOCALEID_MAPPING.put("1056", "Cp1256"); // Urdu - LOCALEID_MAPPING.put("1058", "Cp1251"); // Ukrainian - LOCALEID_MAPPING.put("1059", "Cp1251"); // Belarusian - LOCALEID_MAPPING.put("1060", "Cp1250"); // Slovenian - LOCALEID_MAPPING.put("1061", "Cp1257"); // Estonian - LOCALEID_MAPPING.put("1062", "Cp1257"); // Latvian - LOCALEID_MAPPING.put("1063", "Cp1257"); // Lithuanian - LOCALEID_MAPPING.put("1065", "Cp1256"); // Farsi - LOCALEID_MAPPING.put("1066", "Cp1258"); // Vietnamese - LOCALEID_MAPPING.put("1068", "Cp1254"); // Azeri (Latin) - LOCALEID_MAPPING.put("1071", "Cp1251"); // FYRO Macedonian - LOCALEID_MAPPING.put("1087", "Cp1251"); // Kazakh - LOCALEID_MAPPING.put("1088", "Cp1251"); // Kyrgyz 
(Cyrillic) - LOCALEID_MAPPING.put("1091", "Cp1254"); // Uzbek (Latin) - LOCALEID_MAPPING.put("1092", "Cp1251"); // Tatar - LOCALEID_MAPPING.put("1104", "Cp1251"); // Mongolian (Cyrillic) - LOCALEID_MAPPING.put("1250", "Cp1250"); // Windows Latin 2 (Central Europe) - LOCALEID_MAPPING.put("1251", "Cp1251"); // Cyrillic - LOCALEID_MAPPING.put("1252", "Cp1252"); // Latin - LOCALEID_MAPPING.put("1255", "Cp1255"); // Windows Hebrew - LOCALEID_MAPPING.put("1256", "Cp1256"); // Arabic (Iraq) - LOCALEID_MAPPING.put("2049", "Cp1256"); // Arabic (Iraq) - LOCALEID_MAPPING.put("2052", "MS936"); // Chinese (PRC) - LOCALEID_MAPPING.put("2074", "Cp1250"); // Serbian (Latin) - LOCALEID_MAPPING.put("2092", "Cp1251"); // Azeri (Cyrillic) - LOCALEID_MAPPING.put("2115", "Cp1251"); // Uzbek (Cyrillic) - LOCALEID_MAPPING.put("3073", "Cp1256"); // Arabic (Egypt) - LOCALEID_MAPPING.put("3076", "Cp950"); // Chinese (Hong Kong S.A.R.) - LOCALEID_MAPPING.put("3098", "Cp1251"); // Serbian (Cyrillic) - LOCALEID_MAPPING.put("4097", "Cp1256"); // Arabic (Libya) - LOCALEID_MAPPING.put("4100", "MS936"); // Chinese (Singapore) - LOCALEID_MAPPING.put("5121", "Cp1256"); // Arabic (Algeria) - LOCALEID_MAPPING.put("5124", "Cp950"); // Chinese (Macau S.A.R.) - LOCALEID_MAPPING.put("6145", "Cp1256"); // Arabic (Morocco) - LOCALEID_MAPPING.put("7169", "Cp1256"); // Arabic (Tunisia) - LOCALEID_MAPPING.put("8193", "Cp1256"); // Arabic (Oman) - LOCALEID_MAPPING.put("9217", "Cp1256"); // Arabic (Yemen) - LOCALEID_MAPPING.put("10000", "MacRoman"); // Mac Roman - LOCALEID_MAPPING.put("10241", "Cp1256"); // Arabic (Syria) - LOCALEID_MAPPING.put("11265", "Cp1256"); // Arabic (Jordan) - LOCALEID_MAPPING.put("12289", "Cp1256"); // Arabic (Lebanon) - LOCALEID_MAPPING.put("13313", "Cp1256"); // Arabic (Kuwait) - LOCALEID_MAPPING.put("14337", "Cp1256"); // Arabic (U.A.E.) 
- LOCALEID_MAPPING.put("15361", "Cp1256"); // Arabic (Bahrain) - LOCALEID_MAPPING.put("16385", "Cp1256"); // Arabic (Qatar) - } -} +/* + * Copyright 2013 Jon Iles + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.rtfparserkit.parser.standard; + +import java.util.HashMap; +import java.util.Map; + +/** + * Represents character encodings which may be encountered in an RTF file. + */ +class Encoding +{ + public static final String ANSI_ENCODING = "Cp1252"; + public static final String PC_ENCODING = "Cp437"; + public static final String PCA_ENCODING = "Cp850"; + + public static final Map LOCALEID_MAPPING = new HashMap(); + static + { + LOCALEID_MAPPING.put("932", "SJIS"); // Japanese + LOCALEID_MAPPING.put("936", "Cp936"); // Simplified Chinese + LOCALEID_MAPPING.put("1025", "Cp1256"); // Arabic (Saudi Arabia) + LOCALEID_MAPPING.put("1026", "Cp1251"); // Bulgarian + LOCALEID_MAPPING.put("1028", "Cp950"); // Chinese (Taiwan) + LOCALEID_MAPPING.put("1029", "Cp1250"); // Czech + LOCALEID_MAPPING.put("1032", "Cp1253"); // Greek + LOCALEID_MAPPING.put("1037", "Cp1255"); // Hebrew + LOCALEID_MAPPING.put("1038", "Cp1250"); // Hungarian + LOCALEID_MAPPING.put("1041", "SJIS"); // Japanese + LOCALEID_MAPPING.put("1042", "Cp949"); // Korean + LOCALEID_MAPPING.put("1045", "Cp1250"); // Polish + LOCALEID_MAPPING.put("1048", "Cp1250"); // Romanian + LOCALEID_MAPPING.put("1049", "Cp1251"); // Russian + LOCALEID_MAPPING.put("1050", "Cp1250"); // Croatian + 
LOCALEID_MAPPING.put("1051", "Cp1250"); // Slovak + LOCALEID_MAPPING.put("1052", "Cp1250"); // Albanian + LOCALEID_MAPPING.put("1054", "Cp874"); // Thai + LOCALEID_MAPPING.put("1055", "Cp1254"); // Turkish + LOCALEID_MAPPING.put("1056", "Cp1256"); // Urdu + LOCALEID_MAPPING.put("1058", "Cp1251"); // Ukrainian + LOCALEID_MAPPING.put("1059", "Cp1251"); // Belarusian + LOCALEID_MAPPING.put("1060", "Cp1250"); // Slovenian + LOCALEID_MAPPING.put("1061", "Cp1257"); // Estonian + LOCALEID_MAPPING.put("1062", "Cp1257"); // Latvian + LOCALEID_MAPPING.put("1063", "Cp1257"); // Lithuanian + LOCALEID_MAPPING.put("1065", "Cp1256"); // Farsi + LOCALEID_MAPPING.put("1066", "Cp1258"); // Vietnamese + LOCALEID_MAPPING.put("1068", "Cp1254"); // Azeri (Latin) + LOCALEID_MAPPING.put("1071", "Cp1251"); // FYRO Macedonian + LOCALEID_MAPPING.put("1087", "Cp1251"); // Kazakh + LOCALEID_MAPPING.put("1088", "Cp1251"); // Kyrgyz (Cyrillic) + LOCALEID_MAPPING.put("1091", "Cp1254"); // Uzbek (Latin) + LOCALEID_MAPPING.put("1092", "Cp1251"); // Tatar + LOCALEID_MAPPING.put("1104", "Cp1251"); // Mongolian (Cyrillic) + LOCALEID_MAPPING.put("1250", "Cp1250"); // Windows Latin 2 (Central Europe) + LOCALEID_MAPPING.put("1251", "Cp1251"); // Cyrillic + LOCALEID_MAPPING.put("1252", "Cp1252"); // Latin + LOCALEID_MAPPING.put("1254", "Cp1254"); // Turkish + LOCALEID_MAPPING.put("1255", "Cp1255"); // Windows Hebrew + LOCALEID_MAPPING.put("1256", "Cp1256"); // Arabic (Iraq) + LOCALEID_MAPPING.put("2049", "Cp1256"); // Arabic (Iraq) + LOCALEID_MAPPING.put("2052", "MS936"); // Chinese (PRC) + LOCALEID_MAPPING.put("2074", "Cp1250"); // Serbian (Latin) + LOCALEID_MAPPING.put("2092", "Cp1251"); // Azeri (Cyrillic) + LOCALEID_MAPPING.put("2115", "Cp1251"); // Uzbek (Cyrillic) + LOCALEID_MAPPING.put("3073", "Cp1256"); // Arabic (Egypt) + LOCALEID_MAPPING.put("3076", "Cp950"); // Chinese (Hong Kong S.A.R.) 
+ LOCALEID_MAPPING.put("3098", "Cp1251"); // Serbian (Cyrillic) + LOCALEID_MAPPING.put("4097", "Cp1256"); // Arabic (Libya) + LOCALEID_MAPPING.put("4100", "MS936"); // Chinese (Singapore) + LOCALEID_MAPPING.put("5121", "Cp1256"); // Arabic (Algeria) + LOCALEID_MAPPING.put("5124", "Cp950"); // Chinese (Macau S.A.R.) + LOCALEID_MAPPING.put("6145", "Cp1256"); // Arabic (Morocco) + LOCALEID_MAPPING.put("7169", "Cp1256"); // Arabic (Tunisia) + LOCALEID_MAPPING.put("8193", "Cp1256"); // Arabic (Oman) + LOCALEID_MAPPING.put("9217", "Cp1256"); // Arabic (Yemen) + LOCALEID_MAPPING.put("10000", "MacRoman"); // Mac Roman + LOCALEID_MAPPING.put("10241", "Cp1256"); // Arabic (Syria) + LOCALEID_MAPPING.put("11265", "Cp1256"); // Arabic (Jordan) + LOCALEID_MAPPING.put("12289", "Cp1256"); // Arabic (Lebanon) + LOCALEID_MAPPING.put("13313", "Cp1256"); // Arabic (Kuwait) + LOCALEID_MAPPING.put("14337", "Cp1256"); // Arabic (U.A.E.) + LOCALEID_MAPPING.put("15361", "Cp1256"); // Arabic (Bahrain) + LOCALEID_MAPPING.put("16385", "Cp1256"); // Arabic (Qatar) + } +} diff --git a/RTF Parser Kit/test/com/rtfparserkit/parser/standard/StandardRtfParserTest.java b/RTF Parser Kit/test/com/rtfparserkit/parser/standard/StandardRtfParserTest.java index c508e12..971b854 100644 --- a/RTF Parser Kit/test/com/rtfparserkit/parser/standard/StandardRtfParserTest.java +++ b/RTF Parser Kit/test/com/rtfparserkit/parser/standard/StandardRtfParserTest.java @@ -76,4 +76,11 @@ public void testGitHubIssue6() throws Exception { TestUtilities.assertRtfParserDumpMatches(this, new StandardRtfParser(), "testGitHubIssue6"); } + + @Test + public void testTurkishEncoding() throws Exception + { + TestUtilities.assertRtfParserDumpMatches(this, new StandardRtfParser(), "testTurkishEncoding"); + } + } diff --git a/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testTurkishEncoding.rtf b/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testTurkishEncoding.rtf new file mode 100644 index 0000000..570d43f --- 
/dev/null +++ b/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testTurkishEncoding.rtf @@ -0,0 +1,4 @@ +{\rtf1\ansi\ansicpg1254\deff0\nouicompat\deflang1055{\fonttbl{\f0\fnil\fcharset162 Segoe UI;}{\f1\fnil\fcharset0 Segoe UI;}} +{\*\generator Riched20 15.0.4567}{\*\mmathPr\mwrapIndent1440 }\viewkind4\uc1 +\pard\f0\fs20 Turkish Encoding.\f1\par +} \ No newline at end of file diff --git a/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testTurkishEncoding.xml b/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testTurkishEncoding.xml new file mode 100644 index 0000000..d6654af --- /dev/null +++ b/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testTurkishEncoding.xml @@ -0,0 +1,39 @@ + + + + + + + + + + + + + + Segoe UI; + + + + + + Segoe UI; + + + + + Riched20 15.0.4567 + + + + + + + + + + Turkish Encoding. + + + + \ No newline at end of file From b50177a7fc83bb63fdcef35a3df9efcd8aebb948 Mon Sep 17 00:00:00 2001 From: Jon Iles Date: Tue, 2 Feb 2016 21:01:47 +0000 Subject: [PATCH 03/30] Renamed RtfDumpListener, added RtfDump utility. --- .../src/com/rtfparserkit/utils/RtfDump.java | 206 ++--------------- .../rtfparserkit/utils/RtfDumpListener.java | 208 ++++++++++++++++++ .../com/rtfparserkit/utils/TestUtilities.java | 4 +- 3 files changed, 225 insertions(+), 193 deletions(-) create mode 100644 RTF Parser Kit/src/com/rtfparserkit/utils/RtfDumpListener.java diff --git a/RTF Parser Kit/src/com/rtfparserkit/utils/RtfDump.java b/RTF Parser Kit/src/com/rtfparserkit/utils/RtfDump.java index a39b4eb..fcaac0b 100644 --- a/RTF Parser Kit/src/com/rtfparserkit/utils/RtfDump.java +++ b/RTF Parser Kit/src/com/rtfparserkit/utils/RtfDump.java @@ -1,208 +1,32 @@ -/* - * Copyright 2013 Jon Iles - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ package com.rtfparserkit.utils; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.InputStream; import java.io.OutputStream; -import javax.xml.stream.XMLOutputFactory; -import javax.xml.stream.XMLStreamException; -import javax.xml.stream.XMLStreamWriter; +import com.rtfparserkit.parser.IRtfParser; +import com.rtfparserkit.parser.RtfStreamSource; +import com.rtfparserkit.parser.standard.StandardRtfParser; -import com.rtfparserkit.parser.IRtfListener; -import com.rtfparserkit.rtf.Command; -import com.sun.xml.internal.txw2.output.IndentingXMLStreamWriter; - -/** - * Trivial class used to convert events generated by an RTF parser into an XML document. - * The primary purpose of this code is to debug the parser output, and provide a - * convenient method for comparing expected and actual parser behaviour in test cases. - * - * Note that we're using an internal Sun class to produce indented XML. Not strictly - * necessary, but it makes the output more readable. - */ -@SuppressWarnings("restriction") -public class RtfDump implements IRtfListener +public class RtfDump { - /** - * Constructor. - */ - public RtfDump(OutputStream stream) - throws XMLStreamException - { - writer = new IndentingXMLStreamWriter(XMLOutputFactory.newInstance().createXMLStreamWriter(stream, "UTF-8")); - } - - /** - * Create the document header. 
- */ - @Override - public void processDocumentStart() + public static void main(String[] argv) { try { - writer.writeStartDocument("UTF-8", "1.0"); - writer.writeStartElement("rtf"); + InputStream is = new FileInputStream(argv[0]); + OutputStream os = new FileOutputStream(argv[1]); + IRtfParser parser = new StandardRtfParser(); + parser.parse(new RtfStreamSource(is), new RtfDumpListener(os)); + os.close(); } - catch (XMLStreamException ex) - { - throw new RuntimeException(ex); - } - } - - /** - * Create the document trailer. - */ - @Override - public void processDocumentEnd() - { - try - { - writer.writeEndElement(); - writer.writeEndDocument(); - } - catch (XMLStreamException ex) - { - throw new RuntimeException(ex); - } - } - - /** - * Write character bytes - note that we cheat, we just convert them - * directly to a string for output with no regard to the encoding. - */ - @Override - public void processCharacterBytes(byte[] data) - { - try - { - if (data.length != 0) - { - writer.writeStartElement("chars"); - writer.writeCharacters(new String(data)); - writer.writeEndElement(); - } - } - - catch (XMLStreamException ex) - { - throw new RuntimeException(ex); - } - } - - /** - * Write binary data as hex. - */ - @Override - public void processBinaryBytes(byte[] data) - { - try - { - writer.writeStartElement("bytes"); - for (byte b : data) - { - writer.writeCharacters(Integer.toHexString(b)); - } - writer.writeEndElement(); - } - - catch (XMLStreamException ex) - { - throw new RuntimeException(ex); - } - } - /** - * Write a group start tag. - */ - @Override - public void processGroupStart() - { - try - { - writer.writeStartElement("group"); - } - - catch (XMLStreamException ex) - { - throw new RuntimeException(ex); - } - } - - /** - * Write a group end tag. - */ - @Override - public void processGroupEnd() - { - try - { - writer.writeEndElement(); - } - - catch (XMLStreamException ex) - { - throw new RuntimeException(ex); - } - } - - /** - * Write a command tag. 
- */ - @Override - public void processCommand(Command command, int parameter, boolean hasParameter, boolean optional) - { - try - { - writer.writeEmptyElement("command"); - writer.writeAttribute("name", command.getCommandName()); - - if (hasParameter) - { - writer.writeAttribute("parameter", Integer.toString(parameter)); - } - - if (optional) - { - writer.writeAttribute("optional", "true"); - } - } - catch (XMLStreamException ex) - { - throw new RuntimeException(ex); - } - } - - /** - * Write string data. - */ - @Override - public void processString(String string) - { - try - { - writer.writeStartElement("chars"); - writer.writeCharacters(string); - writer.writeEndElement(); - } - catch (XMLStreamException ex) + catch (Exception ex) { - throw new RuntimeException(ex); + ex.printStackTrace(); } } - private final XMLStreamWriter writer; } diff --git a/RTF Parser Kit/src/com/rtfparserkit/utils/RtfDumpListener.java b/RTF Parser Kit/src/com/rtfparserkit/utils/RtfDumpListener.java new file mode 100644 index 0000000..a610428 --- /dev/null +++ b/RTF Parser Kit/src/com/rtfparserkit/utils/RtfDumpListener.java @@ -0,0 +1,208 @@ +/* + * Copyright 2013 Jon Iles + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.rtfparserkit.utils; + +import java.io.OutputStream; + +import javax.xml.stream.XMLOutputFactory; +import javax.xml.stream.XMLStreamException; +import javax.xml.stream.XMLStreamWriter; + +import com.rtfparserkit.parser.IRtfListener; +import com.rtfparserkit.rtf.Command; +import com.sun.xml.internal.txw2.output.IndentingXMLStreamWriter; + +/** + * Trivial class used to convert events generated by an RTF parser into an XML document. + * The primary purpose of this code is to debug the parser output, and provide a + * convenient method for comparing expected and actual parser behaviour in test cases. + * + * Note that we're using an internal Sun class to produce indented XML. Not strictly + * necessary, but it makes the output more readable. + */ +@SuppressWarnings("restriction") +public class RtfDumpListener implements IRtfListener +{ + /** + * Constructor. + */ + public RtfDumpListener(OutputStream stream) + throws XMLStreamException + { + writer = new IndentingXMLStreamWriter(XMLOutputFactory.newInstance().createXMLStreamWriter(stream, "UTF-8")); + } + + /** + * Create the document header. + */ + @Override + public void processDocumentStart() + { + try + { + writer.writeStartDocument("UTF-8", "1.0"); + writer.writeStartElement("rtf"); + } + catch (XMLStreamException ex) + { + throw new RuntimeException(ex); + } + } + + /** + * Create the document trailer. + */ + @Override + public void processDocumentEnd() + { + try + { + writer.writeEndElement(); + writer.writeEndDocument(); + } + catch (XMLStreamException ex) + { + throw new RuntimeException(ex); + } + } + + /** + * Write character bytes - note that we cheat, we just convert them + * directly to a string for output with no regard to the encoding. 
+ */ + @Override + public void processCharacterBytes(byte[] data) + { + try + { + if (data.length != 0) + { + writer.writeStartElement("chars"); + writer.writeCharacters(new String(data)); + writer.writeEndElement(); + } + } + + catch (XMLStreamException ex) + { + throw new RuntimeException(ex); + } + } + + /** + * Write binary data as hex. + */ + @Override + public void processBinaryBytes(byte[] data) + { + try + { + writer.writeStartElement("bytes"); + for (byte b : data) + { + writer.writeCharacters(Integer.toHexString(b)); + } + writer.writeEndElement(); + } + + catch (XMLStreamException ex) + { + throw new RuntimeException(ex); + } + } + + /** + * Write a group start tag. + */ + @Override + public void processGroupStart() + { + try + { + writer.writeStartElement("group"); + } + + catch (XMLStreamException ex) + { + throw new RuntimeException(ex); + } + } + + /** + * Write a group end tag. + */ + @Override + public void processGroupEnd() + { + try + { + writer.writeEndElement(); + } + + catch (XMLStreamException ex) + { + throw new RuntimeException(ex); + } + } + + /** + * Write a command tag. + */ + @Override + public void processCommand(Command command, int parameter, boolean hasParameter, boolean optional) + { + try + { + writer.writeEmptyElement("command"); + writer.writeAttribute("name", command.getCommandName()); + + if (hasParameter) + { + writer.writeAttribute("parameter", Integer.toString(parameter)); + } + + if (optional) + { + writer.writeAttribute("optional", "true"); + } + } + catch (XMLStreamException ex) + { + throw new RuntimeException(ex); + } + } + + /** + * Write string data. 
+ */ + @Override + public void processString(String string) + { + try + { + writer.writeStartElement("chars"); + writer.writeCharacters(string); + writer.writeEndElement(); + } + catch (XMLStreamException ex) + { + throw new RuntimeException(ex); + } + } + + private final XMLStreamWriter writer; +} diff --git a/RTF Parser Kit/test/com/rtfparserkit/utils/TestUtilities.java b/RTF Parser Kit/test/com/rtfparserkit/utils/TestUtilities.java index 904af9f..0906dcc 100644 --- a/RTF Parser Kit/test/com/rtfparserkit/utils/TestUtilities.java +++ b/RTF Parser Kit/test/com/rtfparserkit/utils/TestUtilities.java @@ -64,7 +64,7 @@ public static void assertRtfParserDumpMatches(Object parentTest, IRtfParser pars { is = parentTest.getClass().getResourceAsStream("data/" + filename + ".rtf"); os = new FileOutputStream(outputFile); - parser.parse(new RtfStreamSource(is), new RtfDump(os)); + parser.parse(new RtfStreamSource(is), new RtfDumpListener(os)); } finally @@ -150,7 +150,7 @@ public static void dump(IRtfParser parser, String filename, String outputFilenam { is = StandardRtfParserTest.class.getResourceAsStream("data/" + filename + ".rtf"); os = new FileOutputStream(outputFile); - parser.parse(new RtfStreamSource(is), new RtfDump(os)); + parser.parse(new RtfStreamSource(is), new RtfDumpListener(os)); } finally From 26874e5f59cae4fc21ab51e15010d8387363a677 Mon Sep 17 00:00:00 2001 From: Jon Iles Date: Tue, 2 Feb 2016 21:02:05 +0000 Subject: [PATCH 04/30] Added missing Greek encoding --- .../parser/standard/Encoding.java | 1 + .../standard/StandardRtfParserTest.java | 6 ++++ .../standard/data/testGreekEncoding.rtf | 3 ++ .../standard/data/testGreekEncoding.xml | 32 +++++++++++++++++++ 4 files changed, 42 insertions(+) create mode 100644 RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testGreekEncoding.rtf create mode 100644 RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testGreekEncoding.xml diff --git a/RTF Parser 
Kit/src/com/rtfparserkit/parser/standard/Encoding.java b/RTF Parser Kit/src/com/rtfparserkit/parser/standard/Encoding.java index f2116dc..60c2a09 100644 --- a/RTF Parser Kit/src/com/rtfparserkit/parser/standard/Encoding.java +++ b/RTF Parser Kit/src/com/rtfparserkit/parser/standard/Encoding.java @@ -69,6 +69,7 @@ class Encoding LOCALEID_MAPPING.put("1250", "Cp1250"); // Windows Latin 2 (Central Europe) LOCALEID_MAPPING.put("1251", "Cp1251"); // Cyrillic LOCALEID_MAPPING.put("1252", "Cp1252"); // Latin + LOCALEID_MAPPING.put("1253", "Cp1253"); // Greek LOCALEID_MAPPING.put("1254", "Cp1254"); // Turkish LOCALEID_MAPPING.put("1255", "Cp1255"); // Windows Hebrew LOCALEID_MAPPING.put("1256", "Cp1256"); // Arabic (Iraq) diff --git a/RTF Parser Kit/test/com/rtfparserkit/parser/standard/StandardRtfParserTest.java b/RTF Parser Kit/test/com/rtfparserkit/parser/standard/StandardRtfParserTest.java index 971b854..579d300 100644 --- a/RTF Parser Kit/test/com/rtfparserkit/parser/standard/StandardRtfParserTest.java +++ b/RTF Parser Kit/test/com/rtfparserkit/parser/standard/StandardRtfParserTest.java @@ -83,4 +83,10 @@ public void testTurkishEncoding() throws Exception TestUtilities.assertRtfParserDumpMatches(this, new StandardRtfParser(), "testTurkishEncoding"); } + @Test + public void testGreekEncoding() throws Exception + { + TestUtilities.assertRtfParserDumpMatches(this, new StandardRtfParser(), "testGreekEncoding"); + } + } diff --git a/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testGreekEncoding.rtf b/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testGreekEncoding.rtf new file mode 100644 index 0000000..60128c3 --- /dev/null +++ b/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testGreekEncoding.rtf @@ -0,0 +1,3 @@ +{\rtf1\ansi\ansicpg1252\deff0\deflang2057{\fonttbl{\f0\fnil\fcharset0 Tahoma;}{\f1\fnil\fcharset161 Tahoma;}} +\viewkind4\uc1\pard\f0\fs20 Unicode \'80\f1\'d9\f0\par +} \ No newline at end of file diff --git a/RTF Parser 
Kit/test/com/rtfparserkit/parser/standard/data/testGreekEncoding.xml b/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testGreekEncoding.xml new file mode 100644 index 0000000..cdfe3bd --- /dev/null +++ b/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testGreekEncoding.xml @@ -0,0 +1,32 @@ + + + + + + + + + + + + + Tahoma; + + + + + + Tahoma; + + + + + + + Unicode € + + Ω + + + + \ No newline at end of file From a3eb9542d99c971c5cd68c6e3d407da1d72e42c4 Mon Sep 17 00:00:00 2001 From: Jon Iles Date: Tue, 2 Feb 2016 21:12:05 +0000 Subject: [PATCH 05/30] Added Korean encoding --- .../parser/standard/Encoding.java | 1 + .../standard/StandardRtfParserTest.java | 5 +++ .../standard/data/testKoreanEncoding.rtf | 4 +++ .../standard/data/testKoreanEncoding.xml | 35 +++++++++++++++++++ 4 files changed, 45 insertions(+) create mode 100644 RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testKoreanEncoding.rtf create mode 100644 RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testKoreanEncoding.xml diff --git a/RTF Parser Kit/src/com/rtfparserkit/parser/standard/Encoding.java b/RTF Parser Kit/src/com/rtfparserkit/parser/standard/Encoding.java index 60c2a09..ddeb819 100644 --- a/RTF Parser Kit/src/com/rtfparserkit/parser/standard/Encoding.java +++ b/RTF Parser Kit/src/com/rtfparserkit/parser/standard/Encoding.java @@ -33,6 +33,7 @@ class Encoding { LOCALEID_MAPPING.put("932", "SJIS"); // Japanese LOCALEID_MAPPING.put("936", "Cp936"); // Simplified Chinese + LOCALEID_MAPPING.put("949", "Cp949"); // Korean LOCALEID_MAPPING.put("1025", "Cp1256"); // Arabic (Saudi Arabia) LOCALEID_MAPPING.put("1026", "Cp1251"); // Bulgarian LOCALEID_MAPPING.put("1028", "Cp950"); // Chinese (Taiwan) diff --git a/RTF Parser Kit/test/com/rtfparserkit/parser/standard/StandardRtfParserTest.java b/RTF Parser Kit/test/com/rtfparserkit/parser/standard/StandardRtfParserTest.java index 579d300..4b82417 100644 --- a/RTF Parser 
Kit/test/com/rtfparserkit/parser/standard/StandardRtfParserTest.java +++ b/RTF Parser Kit/test/com/rtfparserkit/parser/standard/StandardRtfParserTest.java @@ -89,4 +89,9 @@ public void testGreekEncoding() throws Exception TestUtilities.assertRtfParserDumpMatches(this, new StandardRtfParser(), "testGreekEncoding"); } + @Test + public void testKoreanEncoding() throws Exception + { + TestUtilities.assertRtfParserDumpMatches(this, new StandardRtfParser(), "testKoreanEncoding"); + } } diff --git a/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testKoreanEncoding.rtf b/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testKoreanEncoding.rtf new file mode 100644 index 0000000..5c1919c --- /dev/null +++ b/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testKoreanEncoding.rtf @@ -0,0 +1,4 @@ +{\rtf1\ansi\ansicpg949\deff0\nouicompat\deflang1033\deflangfe1042{\fonttbl{\f0\fswiss\fprq2\fcharset129 \'b8\'bc\'c0\'ba \'b0\'ed\'b5\'f1;}} +{\*\generator Riched20 14.0.4750.1000;}{\*\mmathPr\mwrapIndent1440}\viewkind4\uc1 +\pard\f0\fs20\lang1042 MS Project Addin ProjectPlus\par +} diff --git a/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testKoreanEncoding.xml b/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testKoreanEncoding.xml new file mode 100644 index 0000000..717071d --- /dev/null +++ b/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testKoreanEncoding.xml @@ -0,0 +1,35 @@ + + + + + + + + + + + + + + + + 맑은 고딕; + + + + + Riched20 14.0.4750.1000; + + + + + + + + + + + MS Project Addin ProjectPlus + + + \ No newline at end of file From 910dd5282aff8c1f2edc8608527162c1da7b0745 Mon Sep 17 00:00:00 2001 From: Jon Iles Date: Mon, 18 Apr 2016 10:12:12 +0100 Subject: [PATCH 06/30] Documentation updates --- README.md | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 0d28ee6..4088504 100644 --- a/README.md +++ b/README.md @@ -10,13 +10,18 @@ What's currently 
included? * Raw RTF Parser - parses RTF, sends events representing content to a listener. Performs minimal processing - you get the RTF commands and data exactly as they appear in the file. * Standard RTF Parser - parses RTF, sends events representing content to a listener. Handles character encoding, Unicode and so on, so you don't have to. This is probably the parser you want to use. * Text Converter - demonstrates very simple text extraction from an RTF file +* RTF Dump - another demonstration, this time writing the RTF file contents as XML -What's planned? ---------------- -* HTML converter -* Parsing to an RTF document object model -* RTF generation from an RTF document object model +Getting Started +=============== -That's a lot of stuff! ----------------------- -Yes it is! It'll take me a while to work my way through the list of things I want to achieve, so I'd love for you to send me some code which extends what I've done or makes it better! +You have a choice of two parsers to work with, the standard parser and the raw parser. The raw parser carries out minimal processing on the RTF, the standard parser handles character encodings, and translates commands which represent special characters into their Unicode equivalents. Most people will want to use the standard parser. 
+ +The parser is invoked something like this: +```java +InputStream is = new FileInputStream("/path/to/my/file.rtf"); +IRtfSource source = new RtfStreamSource(is) +IRtfParser parser = new StandardRtfParser(); +MyRtfListener listener = new MyRtfListener(); +parser.parse(source, listener); +``` From 073ab18850e4d8927f80cf4c9dc5e13ca4574c7f Mon Sep 17 00:00:00 2001 From: Jon Iles Date: Mon, 18 Apr 2016 11:31:09 +0100 Subject: [PATCH 07/30] Update README.md --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 4088504..09dfdcc 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,7 @@ Getting Started You have a choice of two parsers to work with, the standard parser and the raw parser. The raw parser carries out minimal processing on the RTF, the standard parser handles character encodings, and translates commands which represent special characters into their Unicode equivalents. Most people will want to use the standard parser. -The parser is invoked something like this: +The parser is invoked like this: ```java InputStream is = new FileInputStream("/path/to/my/file.rtf"); IRtfSource source = new RtfStreamSource(is) @@ -25,3 +25,4 @@ IRtfParser parser = new StandardRtfParser(); MyRtfListener listener = new MyRtfListener(); parser.parse(source, listener); ``` +You provide input to the parser via a class that implements the `IRtfSource` interface. Two implementations are provided for you, `RtfStreamSource`, for reading RTF from a stream, and `RtfStringSource` for reading RTF from a string. 
From e50d7e4de44cdf5d26654261e3f604342322b56c Mon Sep 17 00:00:00 2001 From: Jon Iles Date: Mon, 18 Apr 2016 11:34:15 +0100 Subject: [PATCH 08/30] Update README.md --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 09dfdcc..6a6ceb7 100644 --- a/README.md +++ b/README.md @@ -26,3 +26,5 @@ MyRtfListener listener = new MyRtfListener(); parser.parse(source, listener); ``` You provide input to the parser via a class that implements the `IRtfSource` interface. Two implementations are provided for you, `RtfStreamSource`, for reading RTF from a stream, and `RtfStringSource` for reading RTF from a string. + +The other thing you need to provide the parser with is alistener class. The listener class implements the `IRtfListener` listener interface. The interface consists of a set of methods which are called by the parser to inform you of when it encounters different parts of the docuent structure. The set of method, along with some comments describing their purpose can be seen [here](https://github.com/joniles/rtfparserkit/blob/master/RTF%20Parser%20Kit/src/com/rtfparserkit/parser/IRtfListener.java). From ce5a41c2cfba7dc44d62ff606b27012d61aedc15 Mon Sep 17 00:00:00 2001 From: Jon Iles Date: Mon, 18 Apr 2016 11:39:14 +0100 Subject: [PATCH 09/30] Update README.md --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 6a6ceb7..0c371ea 100644 --- a/README.md +++ b/README.md @@ -28,3 +28,5 @@ parser.parse(source, listener); You provide input to the parser via a class that implements the `IRtfSource` interface. Two implementations are provided for you, `RtfStreamSource`, for reading RTF from a stream, and `RtfStringSource` for reading RTF from a string. The other thing you need to provide the parser with is alistener class. The listener class implements the `IRtfListener` listener interface. 
The interface consists of a set of methods which are called by the parser to inform you of when it encounters different parts of the docuent structure. The set of method, along with some comments describing their purpose can be seen [here](https://github.com/joniles/rtfparserkit/blob/master/RTF%20Parser%20Kit/src/com/rtfparserkit/parser/IRtfListener.java). + +You don't need to implement all of the `IRtfListener` interface yourself, if you wish you can subclass `RtfListenerAdaptor` which provides empty methods for all of the `IRtfListener` methods. You can then just override the methods you are interested in. From c3ba2b90b55663071899ea1d9250fbc85315cc69 Mon Sep 17 00:00:00 2001 From: Jon Iles Date: Wed, 25 May 2016 10:56:56 +0100 Subject: [PATCH 10/30] Added support for mac encoding --- .../parser/standard/Encoding.java | 1 + .../parser/standard/StandardRtfParser.java | 870 +++++++++--------- 2 files changed, 439 insertions(+), 432 deletions(-) diff --git a/RTF Parser Kit/src/com/rtfparserkit/parser/standard/Encoding.java b/RTF Parser Kit/src/com/rtfparserkit/parser/standard/Encoding.java index ddeb819..34db201 100644 --- a/RTF Parser Kit/src/com/rtfparserkit/parser/standard/Encoding.java +++ b/RTF Parser Kit/src/com/rtfparserkit/parser/standard/Encoding.java @@ -27,6 +27,7 @@ class Encoding public static final String ANSI_ENCODING = "Cp1252"; public static final String PC_ENCODING = "Cp437"; public static final String PCA_ENCODING = "Cp850"; + public static final String MAC_ENCODING = "MacRoman"; public static final Map LOCALEID_MAPPING = new HashMap(); static diff --git a/RTF Parser Kit/src/com/rtfparserkit/parser/standard/StandardRtfParser.java b/RTF Parser Kit/src/com/rtfparserkit/parser/standard/StandardRtfParser.java index 28b5115..6f9e24e 100644 --- a/RTF Parser Kit/src/com/rtfparserkit/parser/standard/StandardRtfParser.java +++ b/RTF Parser Kit/src/com/rtfparserkit/parser/standard/StandardRtfParser.java @@ -1,432 +1,438 @@ -/* - * Copyright 2013 Jon Iles - 
* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package com.rtfparserkit.parser.standard; - -import java.io.IOException; -import java.io.UnsupportedEncodingException; -import java.util.ArrayDeque; -import java.util.Deque; -import java.util.HashMap; -import java.util.Map; - -import com.rtfparserkit.parser.IRtfListener; -import com.rtfparserkit.parser.IRtfParser; -import com.rtfparserkit.parser.IRtfSource; -import com.rtfparserkit.parser.raw.RawRtfParser; -import com.rtfparserkit.rtf.Command; -import com.rtfparserkit.rtf.CommandType; - -/** - * This class builds on the RawRtfParser to provide a parser which can - * deal with character encodings and Unicode. All of the character data it reads - * is presented back to the client as Unicode strings to make it as simple as - * possible to deal with. - */ -public class StandardRtfParser implements IRtfParser, IRtfListener -{ - /** - * Main entry point: parse RTF data from the input stream, and pass events based on - * the RTF content to the listener. - */ - @Override - public void parse(IRtfSource source, IRtfListener listener) throws IOException - { - handler = new DefaultEventHandler(listener); - IRtfParser reader = new RawRtfParser(); - reader.parse(source, this); - } - - /** - * Handle event from the RawRtfParser. - */ - @Override - public void processGroupStart() - { - handleEvent(GROUP_START); - stack.push(state); - state = new ParserState(state); - } - - /** - * Handle event from the RawRtfParser. 
- */ - @Override - public void processGroupEnd() - { - handleEvent(GROUP_END); - state = stack.pop(); - } - - /** - * Handle event from the RawRtfParser. - */ - @Override - public void processCharacterBytes(byte[] data) - { - try - { - if (data.length != 0) - { - if (skipBytes < data.length) - { - handleEvent(new StringEvent(new String(data, skipBytes, data.length - skipBytes, currentEncoding()))); - } - skipBytes = 0; - } - } - - catch (UnsupportedEncodingException ex) - { - throw new RuntimeException(ex); - } - } - - /** - * Determine which encoding to use, one defined by the current font, or the current default encoding. - */ - private String currentEncoding() - { - return state.currentFontEncoding == null ? state.currentEncoding : state.currentFontEncoding; - } - - /** - * Handle event from the RawRtfParser. - */ - @Override - public void processDocumentStart() - { - handleEvent(DOCUMENT_START); - } - - /** - * Handle event from the RawRtfParser. - */ - @Override - public void processDocumentEnd() - { - handleEvent(DOCUMENT_END); - } - - /** - * Handle event from the RawRtfParser. - */ - @Override - public void processBinaryBytes(byte[] data) - { - handleEvent(new BinaryBytesEvent(data)); - } - - /** - * Handle event from the RawRtfParser. - */ - @Override - public void processString(String string) - { - handleEvent(new StringEvent(string)); - } - - /** - * Handle event from the RawRtfParser. 
- */ - @Override - public void processCommand(Command command, int parameter, boolean hasParameter, boolean optional) - { - if (command.getCommandType() == CommandType.Encoding) - { - processEncoding(command, hasParameter, parameter); - } - else - { - boolean optionalFlag = false; - - IParserEvent lastEvent = handler.getLastEvent(); - if (lastEvent.getType() == ParserEventType.COMMAND_EVENT) - { - if (((CommandEvent) lastEvent).getCommand() == Command.optionalcommand) - { - handler.removeLastEvent(); - optionalFlag = true; - } - } - - switch (command) - { - case u: - { - processUnicode(parameter); - break; - } - - case uc: - { - processUnicodeAlternateSkipCount(parameter); - break; - } - - case upr: - { - processUpr(new CommandEvent(command, parameter, hasParameter, optionalFlag)); - break; - } - - case emdash: - { - processCharacter('\u2014'); - break; - } - - case endash: - { - processCharacter('\u2013'); - break; - } - - case emspace: - { - processCharacter('\u2003'); - break; - } - - case enspace: - { - processCharacter('\u2002'); - break; - } - - case qmspace: - { - processCharacter('\u2005'); - break; - } - - case bullet: - { - processCharacter('\u2022'); - break; - } - - case lquote: - { - processCharacter('\u2018'); - break; - } - - case rquote: - { - processCharacter('\u2019'); - break; - } - - case ldblquote: - { - processCharacter('\u201c'); - break; - } - - case rdblquote: - { - processCharacter('\u201d'); - break; - } - - case backslash: - { - processCharacter('\\'); - break; - } - - case opencurly: - { - processCharacter('{'); - break; - } - - case closecurly: - { - processCharacter('}'); - break; - } - - case f: - { - processFont(parameter); - handleCommand(command, parameter, hasParameter, optionalFlag); - break; - } - - case fcharset: - { - processFontCharset(parameter); - handleCommand(command, parameter, hasParameter, optionalFlag); - break; - } - - default: - { - handleCommand(command, parameter, hasParameter, optionalFlag); - break; - } - } - } 
- } - - /** - * Set the current font and current font encoding in the state. - */ - private void processFont(int parameter) - { - state.currentFont = parameter; - state.currentFontEncoding = m_fontEncodings.get(Integer.valueOf(parameter)); - } - - /** - * Set the charset for the current font. - */ - private void processFontCharset(int parameter) - { - String charset = FontCharset.getCharset(parameter); - if (charset != null) - { - m_fontEncodings.put(Integer.valueOf(state.currentFont), Encoding.LOCALEID_MAPPING.get(charset)); - } - } - - /** - * Switch the encoding based on the RTF command received. - */ - private void processEncoding(Command command, boolean hasParameter, int parameter) - { - String encoding = null; - switch (command) - { - case ansi: - { - encoding = Encoding.ANSI_ENCODING; - break; - } - - case pc: - { - encoding = Encoding.PC_ENCODING; - break; - } - - case pca: - { - encoding = Encoding.PCA_ENCODING; - break; - } - - case ansicpg: - { - encoding = hasParameter ? Encoding.LOCALEID_MAPPING.get(Integer.toString(parameter)) : null; - break; - } - - default: - { - encoding = null; - break; - } - } - - if (encoding == null) - { - throw new IllegalArgumentException("Unsupported encoding command " + command.getCommandName() + (hasParameter ? parameter : "")); - } - - state.currentEncoding = encoding; - } - - /** - * Process an RTF command parameter representing a Unicode character. - */ - private void processUnicode(int parameter) - { - if (parameter < 0) - { - parameter += 65536; - } - - processCharacter((char) parameter); - skipBytes = state.unicodeAlternateSkipCount; - } - - /** - * Set the number of bytes to skip after a Unicode character. - */ - private void processUnicodeAlternateSkipCount(int parameter) - { - state.unicodeAlternateSkipCount = parameter; - } - - /** - * Process a upr command: consume all of the RTF commands relating to this - * and emit events representing the Unicode content. 
- * @param command - */ - private void processUpr(IParserEvent command) - { - IParserEventHandler uprHandler = new UprHandler(handler); - uprHandler.handleEvent(command); - - handlerStack.push(handler); - handler = uprHandler; - } - - /** - * Process a single character. - */ - private void processCharacter(char c) - { - handleEvent(new StringEvent(Character.toString(c))); - } - - /** - * Process an RTF command. - */ - private void handleCommand(Command command, int parameter, boolean hasParameter, boolean optional) - { - handleEvent(new CommandEvent(command, parameter, hasParameter, optional)); - } - - /** - * Pass an event to the event handler, pop the event handler stack if the current - * event handler has consumed all of the events it can. - */ - private void handleEvent(IParserEvent event) - { - handler.handleEvent(event); - if (handler.isComplete()) - { - handler = handlerStack.pop(); - } - } - - private IParserEventHandler handler; - private final Deque handlerStack = new ArrayDeque(); - - private ParserState state = new ParserState(); - private final Deque stack = new ArrayDeque(); - private int skipBytes; - private Map m_fontEncodings = new HashMap(); - - private static final IParserEvent DOCUMENT_START = new DocumentStartEvent(); - private static final IParserEvent DOCUMENT_END = new DocumentEndEvent(); - private static final IParserEvent GROUP_START = new GroupStartEvent(); - private static final IParserEvent GROUP_END = new GroupEndEvent(); -} +/* + * Copyright 2013 Jon Iles + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.rtfparserkit.parser.standard; + +import java.io.IOException; +import java.io.UnsupportedEncodingException; +import java.util.ArrayDeque; +import java.util.Deque; +import java.util.HashMap; +import java.util.Map; + +import com.rtfparserkit.parser.IRtfListener; +import com.rtfparserkit.parser.IRtfParser; +import com.rtfparserkit.parser.IRtfSource; +import com.rtfparserkit.parser.raw.RawRtfParser; +import com.rtfparserkit.rtf.Command; +import com.rtfparserkit.rtf.CommandType; + +/** + * This class builds on the RawRtfParser to provide a parser which can + * deal with character encodings and Unicode. All of the character data it reads + * is presented back to the client as Unicode strings to make it as simple as + * possible to deal with. + */ +public class StandardRtfParser implements IRtfParser, IRtfListener +{ + /** + * Main entry point: parse RTF data from the input stream, and pass events based on + * the RTF content to the listener. + */ + @Override + public void parse(IRtfSource source, IRtfListener listener) throws IOException + { + handler = new DefaultEventHandler(listener); + IRtfParser reader = new RawRtfParser(); + reader.parse(source, this); + } + + /** + * Handle event from the RawRtfParser. + */ + @Override + public void processGroupStart() + { + handleEvent(GROUP_START); + stack.push(state); + state = new ParserState(state); + } + + /** + * Handle event from the RawRtfParser. + */ + @Override + public void processGroupEnd() + { + handleEvent(GROUP_END); + state = stack.pop(); + } + + /** + * Handle event from the RawRtfParser. 
+ */ + @Override + public void processCharacterBytes(byte[] data) + { + try + { + if (data.length != 0) + { + if (skipBytes < data.length) + { + handleEvent(new StringEvent(new String(data, skipBytes, data.length - skipBytes, currentEncoding()))); + } + skipBytes = 0; + } + } + + catch (UnsupportedEncodingException ex) + { + throw new RuntimeException(ex); + } + } + + /** + * Determine which encoding to use, one defined by the current font, or the current default encoding. + */ + private String currentEncoding() + { + return state.currentFontEncoding == null ? state.currentEncoding : state.currentFontEncoding; + } + + /** + * Handle event from the RawRtfParser. + */ + @Override + public void processDocumentStart() + { + handleEvent(DOCUMENT_START); + } + + /** + * Handle event from the RawRtfParser. + */ + @Override + public void processDocumentEnd() + { + handleEvent(DOCUMENT_END); + } + + /** + * Handle event from the RawRtfParser. + */ + @Override + public void processBinaryBytes(byte[] data) + { + handleEvent(new BinaryBytesEvent(data)); + } + + /** + * Handle event from the RawRtfParser. + */ + @Override + public void processString(String string) + { + handleEvent(new StringEvent(string)); + } + + /** + * Handle event from the RawRtfParser. 
+ */ + @Override + public void processCommand(Command command, int parameter, boolean hasParameter, boolean optional) + { + if (command.getCommandType() == CommandType.Encoding) + { + processEncoding(command, hasParameter, parameter); + } + else + { + boolean optionalFlag = false; + + IParserEvent lastEvent = handler.getLastEvent(); + if (lastEvent.getType() == ParserEventType.COMMAND_EVENT) + { + if (((CommandEvent) lastEvent).getCommand() == Command.optionalcommand) + { + handler.removeLastEvent(); + optionalFlag = true; + } + } + + switch (command) + { + case u: + { + processUnicode(parameter); + break; + } + + case uc: + { + processUnicodeAlternateSkipCount(parameter); + break; + } + + case upr: + { + processUpr(new CommandEvent(command, parameter, hasParameter, optionalFlag)); + break; + } + + case emdash: + { + processCharacter('\u2014'); + break; + } + + case endash: + { + processCharacter('\u2013'); + break; + } + + case emspace: + { + processCharacter('\u2003'); + break; + } + + case enspace: + { + processCharacter('\u2002'); + break; + } + + case qmspace: + { + processCharacter('\u2005'); + break; + } + + case bullet: + { + processCharacter('\u2022'); + break; + } + + case lquote: + { + processCharacter('\u2018'); + break; + } + + case rquote: + { + processCharacter('\u2019'); + break; + } + + case ldblquote: + { + processCharacter('\u201c'); + break; + } + + case rdblquote: + { + processCharacter('\u201d'); + break; + } + + case backslash: + { + processCharacter('\\'); + break; + } + + case opencurly: + { + processCharacter('{'); + break; + } + + case closecurly: + { + processCharacter('}'); + break; + } + + case f: + { + processFont(parameter); + handleCommand(command, parameter, hasParameter, optionalFlag); + break; + } + + case fcharset: + { + processFontCharset(parameter); + handleCommand(command, parameter, hasParameter, optionalFlag); + break; + } + + default: + { + handleCommand(command, parameter, hasParameter, optionalFlag); + break; + } + } + } 
+ } + + /** + * Set the current font and current font encoding in the state. + */ + private void processFont(int parameter) + { + state.currentFont = parameter; + state.currentFontEncoding = m_fontEncodings.get(Integer.valueOf(parameter)); + } + + /** + * Set the charset for the current font. + */ + private void processFontCharset(int parameter) + { + String charset = FontCharset.getCharset(parameter); + if (charset != null) + { + m_fontEncodings.put(Integer.valueOf(state.currentFont), Encoding.LOCALEID_MAPPING.get(charset)); + } + } + + /** + * Switch the encoding based on the RTF command received. + */ + private void processEncoding(Command command, boolean hasParameter, int parameter) + { + String encoding = null; + switch (command) + { + case ansi: + { + encoding = Encoding.ANSI_ENCODING; + break; + } + + case pc: + { + encoding = Encoding.PC_ENCODING; + break; + } + + case pca: + { + encoding = Encoding.PCA_ENCODING; + break; + } + + case mac: + { + encoding = Encoding.MAC_ENCODING; + break; + } + + case ansicpg: + { + encoding = hasParameter ? Encoding.LOCALEID_MAPPING.get(Integer.toString(parameter)) : null; + break; + } + + default: + { + encoding = null; + break; + } + } + + if (encoding == null) + { + throw new IllegalArgumentException("Unsupported encoding command " + command.getCommandName() + (hasParameter ? parameter : "")); + } + + state.currentEncoding = encoding; + } + + /** + * Process an RTF command parameter representing a Unicode character. + */ + private void processUnicode(int parameter) + { + if (parameter < 0) + { + parameter += 65536; + } + + processCharacter((char) parameter); + skipBytes = state.unicodeAlternateSkipCount; + } + + /** + * Set the number of bytes to skip after a Unicode character. 
+ */ + private void processUnicodeAlternateSkipCount(int parameter) + { + state.unicodeAlternateSkipCount = parameter; + } + + /** + * Process a upr command: consume all of the RTF commands relating to this + * and emit events representing the Unicode content. + * @param command + */ + private void processUpr(IParserEvent command) + { + IParserEventHandler uprHandler = new UprHandler(handler); + uprHandler.handleEvent(command); + + handlerStack.push(handler); + handler = uprHandler; + } + + /** + * Process a single character. + */ + private void processCharacter(char c) + { + handleEvent(new StringEvent(Character.toString(c))); + } + + /** + * Process an RTF command. + */ + private void handleCommand(Command command, int parameter, boolean hasParameter, boolean optional) + { + handleEvent(new CommandEvent(command, parameter, hasParameter, optional)); + } + + /** + * Pass an event to the event handler, pop the event handler stack if the current + * event handler has consumed all of the events it can. + */ + private void handleEvent(IParserEvent event) + { + handler.handleEvent(event); + if (handler.isComplete()) + { + handler = handlerStack.pop(); + } + } + + private IParserEventHandler handler; + private final Deque handlerStack = new ArrayDeque(); + + private ParserState state = new ParserState(); + private final Deque stack = new ArrayDeque(); + private int skipBytes; + private Map m_fontEncodings = new HashMap(); + + private static final IParserEvent DOCUMENT_START = new DocumentStartEvent(); + private static final IParserEvent DOCUMENT_END = new DocumentEndEvent(); + private static final IParserEvent GROUP_START = new GroupStartEvent(); + private static final IParserEvent GROUP_END = new GroupEndEvent(); +} From 4becf02745de48dc68cd9d83ea9828f82e39c28c Mon Sep 17 00:00:00 2001 From: Jon Iles Date: Wed, 8 Jun 2016 11:34:35 +0100 Subject: [PATCH 11/30] Handle signed values. 
--- .../parser/standard/StandardRtfParser.java | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/RTF Parser Kit/src/com/rtfparserkit/parser/standard/StandardRtfParser.java b/RTF Parser Kit/src/com/rtfparserkit/parser/standard/StandardRtfParser.java index 6f9e24e..7991d78 100644 --- a/RTF Parser Kit/src/com/rtfparserkit/parser/standard/StandardRtfParser.java +++ b/RTF Parser Kit/src/com/rtfparserkit/parser/standard/StandardRtfParser.java @@ -339,7 +339,7 @@ private void processEncoding(Command command, boolean hasParameter, int paramete case ansicpg: { - encoding = hasParameter ? Encoding.LOCALEID_MAPPING.get(Integer.toString(parameter)) : null; + encoding = hasParameter ? Encoding.LOCALEID_MAPPING.get(Integer.toString(unsignedValue(parameter))) : null; break; } @@ -363,12 +363,7 @@ private void processEncoding(Command command, boolean hasParameter, int paramete */ private void processUnicode(int parameter) { - if (parameter < 0) - { - parameter += 65536; - } - - processCharacter((char) parameter); + processCharacter((char) unsignedValue(parameter)); skipBytes = state.unicodeAlternateSkipCount; } @@ -423,6 +418,15 @@ private void handleEvent(IParserEvent event) } } + private int unsignedValue(int parameter) + { + if (parameter < 0) + { + parameter += 65536; + } + return parameter; + } + private IParserEventHandler handler; private final Deque handlerStack = new ArrayDeque(); From a2ab5fb09c7f213d996b6ce825dad332ef4ee050 Mon Sep 17 00:00:00 2001 From: Jon Iles Date: Wed, 8 Jun 2016 11:36:26 +0100 Subject: [PATCH 12/30] Add support for UTF-8 encoding. 
Add comments noting details of other encodings --- .../parser/standard/Encoding.java | 142 ++++++++++++++++++ 1 file changed, 142 insertions(+) diff --git a/RTF Parser Kit/src/com/rtfparserkit/parser/standard/Encoding.java b/RTF Parser Kit/src/com/rtfparserkit/parser/standard/Encoding.java index 34db201..12afabb 100644 --- a/RTF Parser Kit/src/com/rtfparserkit/parser/standard/Encoding.java +++ b/RTF Parser Kit/src/com/rtfparserkit/parser/standard/Encoding.java @@ -32,9 +32,37 @@ class Encoding public static final Map LOCALEID_MAPPING = new HashMap(); static { + // Comment lines based on: https://msdn.microsoft.com/en-us/library/windows/desktop/dd317756(v=vs.85).aspx + + // 037 IBM037 IBM EBCDIC US-Canada + // 437 IBM437 OEM United States + // 500 IBM500 IBM EBCDIC International + // 708 ASMO-708 Arabic (ASMO 708) + // 709 Arabic (ASMO-449+, BCON V4) + // 710 Arabic - Transparent Arabic + // 720 DOS-720 Arabic (Transparent ASMO); Arabic (DOS) + // 737 ibm737 OEM Greek (formerly 437G); Greek (DOS) + // 775 ibm775 OEM Baltic; Baltic (DOS) + // 850 ibm850 OEM Multilingual Latin 1; Western European (DOS) + // 852 ibm852 OEM Latin 2; Central European (DOS) + // 855 IBM855 OEM Cyrillic (primarily Russian) + // 857 ibm857 OEM Turkish; Turkish (DOS) + // 858 IBM00858 OEM Multilingual Latin 1 + Euro symbol + // 860 IBM860 OEM Portuguese; Portuguese (DOS) + // 861 ibm861 OEM Icelandic; Icelandic (DOS) + // 862 DOS-862 OEM Hebrew; Hebrew (DOS) + // 863 IBM863 OEM French Canadian; French Canadian (DOS) + // 864 IBM864 OEM Arabic; Arabic (864) + // 865 IBM865 OEM Nordic; Nordic (DOS) + // 866 cp866 OEM Russian; Cyrillic (DOS) + // 869 ibm869 OEM Modern Greek; Greek, Modern (DOS) + // 870 IBM870 IBM EBCDIC Multilingual/ROECE (Latin 2); IBM EBCDIC Multilingual Latin 2 + // 874 windows-874 ANSI/OEM Thai (ISO 8859-11); Thai (Windows) + // 875 cp875 IBM EBCDIC Greek Modern LOCALEID_MAPPING.put("932", "SJIS"); // Japanese LOCALEID_MAPPING.put("936", "Cp936"); // Simplified Chinese 
LOCALEID_MAPPING.put("949", "Cp949"); // Korean + // 950 big5 ANSI/OEM Traditional Chinese (Taiwan; Hong Kong SAR, PRC); Chinese Traditional (Big5) LOCALEID_MAPPING.put("1025", "Cp1256"); // Arabic (Saudi Arabia) LOCALEID_MAPPING.put("1026", "Cp1251"); // Bulgarian LOCALEID_MAPPING.put("1028", "Cp950"); // Chinese (Taiwan) @@ -45,6 +73,7 @@ class Encoding LOCALEID_MAPPING.put("1041", "SJIS"); // Japanese LOCALEID_MAPPING.put("1042", "Cp949"); // Korean LOCALEID_MAPPING.put("1045", "Cp1250"); // Polish + // 1047 IBM01047 IBM EBCDIC Latin 1/Open System LOCALEID_MAPPING.put("1048", "Cp1250"); // Romanian LOCALEID_MAPPING.put("1049", "Cp1251"); // Russian LOCALEID_MAPPING.put("1050", "Cp1250"); // Croatian @@ -68,6 +97,18 @@ class Encoding LOCALEID_MAPPING.put("1091", "Cp1254"); // Uzbek (Latin) LOCALEID_MAPPING.put("1092", "Cp1251"); // Tatar LOCALEID_MAPPING.put("1104", "Cp1251"); // Mongolian (Cyrillic) + // 1140 IBM01140 IBM EBCDIC US-Canada (037 + Euro symbol); IBM EBCDIC (US-Canada-Euro) + // 1141 IBM01141 IBM EBCDIC Germany (20273 + Euro symbol); IBM EBCDIC (Germany-Euro) + // 1142 IBM01142 IBM EBCDIC Denmark-Norway (20277 + Euro symbol); IBM EBCDIC (Denmark-Norway-Euro) + // 1143 IBM01143 IBM EBCDIC Finland-Sweden (20278 + Euro symbol); IBM EBCDIC (Finland-Sweden-Euro) + // 1144 IBM01144 IBM EBCDIC Italy (20280 + Euro symbol); IBM EBCDIC (Italy-Euro) + // 1145 IBM01145 IBM EBCDIC Latin America-Spain (20284 + Euro symbol); IBM EBCDIC (Spain-Euro) + // 1146 IBM01146 IBM EBCDIC United Kingdom (20285 + Euro symbol); IBM EBCDIC (UK-Euro) + // 1147 IBM01147 IBM EBCDIC France (20297 + Euro symbol); IBM EBCDIC (France-Euro) + // 1148 IBM01148 IBM EBCDIC International (500 + Euro symbol); IBM EBCDIC (International-Euro) + // 1149 IBM01149 IBM EBCDIC Icelandic (20871 + Euro symbol); IBM EBCDIC (Icelandic-Euro) + // 1200 utf-16 Unicode UTF-16, little endian byte order (BMP of ISO 10646) + // 1201 unicodeFFFE Unicode UTF-16, big endian byte order 
LOCALEID_MAPPING.put("1250", "Cp1250"); // Windows Latin 2 (Central Europe) LOCALEID_MAPPING.put("1251", "Cp1251"); // Cyrillic LOCALEID_MAPPING.put("1252", "Cp1252"); // Latin @@ -75,6 +116,9 @@ class Encoding LOCALEID_MAPPING.put("1254", "Cp1254"); // Turkish LOCALEID_MAPPING.put("1255", "Cp1255"); // Windows Hebrew LOCALEID_MAPPING.put("1256", "Cp1256"); // Arabic (Iraq) + LOCALEID_MAPPING.put("1257", "Cp1257"); // Baltic + LOCALEID_MAPPING.put("1258", "Cp1258"); // Vietnamese + // 1361 Johab Korean (Johab) LOCALEID_MAPPING.put("2049", "Cp1256"); // Arabic (Iraq) LOCALEID_MAPPING.put("2052", "MS936"); // Chinese (PRC) LOCALEID_MAPPING.put("2074", "Cp1250"); // Serbian (Latin) @@ -92,12 +136,110 @@ class Encoding LOCALEID_MAPPING.put("8193", "Cp1256"); // Arabic (Oman) LOCALEID_MAPPING.put("9217", "Cp1256"); // Arabic (Yemen) LOCALEID_MAPPING.put("10000", "MacRoman"); // Mac Roman + // 10001 x-mac-japanese Japanese (Mac) + // 10002 x-mac-chinesetrad MAC Traditional Chinese (Big5); Chinese Traditional (Mac) + // 10003 x-mac-korean Korean (Mac) + // 10004 x-mac-arabic Arabic (Mac) + // 10005 x-mac-hebrew Hebrew (Mac) + // 10006 x-mac-greek Greek (Mac) + // 10007 x-mac-cyrillic Cyrillic (Mac) + // 10008 x-mac-chinesesimp MAC Simplified Chinese (GB 2312); Chinese Simplified (Mac) + // 10010 x-mac-romanian Romanian (Mac) + // 10017 x-mac-ukrainian Ukrainian (Mac) + // 10021 x-mac-thai Thai (Mac) + // 10029 x-mac-ce MAC Latin 2; Central European (Mac) + // 10079 x-mac-icelandic Icelandic (Mac) + // 10081 x-mac-turkish Turkish (Mac) + // 10082 x-mac-croatian Croatian (Mac) LOCALEID_MAPPING.put("10241", "Cp1256"); // Arabic (Syria) LOCALEID_MAPPING.put("11265", "Cp1256"); // Arabic (Jordan) + // 12000 utf-32 Unicode UTF-32, little endian byte order + // 12001 utf-32BE Unicode UTF-32, big endian byte order LOCALEID_MAPPING.put("12289", "Cp1256"); // Arabic (Lebanon) LOCALEID_MAPPING.put("13313", "Cp1256"); // Arabic (Kuwait) LOCALEID_MAPPING.put("14337", "Cp1256"); // 
Arabic (U.A.E.) LOCALEID_MAPPING.put("15361", "Cp1256"); // Arabic (Bahrain) LOCALEID_MAPPING.put("16385", "Cp1256"); // Arabic (Qatar) + // 20000 x-Chinese_CNS CNS Taiwan; Chinese Traditional (CNS) + // 20001 x-cp20001 TCA Taiwan + // 20002 x_Chinese-Eten Eten Taiwan; Chinese Traditional (Eten) + // 20003 x-cp20003 IBM5550 Taiwan + // 20004 x-cp20004 TeleText Taiwan + // 20005 x-cp20005 Wang Taiwan + // 20105 x-IA5 IA5 (IRV International Alphabet No. 5, 7-bit); Western European (IA5) + // 20106 x-IA5-German IA5 German (7-bit) + // 20107 x-IA5-Swedish IA5 Swedish (7-bit) + // 20108 x-IA5-Norwegian IA5 Norwegian (7-bit) + // 20127 us-ascii US-ASCII (7-bit) + // 20261 x-cp20261 T.61 + // 20269 x-cp20269 ISO 6937 Non-Spacing Accent + // 20273 IBM273 IBM EBCDIC Germany + // 20277 IBM277 IBM EBCDIC Denmark-Norway + // 20278 IBM278 IBM EBCDIC Finland-Sweden + // 20280 IBM280 IBM EBCDIC Italy + // 20284 IBM284 IBM EBCDIC Latin America-Spain + // 20285 IBM285 IBM EBCDIC United Kingdom + // 20290 IBM290 IBM EBCDIC Japanese Katakana Extended + // 20297 IBM297 IBM EBCDIC France + // 20420 IBM420 IBM EBCDIC Arabic + // 20423 IBM423 IBM EBCDIC Greek + // 20424 IBM424 IBM EBCDIC Hebrew + // 20833 x-EBCDIC-KoreanExtended IBM EBCDIC Korean Extended + // 20838 IBM-Thai IBM EBCDIC Thai + // 20866 koi8-r Russian (KOI8-R); Cyrillic (KOI8-R) + // 20871 IBM871 IBM EBCDIC Icelandic + // 20880 IBM880 IBM EBCDIC Cyrillic Russian + // 20905 IBM905 IBM EBCDIC Turkish + // 20924 IBM00924 IBM EBCDIC Latin 1/Open System (1047 + Euro symbol) + // 20932 EUC-JP Japanese (JIS 0208-1990 and 0212-1990) + // 20936 x-cp20936 Simplified Chinese (GB2312); Chinese Simplified (GB2312-80) + // 20949 x-cp20949 Korean Wansung + // 21025 cp1025 IBM EBCDIC Cyrillic Serbian-Bulgarian + // 21027 (deprecated) + // 21866 koi8-u Ukrainian (KOI8-U); Cyrillic (KOI8-U) + // 28591 iso-8859-1 ISO 8859-1 Latin 1; Western European (ISO) + // 28592 iso-8859-2 ISO 8859-2 Central European; Central European (ISO) + // 28593 
iso-8859-3 ISO 8859-3 Latin 3 + // 28594 iso-8859-4 ISO 8859-4 Baltic + // 28595 iso-8859-5 ISO 8859-5 Cyrillic + // 28596 iso-8859-6 ISO 8859-6 Arabic + // 28597 iso-8859-7 ISO 8859-7 Greek + // 28598 iso-8859-8 ISO 8859-8 Hebrew; Hebrew (ISO-Visual) + // 28599 iso-8859-9 ISO 8859-9 Turkish + // 28603 iso-8859-13 ISO 8859-13 Estonian + // 28605 iso-8859-15 ISO 8859-15 Latin 9 + // 29001 x-Europa Europa 3 + // 38598 iso-8859-8-i ISO 8859-8 Hebrew; Hebrew (ISO-Logical) + // 50220 iso-2022-jp ISO 2022 Japanese with no halfwidth Katakana; Japanese (JIS) + // 50221 csISO2022JP ISO 2022 Japanese with halfwidth Katakana; Japanese (JIS-Allow 1 byte Kana) + // 50222 iso-2022-jp ISO 2022 Japanese JIS X 0201-1989; Japanese (JIS-Allow 1 byte Kana - SO/SI) + // 50225 iso-2022-kr ISO 2022 Korean + // 50227 x-cp50227 ISO 2022 Simplified Chinese; Chinese Simplified (ISO 2022) + // 50229 ISO 2022 Traditional Chinese + // 50930 EBCDIC Japanese (Katakana) Extended + // 50931 EBCDIC US-Canada and Japanese + // 50933 EBCDIC Korean Extended and Korean + // 50935 EBCDIC Simplified Chinese Extended and Simplified Chinese + // 50936 EBCDIC Simplified Chinese + // 50937 EBCDIC US-Canada and Traditional Chinese + // 50939 EBCDIC Japanese (Latin) Extended and Japanese + // 51932 euc-jp EUC Japanese + // 51936 EUC-CN EUC Simplified Chinese; Chinese Simplified (EUC) + // 51949 euc-kr EUC Korean + // 51950 EUC Traditional Chinese + // 52936 hz-gb-2312 HZ-GB2312 Simplified Chinese; Chinese Simplified (HZ) + // 54936 GB18030 Windows XP and later: GB18030 Simplified Chinese (4 byte); Chinese Simplified (GB18030) + // 57002 x-iscii-de ISCII Devanagari + // 57003 x-iscii-be ISCII Bangla + // 57004 x-iscii-ta ISCII Tamil + // 57005 x-iscii-te ISCII Telugu + // 57006 x-iscii-as ISCII Assamese + // 57007 x-iscii-or ISCII Odia + // 57008 x-iscii-ka ISCII Kannada + // 57009 x-iscii-ma ISCII Malayalam + // 57010 x-iscii-gu ISCII Gujarati + // 57011 x-iscii-pa ISCII Punjabi + LOCALEID_MAPPING.put("65000", 
null); // UTF-7 - not a supported Java encoding, see: http://stackoverflow.com/questions/19861987/java-io-unsupportedencodingexception-unicode-1-1-utf-7 + LOCALEID_MAPPING.put("65001", "UTF-8"); // UTF-8 } } From 87c1892bb222ea6c866cc6aa77754807720e1e22 Mon Sep 17 00:00:00 2001 From: Jon Iles Date: Wed, 15 Jun 2016 14:57:54 +0100 Subject: [PATCH 13/30] Update symbol code page --- .../src/com/rtfparserkit/parser/standard/FontCharset.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/RTF Parser Kit/src/com/rtfparserkit/parser/standard/FontCharset.java b/RTF Parser Kit/src/com/rtfparserkit/parser/standard/FontCharset.java index 048198a..6250528 100644 --- a/RTF Parser Kit/src/com/rtfparserkit/parser/standard/FontCharset.java +++ b/RTF Parser Kit/src/com/rtfparserkit/parser/standard/FontCharset.java @@ -39,7 +39,7 @@ public static String getCharset(int parameter) { MAPPING[0] = "1252"; // ANSI MAPPING[1] = null; // Default - MAPPING[2] = "42"; // Symbol + MAPPING[2] = "1252"; // Symbol - according to the specs this is codepage 42 "Symbol". What's the Java equivalent? 1252 seems to work... 
MAPPING[77] = "10000"; // Mac Roman MAPPING[78] = "10001"; // Mac Shift Jis MAPPING[79] = "10003"; // Mac Hangul From 49da4bf10a1632b0c7cb207f73c28aaaed88dd9b Mon Sep 17 00:00:00 2001 From: Jon Iles Date: Thu, 5 Jan 2017 13:42:58 +0000 Subject: [PATCH 14/30] Add support for additional code pages (437, 874, 950, 10001, 10004, 10005, 10006, 10007, 10010, 10017, 10021, 10029, 10079, 10081, 10082) --- .../parser/standard/Encoding.java | 30 +- .../standard/StandardRtfParserTest.java | 30 + .../standard/data/test10001Encoding.rtf | 11 + .../standard/data/test10001Encoding.xml | 107 +++ .../standard/data/test10007Encoding.rtf | 12 + .../standard/data/test10007Encoding.xml | 53 ++ .../parser/standard/data/test437Encoding.rtf | 29 + .../parser/standard/data/test437Encoding.xml | 204 ++++++ .../parser/standard/data/test874Encoding.rtf | 20 + .../parser/standard/data/test874Encoding.xml | 640 ++++++++++++++++++ .../parser/standard/data/test950Encoding.rtf | 13 + .../parser/standard/data/test950Encoding.xml | 56 ++ 12 files changed, 1190 insertions(+), 15 deletions(-) create mode 100644 RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/test10001Encoding.rtf create mode 100644 RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/test10001Encoding.xml create mode 100644 RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/test10007Encoding.rtf create mode 100644 RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/test10007Encoding.xml create mode 100644 RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/test437Encoding.rtf create mode 100644 RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/test437Encoding.xml create mode 100644 RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/test874Encoding.rtf create mode 100644 RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/test874Encoding.xml create mode 100644 RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/test950Encoding.rtf create mode 100644 RTF Parser 
Kit/test/com/rtfparserkit/parser/standard/data/test950Encoding.xml diff --git a/RTF Parser Kit/src/com/rtfparserkit/parser/standard/Encoding.java b/RTF Parser Kit/src/com/rtfparserkit/parser/standard/Encoding.java index 12afabb..7c9269d 100644 --- a/RTF Parser Kit/src/com/rtfparserkit/parser/standard/Encoding.java +++ b/RTF Parser Kit/src/com/rtfparserkit/parser/standard/Encoding.java @@ -35,7 +35,7 @@ class Encoding // Comment lines based on: https://msdn.microsoft.com/en-us/library/windows/desktop/dd317756(v=vs.85).aspx // 037 IBM037 IBM EBCDIC US-Canada - // 437 IBM437 OEM United States + LOCALEID_MAPPING.put("437", "Cp437"); // IBM437 OEM United States // 500 IBM500 IBM EBCDIC International // 708 ASMO-708 Arabic (ASMO 708) // 709 Arabic (ASMO-449+, BCON V4) @@ -57,12 +57,12 @@ class Encoding // 866 cp866 OEM Russian; Cyrillic (DOS) // 869 ibm869 OEM Modern Greek; Greek, Modern (DOS) // 870 IBM870 IBM EBCDIC Multilingual/ROECE (Latin 2); IBM EBCDIC Multilingual Latin 2 - // 874 windows-874 ANSI/OEM Thai (ISO 8859-11); Thai (Windows) + LOCALEID_MAPPING.put("874", "Cp874"); // windows-874 ANSI/OEM Thai (ISO 8859-11); Thai (Windows) // 875 cp875 IBM EBCDIC Greek Modern LOCALEID_MAPPING.put("932", "SJIS"); // Japanese LOCALEID_MAPPING.put("936", "Cp936"); // Simplified Chinese LOCALEID_MAPPING.put("949", "Cp949"); // Korean - // 950 big5 ANSI/OEM Traditional Chinese (Taiwan; Hong Kong SAR, PRC); Chinese Traditional (Big5) + LOCALEID_MAPPING.put("950", "Cp950"); // ANSI/OEM Traditional Chinese (Taiwan; Hong Kong SAR, PRC); Chinese Traditional (Big5) LOCALEID_MAPPING.put("1025", "Cp1256"); // Arabic (Saudi Arabia) LOCALEID_MAPPING.put("1026", "Cp1251"); // Bulgarian LOCALEID_MAPPING.put("1028", "Cp950"); // Chinese (Taiwan) @@ -136,21 +136,21 @@ class Encoding LOCALEID_MAPPING.put("8193", "Cp1256"); // Arabic (Oman) LOCALEID_MAPPING.put("9217", "Cp1256"); // Arabic (Yemen) LOCALEID_MAPPING.put("10000", "MacRoman"); // Mac Roman - // 10001 x-mac-japanese Japanese 
(Mac) + LOCALEID_MAPPING.put("10001", "Shift_JIS"); // x-mac-japanese Japanese (Mac) // 10002 x-mac-chinesetrad MAC Traditional Chinese (Big5); Chinese Traditional (Mac) // 10003 x-mac-korean Korean (Mac) - // 10004 x-mac-arabic Arabic (Mac) - // 10005 x-mac-hebrew Hebrew (Mac) - // 10006 x-mac-greek Greek (Mac) - // 10007 x-mac-cyrillic Cyrillic (Mac) + LOCALEID_MAPPING.put("10004", "x-MacArabic"); // x-mac-arabic Arabic (Mac) + LOCALEID_MAPPING.put("10005", "x-MacHebrew"); // x-mac-hebrew Hebrew (Mac) + LOCALEID_MAPPING.put("10006", "x-MacGreek"); // x-mac-greek Greek (Mac) + LOCALEID_MAPPING.put("10007", "x-MacCyrillic"); // x-mac-cyrillic Cyrillic (Mac) // 10008 x-mac-chinesesimp MAC Simplified Chinese (GB 2312); Chinese Simplified (Mac) - // 10010 x-mac-romanian Romanian (Mac) - // 10017 x-mac-ukrainian Ukrainian (Mac) - // 10021 x-mac-thai Thai (Mac) - // 10029 x-mac-ce MAC Latin 2; Central European (Mac) - // 10079 x-mac-icelandic Icelandic (Mac) - // 10081 x-mac-turkish Turkish (Mac) - // 10082 x-mac-croatian Croatian (Mac) + LOCALEID_MAPPING.put("10010", "x-MacRomania"); // x-mac-romanian Romanian (Mac) + LOCALEID_MAPPING.put("10017", "x-MacUkraine"); // x-mac-ukrainian Ukrainian (Mac) + LOCALEID_MAPPING.put("10021", "x-MacThai"); // x-mac-thai Thai (Mac) + LOCALEID_MAPPING.put("10029", "x-MacCentralEurope"); // x-mac-ce MAC Latin 2; Central European (Mac) + LOCALEID_MAPPING.put("10079", "x-MacIceland"); // x-mac-icelandic Icelandic (Mac) + LOCALEID_MAPPING.put("10081", "x-MacTurkish"); // x-mac-turkish Turkish (Mac) + LOCALEID_MAPPING.put("10082", "x-MacCroatian"); // x-mac-croatian Croatian (Mac) LOCALEID_MAPPING.put("10241", "Cp1256"); // Arabic (Syria) LOCALEID_MAPPING.put("11265", "Cp1256"); // Arabic (Jordan) // 12000 utf-32 Unicode UTF-32, little endian byte order diff --git a/RTF Parser Kit/test/com/rtfparserkit/parser/standard/StandardRtfParserTest.java b/RTF Parser Kit/test/com/rtfparserkit/parser/standard/StandardRtfParserTest.java index
4b82417..44512d9 100644 --- a/RTF Parser Kit/test/com/rtfparserkit/parser/standard/StandardRtfParserTest.java +++ b/RTF Parser Kit/test/com/rtfparserkit/parser/standard/StandardRtfParserTest.java @@ -89,6 +89,36 @@ public void testGreekEncoding() throws Exception TestUtilities.assertRtfParserDumpMatches(this, new StandardRtfParser(), "testGreekEncoding"); } + @Test + public void test437Encoding() throws Exception + { + TestUtilities.assertRtfParserDumpMatches(this, new StandardRtfParser(), "test437Encoding"); + } + + @Test + public void test874Encoding() throws Exception + { + TestUtilities.assertRtfParserDumpMatches(this, new StandardRtfParser(), "test874Encoding"); + } + + @Test + public void test950Encoding() throws Exception + { + TestUtilities.assertRtfParserDumpMatches(this, new StandardRtfParser(), "test950Encoding"); + } + + @Test + public void test10001Encoding() throws Exception + { + TestUtilities.assertRtfParserDumpMatches(this, new StandardRtfParser(), "test10001Encoding"); + } + + @Test + public void test10007Encoding() throws Exception + { + TestUtilities.assertRtfParserDumpMatches(this, new StandardRtfParser(), "test10007Encoding"); + } + @Test public void testKoreanEncoding() throws Exception { diff --git a/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/test10001Encoding.rtf b/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/test10001Encoding.rtf new file mode 100644 index 0000000..071d7e0 --- /dev/null +++ b/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/test10001Encoding.rtf @@ -0,0 +1,11 @@ +{\rtf1\mac\ansicpg10001\cocoartf824\cocoasubrtf230 +{\fonttbl\f0\fnil\fcharset78 HiraKakuPro-W6;\f1\fswiss\fcharset77 Helvetica-Bold;\f2\fswiss\fcharset77 Helvetica; +\f3\fnil\fcharset78 HiraKakuPro-W3;} +{\colortbl;\red255\green255\blue255;} +{\*\listtable{\list\listtemplateid1\listhybrid{\listlevel\levelnfc23\levelnfcn23\leveljc2\leveljcn2\levelfollow0\levelstartat1\levelspace360\levelindent0{\*\levelmarker 
\{disc\}}{\leveltext\leveltemplateid0\'02\'05.;}{\levelnumbers\'01;}}{\listname ;}\listid1}} +{\*\listoverridetable{\listoverride\listid1\listoverridecount0\ls1}} +\margl1440\margr1440\vieww12240\viewh8980\viewkind0 +\pard\tx220\tx720\tx1133\tx1700\tx2267\tx2834\tx3401\tx3968\tx4535\tx5102\tx5669\tx6236\tx6803\li720\fi-720\ql\qnatural\pardirnatural +\ls1\ilvl0 +\f0\b\fs50 \cf0 \'82\'a8\'93\'c7\'82\'dd\'82\'ad\'82\'be\'82\'b3\'82\'a2 +} \ No newline at end of file diff --git a/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/test10001Encoding.xml b/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/test10001Encoding.xml new file mode 100644 index 0000000..428181c --- /dev/null +++ b/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/test10001Encoding.xml @@ -0,0 +1,107 @@ + + + + + + + + + + + + HiraKakuPro-W6; + + + + Helvetica-Bold; + + + + Helvetica; + + + + HiraKakuPro-W3; + + + + ; + + + + ; + + + + + + + + + + + + + + + + + + + {disc} + + + + + .; + + + + ; + + + + + ; + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + お読みください + + \ No newline at end of file diff --git a/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/test10007Encoding.rtf b/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/test10007Encoding.rtf new file mode 100644 index 0000000..43c47e6 --- /dev/null +++ b/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/test10007Encoding.rtf @@ -0,0 +1,12 @@ +{\rtf1\mac\ansicpg10007\cocoartf102 +{\fonttbl\f0\fnil\fcharset77 LucidaGrande;\f1\fnil\fcharset77 Georgia;\f2\fnil\fcharset77 Verdana; +} +{\colortbl;\red255\green255\blue255;} +\margl1440\margr1440\vieww16780\viewh13600\viewkind0 +\pard\tx720\tx1440\tx2160\tx2880\tx3600\tx4320\tx5040\tx5760\tx6480\tx7200\tx7920\tx8640\ql\qnatural + +\f0\fs36 \cf0 \uc0\u1050 \u1086 \u1084 \u1084 \u1072 \u1085 \u1076 \u1072 \u1088 \u1072 \u1079 \u1088 \u1072 \u1073 \u1086 \u1090 \u1095 \u1080 \u1082 \u1086 \u1074 +\f1 \ +} + + diff --git 
a/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/test10007Encoding.xml b/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/test10007Encoding.xml new file mode 100644 index 0000000..37c7a39 --- /dev/null +++ b/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/test10007Encoding.xml @@ -0,0 +1,53 @@ + + + + + + + + + + + LucidaGrande; + + + + Georgia; + + + + Verdana; + + + + ; + + + + ; + + + + + + + + + + + + + + + + + + + + + + Комманда разработчиков + + + + \ No newline at end of file diff --git a/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/test437Encoding.rtf b/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/test437Encoding.rtf new file mode 100644 index 0000000..72138f2 --- /dev/null +++ b/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/test437Encoding.rtf @@ -0,0 +1,29 @@ +{\rtf1 \mac \ansicpg437 \cocoartf102 {\fonttbl {\f0 \fnil \fcharset77 Times New Roman{\*\falt Times} +;} +{\f1 \fnil \fcharset77 Helvetica-Bold{\*\falt Helvetica} +;} +{\f2 \fnil \fcharset77 Helvetica;} +{\f3 \fnil \fcharset77 LucidaGrande{\*\falt Lucida Grande} +;} +} +{\colortbl ;\red0 \green0 \blue0 ;} +{\stylesheet {\*\cs335 {\*\nsmpltxt The quick brown fox jumped over the lazy dogs.} +\super footnote reference;} +{\*\cs336 {\*\nsmpltxt The quick brown fox jumped over the lazy dogs.} +\super endnote reference;} +{\s337 \nisusnoteplacement0 \nisusreferencestyle335 {\*\nsmpltxt Some text goes here so you can see what your style will look like.} +\f3 footnote text;} +{\s338 \nisusnoteplacement1 \nisusreferencestyle336 {\*\nsmpltxt Sample text for Foot/End Notes Style} +\f3 endnote text;} +} +\deftab720 \defformat \viewkind1 \viewzk1 {\*\nisuswindow \x70 \y194 \w741 \h638 } +\nshwinv0 \nshwpg1 \hyphauto0 \ftnnar \endnotes \aendnotes \aftnnar \fet2 \ftnbj \paperw12240 \paperh15840 \margl1440 \margr1440 \margt1440 \margb1440 \gutter0 \pgnstart1 \nocolbal \sectd \sbknone \cols1 \ltrsect \colbalsxn0 \marglsxn1440 \margrsxn1440 
\margtsxn1440 \margbsxn1440 \guttersxn0 \headery720 \footery720 \pgnstarts1 \pgnrestart \pgndec \sxnstarts1 \sxnrestart \sxndec {\header \pard \ql \sb0 \sa0 \sl240 \slmult1 \ilvl0 \li0 \lin0 \fi0 \ri0 \rin0 \par } +{\footer \pard \ql \sb0 \sa0 \sl240 \slmult1 \ilvl0 \li0 \lin0 \fi0 \ri0 \rin0 \par } +{\pard \ql \sb0 \sa0 \sl240 \slmult1 \ilvl0 \li0 \lin0 \fi0 \ri0 \rin0 {\f1 \fs24 \b \cf1 Test Document\par +\f2 \b0 \par +\b Test Title: \tab (Test Subtitle)\par +\b0 \par +\b Test Heading:\b0 \par +Test Text. \par} +} +} \ No newline at end of file diff --git a/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/test437Encoding.xml b/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/test437Encoding.xml new file mode 100644 index 0000000..779aa46 --- /dev/null +++ b/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/test437Encoding.xml @@ -0,0 +1,204 @@ + + + + + + + + + + + + Times New Roman + + + Times + + ; + + + + + + Helvetica-Bold + + + Helvetica + + ; + + + + + + Helvetica; + + + + + + LucidaGrande + + + Lucida Grande + + ; + + + + + ; + + + + ; + + + + + + + + The quick brown fox jumped over the lazy dogs. + + + footnote reference; + + + + + + The quick brown fox jumped over the lazy dogs. + + + endnote reference; + + + + + + Some text goes here so you can see what your style will look like. + + + footnote text; + + + + + + Sample text for Foot/End Notes Style + + + endnote text; + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Test Document + + + + + + Test Title: + + (Test Subtitle) + + + + + Test Heading: + + + + Test Text. 
+ + + + + \ No newline at end of file diff --git a/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/test874Encoding.rtf b/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/test874Encoding.rtf new file mode 100644 index 0000000..6306661 --- /dev/null +++ b/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/test874Encoding.rtf @@ -0,0 +1,20 @@ +{\rtf1\adeflang1054\ansi\ansicpg874\uc1\adeff22\deff0\stshfdbch0\stshfloch0\stshfhich0\stshfbi0\deflang1033\deflangfe1033{\fonttbl{\f0\froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman;} +{\f1\fswiss\fcharset0\fprq2{\*\panose 020b0604020202020204}Arial;}{\f22\froman\fcharset0\fprq2{\*\panose 02020603050405020304}Angsana New;}{\f36\fnil\fcharset222\fprq0{\*\panose 00000000000000000000}SymbolMT;} +{\f149\froman\fcharset238\fprq2 Times New Roman CE;}{\f150\froman\fcharset204\fprq2 Times New Roman Cyr;}{\f152\froman\fcharset161\fprq2 Times New Roman Greek;}{\f153\froman\fcharset162\fprq2 Times New Roman Tur;} +{\f154\froman\fcharset177\fprq2 Times New Roman (Hebrew);}{\f155\froman\fcharset178\fprq2 Times New Roman (Arabic);}{\f156\froman\fcharset186\fprq2 Times New Roman Baltic;}{\f157\froman\fcharset163\fprq2 Times New Roman (Vietnamese);} +{\f159\fswiss\fcharset238\fprq2 Arial CE;}{\f160\fswiss\fcharset204\fprq2 Arial Cyr;}{\f162\fswiss\fcharset161\fprq2 Arial Greek;}{\f163\fswiss\fcharset162\fprq2 Arial Tur;}{\f164\fswiss\fcharset177\fprq2 Arial (Hebrew);} +{\f165\fswiss\fcharset178\fprq2 Arial (Arabic);}{\f166\fswiss\fcharset186\fprq2 Arial Baltic;}{\f167\fswiss\fcharset163\fprq2 Arial (Vietnamese);}{\f378\froman\fcharset222\fprq2 Angsana New (Thai);}}{\colortbl;\red0\green0\blue0;\red0\green0\blue255; +\red0\green255\blue255;\red0\green255\blue0;\red255\green0\blue255;\red255\green0\blue0;\red255\green255\blue0;\red255\green255\blue255;\red0\green0\blue128;\red0\green128\blue128;\red0\green128\blue0;\red128\green0\blue128;\red128\green0\blue0; 
+\red128\green128\blue0;\red128\green128\blue128;\red192\green192\blue192;}{\stylesheet{\ql \li0\ri0\widctlpar\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \fcs1 \af22\afs24\alang1054 \fcs0 \fs24\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 +\snext0 Normal;}{\*\cs10 \additive \ssemihidden Default Paragraph Font;}{\* +\ts11\tsrowd\trftsWidthB3\trpaddl108\trpaddr108\trpaddfl3\trpaddft3\trpaddfb3\trpaddfr3\trcbpat1\trcfpat1\tscellwidthfts0\tsvertalt\tsbrdrt\tsbrdrl\tsbrdrb\tsbrdrr\tsbrdrdgl\tsbrdrdgr\tsbrdrh\tsbrdrv +\ql \li0\ri0\widctlpar\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \fcs1 \af0\afs20 \fcs0 \fs20\lang1024\langfe1024\cgrid\langnp1024\langfenp1024 \snext11 \ssemihidden Normal Table;}}{\*\latentstyles\lsdstimax156\lsdlockeddef0} +{\*\rsidtbl \rsid3489504\rsid8663529\rsid13765641}{\*\generator Microsoft Word 11.0.5604;}{\info{\title 3}{\author Carestream Health Inc.}{\operator SAMAK}{\creatim\yr2009\mo9\dy9\hr14\min28}{\revtim\yr2009\mo9\dy9\hr14\min28}{\version2}{\edmins1} +{\nofpages2}{\nofwords643}{\nofchars3669}{\*\company }{\nofcharsws4304}{\vern24689}}\widowctrl\ftnbj\aenddoc\noxlattoyen\expshrtn\noultrlspc\dntblnsbdb\nospaceforul\hyphcaps0\horzdoc\dghspace120\dgvspace120\dghorigin1701\dgvorigin1984\dghshow0\dgvshow3 +\jcompress\viewkind4\viewscale100\nolnhtadjtbl\ApplyBrkRules\rsidroot8663529 \fet0\sectd \linex0\sectdefaultcl\sftnbj {\*\pnseclvl1\pnucrm\pnstart1\pnindent720\pnhang {\pntxta .}}{\*\pnseclvl2\pnucltr\pnstart1\pnindent720\pnhang {\pntxta .}}{\*\pnseclvl3 +\pndec\pnstart1\pnindent720\pnhang {\pntxta .}}{\*\pnseclvl4\pnlcltr\pnstart1\pnindent720\pnhang {\pntxta )}}{\*\pnseclvl5\pndec\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta )}}{\*\pnseclvl6\pnlcltr\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta )}} +{\*\pnseclvl7\pnlcrm\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta )}}{\*\pnseclvl8\pnlcltr\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta )}}{\*\pnseclvl9\pnlcrm\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta 
)}}\pard\plain +\ql \li0\ri0\nowidctlpar\faauto\rin0\lin0\itap0 \fcs1 \af22\afs24\alang1054 \fcs0 \fs24\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\fcs1 \ab\af1\afs28 \fcs0 \b\f1\fs28\cf1\insrsid3489504 1. Test Heading +\par }{\fcs1 \af1\afs20 \fcs0 \f1\fs20\cf1\insrsid3489504 \bullet }{\fcs0 \afs20 \fcs1 \f36\fs20\cf1\lang1054\insrsid3489504 }{\fcs1 \af1\afs20 \fcs0 \f1\fs20\cf1\insrsid3489504 Some test text +\par \bullet }{\fcs0 \afs20 \fcs1 \f36\fs20\cf1\lang1054\insrsid3489504 }{\fcs1 \af1\afs20 \fcs0 \f1\fs20\cf1\insrsid3489504 Some more test text +}} \ No newline at end of file diff --git a/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/test874Encoding.xml b/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/test874Encoding.xml new file mode 100644 index 0000000..0f1152b --- /dev/null +++ b/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/test874Encoding.xml @@ -0,0 +1,640 @@ + + + + + + + + + + + + + + + + + + + + + + + 02020603050405020304 + + Times New Roman; + + + + + + + + + 020b0604020202020204 + + Arial; + + + + + + + + + 02020603050405020304 + + Angsana New; + + + + + + + + + 00000000000000000000 + + SymbolMT; + + + + + + + Times New Roman CE; + + + + + + + Times New Roman Cyr; + + + + + + + Times New Roman Greek; + + + + + + + Times New Roman Tur; + + + + + + + Times New Roman (Hebrew); + + + + + + + Times New Roman (Arabic); + + + + + + + Times New Roman Baltic; + + + + + + + Times New Roman (Vietnamese); + + + + + + + Arial CE; + + + + + + + Arial Cyr; + + + + + + + Arial Greek; + + + + + + + Arial Tur; + + + + + + + Arial (Hebrew); + + + + + + + Arial (Arabic); + + + + + + + Arial Baltic; + + + + + + + Arial (Vietnamese); + + + + + + + Angsana New (Thai); + + + + + ; + + + + ; + + + + ; + + + + ; + + + + ; + + + + ; + + + + ; + + + + ; + + + + ; + + + + ; + + + + ; + + + + ; + + + + ; + + + + ; + + + + ; + + + + ; + + + + ; + + + + + + + + + + + + + + + + + + + + + + + + + + + + Normal; + + + + + + Default 
Paragraph Font; + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Normal Table; + + + + + + + + + + + + + + + + Microsoft Word 11.0.5604; + + + + + + 3 + + + + Carestream Health Inc. + + + + SAMAK + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + . + + + + + + + + + + + . + + + + + + + + + + + . + + + + + + + + + + + ) + + + + + + + + + + + ( + + + + ) + + + + + + + + + + + ( + + + + ) + + + + + + + + + + + ( + + + + ) + + + + + + + + + + + ( + + + + ) + + + + + + + + + + + ( + + + + ) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 1. Test Heading + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Some test text + + + + + + + + + + + + + + + + + + + + + + + + Some more test text + + + \ No newline at end of file diff --git a/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/test950Encoding.rtf b/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/test950Encoding.rtf new file mode 100644 index 0000000..a380a50 --- /dev/null +++ b/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/test950Encoding.rtf @@ -0,0 +1,13 @@ +{\rtf1\ansi\ansicpg950\cocoartf1187\cocoasubrtf340 +\cocoascreenfonts1{\fonttbl\f0\fnil\fcharset0 LucidaGrande;} +{\colortbl;\red255\green255\blue255;} +{\info +{\title Title} +{\author Test Author}}\paperw11900\paperh16840\margl1440\margr1440\vieww10980\viewh13860\viewkind0 +\deftab720 +\pard\pardeftab720\ri0\sl560\sa120 + +\f0\fs36 \cf0 Test Text\ +\pard\pardeftab720\ri0\sl360\sb120 + +\fs24 \cf0 Copyright \'a9 2004-2013 Test Project} \ No newline at end of file diff --git a/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/test950Encoding.xml b/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/test950Encoding.xml new file mode 100644 index 0000000..2dc657c --- /dev/null +++ b/RTF Parser 
Kit/test/com/rtfparserkit/parser/standard/data/test950Encoding.xml @@ -0,0 +1,56 @@ + + + + + + + + + + + + LucidaGrande; + + + + ; + + + + ; + + + + + + Title + + + + Test Author + + + + + + + + + + + + + + + + Test Text + + + + + + + + Copyright © 2004-2013 Test Project + + \ No newline at end of file From 823ca9aa614465a0070edd488e3af79d295028bc Mon Sep 17 00:00:00 2001 From: Jon Iles Date: Fri, 13 Jan 2017 12:46:17 +0000 Subject: [PATCH 15/30] Update README.md --- README.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/README.md b/README.md index 0c371ea..298a1a1 100644 --- a/README.md +++ b/README.md @@ -30,3 +30,16 @@ You provide input to the parser via a class that implements the `IRtfSource` int The other thing you need to provide the parser with is alistener class. The listener class implements the `IRtfListener` listener interface. The interface consists of a set of methods which are called by the parser to inform you of when it encounters different parts of the docuent structure. The set of method, along with some comments describing their purpose can be seen [here](https://github.com/joniles/rtfparserkit/blob/master/RTF%20Parser%20Kit/src/com/rtfparserkit/parser/IRtfListener.java). You don't need to implement all of the `IRtfListener` interface yourself, if you wish you can subclass `RtfListenerAdaptor` which provides empty methods for all of the `IRtfListener` methods. You can then just override the methods you are interested in. + +An example text extractor is provided, you can invoke it like this: +```java +new StreamTextConverter().convert(new RtfStreamSource(inputStream), outputStream, "UTF-8"); +``` +This code reads an RTF file from the `inputStream` and writes the resulting text to the `outputStream` in the encoding specified by the last argument. 
+ +A second example text extractor is also provided, this one extracts text from the RTF file into a string: +```java +StringTextConverter converter = new StringTextConverter(); +converter.convert(new RtfStreamSource(inputStream)); +String extractedText = converter.getText(); +``` From de42a30cc98640de591ffb529d1b986684d03280 Mon Sep 17 00:00:00 2001 From: Jon Iles Date: Wed, 8 Feb 2017 15:52:19 +0000 Subject: [PATCH 16/30] Correctly handle default encoding. --- .../rtfparserkit/parser/standard/ParserState.java | 2 +- .../parser/standard/StandardRtfParserTest.java | 6 ++++++ .../standard/data/testDefaultEncodingParse.rtf | Bin 0 -> 23 bytes .../standard/data/testDefaultEncodingParse.xml | 8 ++++++++ 4 files changed, 15 insertions(+), 1 deletion(-) create mode 100644 RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testDefaultEncodingParse.rtf create mode 100644 RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testDefaultEncodingParse.xml diff --git a/RTF Parser Kit/src/com/rtfparserkit/parser/standard/ParserState.java b/RTF Parser Kit/src/com/rtfparserkit/parser/standard/ParserState.java index 4fe7b3f..c45a8c1 100644 --- a/RTF Parser Kit/src/com/rtfparserkit/parser/standard/ParserState.java +++ b/RTF Parser Kit/src/com/rtfparserkit/parser/standard/ParserState.java @@ -35,7 +35,7 @@ public ParserState(ParserState state) } public int currentFont; - public String currentEncoding; + public String currentEncoding = Encoding.ANSI_ENCODING; public String currentFontEncoding; public int unicodeAlternateSkipCount = 1; } diff --git a/RTF Parser Kit/test/com/rtfparserkit/parser/standard/StandardRtfParserTest.java b/RTF Parser Kit/test/com/rtfparserkit/parser/standard/StandardRtfParserTest.java index 44512d9..b769a92 100644 --- a/RTF Parser Kit/test/com/rtfparserkit/parser/standard/StandardRtfParserTest.java +++ b/RTF Parser Kit/test/com/rtfparserkit/parser/standard/StandardRtfParserTest.java @@ -29,6 +29,12 @@ public void testEncodingParse() throws Exception 
TestUtilities.assertRtfParserDumpMatches(this, new StandardRtfParser(), "testEncodingParse"); } + @Test + public void testDefaultEncodingParse() throws Exception + { + TestUtilities.assertRtfParserDumpMatches(this, new StandardRtfParser(), "testDefaultEncodingParse"); + } + @Test public void testStylesParse() throws Exception { diff --git a/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testDefaultEncodingParse.rtf b/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testDefaultEncodingParse.rtf new file mode 100644 index 0000000000000000000000000000000000000000..3aa0dc673202d55d29f140ed0f1967bc5eec6e2a GIT binary patch literal 23 ecmb=9DJn@b + + + + Test1 + + + \ No newline at end of file From 97c9f271dec132ecdf3d13188d60f882c4bc67d2 Mon Sep 17 00:00:00 2001 From: Jon Iles Date: Wed, 8 Feb 2017 16:00:52 +0000 Subject: [PATCH 17/30] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 298a1a1..1a4df1c 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ parser.parse(source, listener); ``` You provide input to the parser via a class that implements the `IRtfSource` interface. Two implementations are provided for you, `RtfStreamSource`, for reading RTF from a stream, and `RtfStringSource` for reading RTF from a string. -The other thing you need to provide the parser with is alistener class. The listener class implements the `IRtfListener` listener interface. The interface consists of a set of methods which are called by the parser to inform you of when it encounters different parts of the docuent structure. The set of method, along with some comments describing their purpose can be seen [here](https://github.com/joniles/rtfparserkit/blob/master/RTF%20Parser%20Kit/src/com/rtfparserkit/parser/IRtfListener.java). +The other thing you need to provide the parser with is a listener class. The listener class implements the `IRtfListener` listener interface. 
The interface consists of a set of methods which are called by the parser to inform you of when it encounters different parts of the document structure. The set of methods, along with some comments describing their purpose, can be seen [here](https://github.com/joniles/rtfparserkit/blob/master/RTF%20Parser%20Kit/src/com/rtfparserkit/parser/IRtfListener.java). You don't need to implement all of the `IRtfListener` interface yourself, if you wish you can subclass `RtfListenerAdaptor` which provides empty methods for all of the `IRtfListener` methods. You can then just override the methods you are interested in. From 98abe9aa24e9f881d4b434a11608cf9952a93a4f Mon Sep 17 00:00:00 2001 From: Jon Iles Date: Thu, 14 Jun 2018 11:24:04 +0100 Subject: [PATCH 18/30] Gracefully handle malformed hex bytes. --- .../rtfparserkit/parser/raw/RawRtfParser.java | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/RTF Parser Kit/src/com/rtfparserkit/parser/raw/RawRtfParser.java b/RTF Parser Kit/src/com/rtfparserkit/parser/raw/RawRtfParser.java index 1d460b8..67ab0d1 100644 --- a/RTF Parser Kit/src/com/rtfparserkit/parser/raw/RawRtfParser.java +++ b/RTF Parser Kit/src/com/rtfparserkit/parser/raw/RawRtfParser.java @@ -135,7 +135,22 @@ private void handleCharacterByte(int ch) throws IOException { throw new IllegalStateException("Unexpected end of file"); } - b += HexUtils.parseHexDigit(ch); + + // Have encountered malformed RTF where only a single hex digit + // has been supplied. e.g. \'AA\'B\'CC so we hit the next \ + // rather than getting a hex digit. Try to handle this specific + // case gracefully by unreading the next character and working with + // the single digit we have.
+ if (ch == '\\') + { + b = b >> 4; + source.unread(ch); + } + else + { + b += HexUtils.parseHexDigit(ch); + } + buffer.add(b); parsingHex = false; } From 12415ce612479540f29299524d043bad28b8d962 Mon Sep 17 00:00:00 2001 From: Jon Iles Date: Mon, 5 Nov 2018 09:35:25 +0000 Subject: [PATCH 19/30] Build using Maven --- .classpath | 33 + .gitignore | 1 + RTF Parser Kit/.project => .project | 40 +- .settings/org.eclipse.jdt.core.prefs | 6 + .settings/org.eclipse.m2e.core.prefs | 4 + README.md | 13 +- RTF Parser Kit/.classpath | 8 - .../.settings/org.eclipse.jdt.core.prefs | 380 ------ .../.settings/org.eclipse.jdt.ui.prefs | 56 - RTF Parser Kit/RTF Parser Kit.jardesc | 16 - RTF Parser Kit/bin/.gitignore | 1 - .../parser/raw/data/testRawParse.xml | 35 - .../parser/raw/data/testSpecialChars.xml | 83 -- .../standard/data/test10001Encoding.xml | 107 -- .../standard/data/test10007Encoding.xml | 53 - .../parser/standard/data/test437Encoding.xml | 204 --- .../parser/standard/data/test874Encoding.xml | 640 ---------- .../parser/standard/data/test950Encoding.xml | 56 - .../data/testDefaultEncodingParse.xml | 8 - .../standard/data/testEncodingParse.xml | 31 - .../parser/standard/data/testGitHubIssue6.xml | 253 ---- .../standard/data/testGreekEncoding.xml | 32 - .../parser/standard/data/testHex.xml | 31 - .../standard/data/testKoreanEncoding.xml | 35 - .../parser/standard/data/testMultiByteHex.xml | 38 - .../standard/data/testNegativeUnicode.xml | 64 - .../parser/standard/data/testSpecialChars.xml | 69 - .../parser/standard/data/testStyles.xml | 64 - .../standard/data/testTurkishEncoding.xml | 39 - .../parser/standard/data/testUnicode.xml | 109 -- .../parser/standard/data/testUpr.xml | 1119 ----------------- RTF Parser Kit/licence.txt => licence.txt | 0 pom.xml | 140 +++ .../converter/text/AbstractTextConverter.java | 0 .../converter/text/StreamTextConverter.java | 0 .../converter/text/StringTextConverter.java | 0 .../com/rtfparserkit/parser/IRtfListener.java | 0 
.../com/rtfparserkit/parser/IRtfParser.java | 0 .../com/rtfparserkit/parser/IRtfSource.java | 0 .../parser/RtfListenerAdaptor.java | 0 .../rtfparserkit/parser/RtfStreamSource.java | 0 .../rtfparserkit/parser/RtfStringSource.java | 0 .../rtfparserkit/parser/raw/ByteBuffer.java | 0 .../rtfparserkit/parser/raw/RawRtfParser.java | 0 .../parser/standard/BinaryBytesEvent.java | 0 .../parser/standard/CommandEvent.java | 0 .../parser/standard/DefaultEventHandler.java | 0 .../parser/standard/DocumentEndEvent.java | 0 .../parser/standard/DocumentStartEvent.java | 0 .../parser/standard/Encoding.java | 0 .../parser/standard/FontCharset.java | 0 .../parser/standard/GroupEndEvent.java | 0 .../parser/standard/GroupStartEvent.java | 0 .../parser/standard/IParserEvent.java | 0 .../parser/standard/IParserEventHandler.java | 0 .../parser/standard/ParserEventType.java | 0 .../parser/standard/ParserState.java | 0 .../parser/standard/StandardRtfParser.java | 0 .../parser/standard/StringEvent.java | 0 .../parser/standard/UprHandler.java | 0 .../java}/com/rtfparserkit/rtf/Command.java | 0 .../com/rtfparserkit/rtf/CommandType.java | 0 .../com/rtfparserkit/utils/HexUtils.java | 0 .../java}/com/rtfparserkit/utils/RtfDump.java | 0 .../rtfparserkit/utils/RtfDumpListener.java | 7 +- .../text/StreamTextConverterTest.java | 0 .../text/StringTextConverterTest.java | 0 .../parser/raw/ByteBufferTest.java | 0 .../parser/raw/RawRtfParserTest.java | 0 .../standard/StandardRtfParserTest.java | 0 .../com/rtfparserkit/utils/TestUtilities.java | 0 .../text/data/testTextConversion.rtf | 0 .../text/data/testTextConversion.txt | 0 .../parser/raw/data/testRawParse.rtf | Bin .../parser/raw/data/testRawParse.xml | 1 + .../parser/raw/data/testSpecialChars.rtf | 0 .../parser/raw/data/testSpecialChars.xml | 1 + .../standard/data/test10001Encoding.rtf | 0 .../standard/data/test10001Encoding.xml | 1 + .../standard/data/test10007Encoding.rtf | 0 .../standard/data/test10007Encoding.xml | 1 + 
.../parser/standard/data/test437Encoding.rtf | 0 .../parser/standard/data/test437Encoding.xml | 1 + .../parser/standard/data/test874Encoding.rtf | 0 .../parser/standard/data/test874Encoding.xml | 1 + .../parser/standard/data/test950Encoding.rtf | 0 .../parser/standard/data/test950Encoding.xml | 1 + .../data/testDefaultEncodingParse.rtf | Bin .../data/testDefaultEncodingParse.xml | 1 + .../standard/data/testEncodingParse.rtf | Bin .../standard/data/testEncodingParse.xml | 1 + .../parser/standard/data/testGitHubIssue6.rtf | 0 .../parser/standard/data/testGitHubIssue6.xml | 1 + .../standard/data/testGreekEncoding.rtf | 0 .../standard/data/testGreekEncoding.xml | 1 + .../parser/standard/data/testHex.rtf | Bin .../parser/standard/data/testHex.xml | 1 + .../standard/data/testKoreanEncoding.rtf | 0 .../standard/data/testKoreanEncoding.xml | 1 + .../parser/standard/data/testMultiByteHex.rtf | 0 .../parser/standard/data/testMultiByteHex.xml | 1 + .../standard/data/testNegativeUnicode.rtf | Bin .../standard/data/testNegativeUnicode.xml | 1 + .../parser/standard/data/testSpecialChars.rtf | 0 .../parser/standard/data/testSpecialChars.xml | 1 + .../parser/standard/data/testStyles.rtf | Bin .../parser/standard/data/testStyles.xml | 1 + .../standard/data/testTurkishEncoding.rtf | 0 .../standard/data/testTurkishEncoding.xml | 1 + .../parser/standard/data/testUnicode.rtf | Bin .../parser/standard/data/testUnicode.xml | 1 + .../parser/standard/data/testUpr.rtf | 0 .../parser/standard/data/testUpr.xml | 1 + 113 files changed, 240 insertions(+), 3555 deletions(-) create mode 100644 .classpath create mode 100644 .gitignore rename RTF Parser Kit/.project => .project (60%) create mode 100644 .settings/org.eclipse.jdt.core.prefs create mode 100644 .settings/org.eclipse.m2e.core.prefs delete mode 100644 RTF Parser Kit/.classpath delete mode 100644 RTF Parser Kit/.settings/org.eclipse.jdt.core.prefs delete mode 100644 RTF Parser Kit/.settings/org.eclipse.jdt.ui.prefs delete mode 100644 RTF 
Parser Kit/RTF Parser Kit.jardesc delete mode 100644 RTF Parser Kit/bin/.gitignore delete mode 100644 RTF Parser Kit/test/com/rtfparserkit/parser/raw/data/testRawParse.xml delete mode 100644 RTF Parser Kit/test/com/rtfparserkit/parser/raw/data/testSpecialChars.xml delete mode 100644 RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/test10001Encoding.xml delete mode 100644 RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/test10007Encoding.xml delete mode 100644 RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/test437Encoding.xml delete mode 100644 RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/test874Encoding.xml delete mode 100644 RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/test950Encoding.xml delete mode 100644 RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testDefaultEncodingParse.xml delete mode 100644 RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testEncodingParse.xml delete mode 100644 RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testGitHubIssue6.xml delete mode 100644 RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testGreekEncoding.xml delete mode 100644 RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testHex.xml delete mode 100644 RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testKoreanEncoding.xml delete mode 100644 RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testMultiByteHex.xml delete mode 100644 RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testNegativeUnicode.xml delete mode 100644 RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testSpecialChars.xml delete mode 100644 RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testStyles.xml delete mode 100644 RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testTurkishEncoding.xml delete mode 100644 RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testUnicode.xml delete mode 100644 RTF Parser 
Kit/test/com/rtfparserkit/parser/standard/data/testUpr.xml rename RTF Parser Kit/licence.txt => licence.txt (100%) create mode 100644 pom.xml rename {RTF Parser Kit/src => src/main/java}/com/rtfparserkit/converter/text/AbstractTextConverter.java (100%) rename {RTF Parser Kit/src => src/main/java}/com/rtfparserkit/converter/text/StreamTextConverter.java (100%) rename {RTF Parser Kit/src => src/main/java}/com/rtfparserkit/converter/text/StringTextConverter.java (100%) rename {RTF Parser Kit/src => src/main/java}/com/rtfparserkit/parser/IRtfListener.java (100%) rename {RTF Parser Kit/src => src/main/java}/com/rtfparserkit/parser/IRtfParser.java (100%) rename {RTF Parser Kit/src => src/main/java}/com/rtfparserkit/parser/IRtfSource.java (100%) rename {RTF Parser Kit/src => src/main/java}/com/rtfparserkit/parser/RtfListenerAdaptor.java (100%) rename {RTF Parser Kit/src => src/main/java}/com/rtfparserkit/parser/RtfStreamSource.java (100%) rename {RTF Parser Kit/src => src/main/java}/com/rtfparserkit/parser/RtfStringSource.java (100%) rename {RTF Parser Kit/src => src/main/java}/com/rtfparserkit/parser/raw/ByteBuffer.java (100%) rename {RTF Parser Kit/src => src/main/java}/com/rtfparserkit/parser/raw/RawRtfParser.java (100%) rename {RTF Parser Kit/src => src/main/java}/com/rtfparserkit/parser/standard/BinaryBytesEvent.java (100%) rename {RTF Parser Kit/src => src/main/java}/com/rtfparserkit/parser/standard/CommandEvent.java (100%) rename {RTF Parser Kit/src => src/main/java}/com/rtfparserkit/parser/standard/DefaultEventHandler.java (100%) rename {RTF Parser Kit/src => src/main/java}/com/rtfparserkit/parser/standard/DocumentEndEvent.java (100%) rename {RTF Parser Kit/src => src/main/java}/com/rtfparserkit/parser/standard/DocumentStartEvent.java (100%) rename {RTF Parser Kit/src => src/main/java}/com/rtfparserkit/parser/standard/Encoding.java (100%) rename {RTF Parser Kit/src => src/main/java}/com/rtfparserkit/parser/standard/FontCharset.java (100%) rename {RTF Parser 
Kit/src => src/main/java}/com/rtfparserkit/parser/standard/GroupEndEvent.java (100%) rename {RTF Parser Kit/src => src/main/java}/com/rtfparserkit/parser/standard/GroupStartEvent.java (100%) rename {RTF Parser Kit/src => src/main/java}/com/rtfparserkit/parser/standard/IParserEvent.java (100%) rename {RTF Parser Kit/src => src/main/java}/com/rtfparserkit/parser/standard/IParserEventHandler.java (100%) rename {RTF Parser Kit/src => src/main/java}/com/rtfparserkit/parser/standard/ParserEventType.java (100%) rename {RTF Parser Kit/src => src/main/java}/com/rtfparserkit/parser/standard/ParserState.java (100%) rename {RTF Parser Kit/src => src/main/java}/com/rtfparserkit/parser/standard/StandardRtfParser.java (100%) rename {RTF Parser Kit/src => src/main/java}/com/rtfparserkit/parser/standard/StringEvent.java (100%) rename {RTF Parser Kit/src => src/main/java}/com/rtfparserkit/parser/standard/UprHandler.java (100%) rename {RTF Parser Kit/src => src/main/java}/com/rtfparserkit/rtf/Command.java (100%) rename {RTF Parser Kit/src => src/main/java}/com/rtfparserkit/rtf/CommandType.java (100%) rename {RTF Parser Kit/src => src/main/java}/com/rtfparserkit/utils/HexUtils.java (100%) rename {RTF Parser Kit/src => src/main/java}/com/rtfparserkit/utils/RtfDump.java (100%) rename {RTF Parser Kit/src => src/main/java}/com/rtfparserkit/utils/RtfDumpListener.java (88%) rename {RTF Parser Kit/test => src/test/java}/com/rtfparserkit/converter/text/StreamTextConverterTest.java (100%) rename {RTF Parser Kit/test => src/test/java}/com/rtfparserkit/converter/text/StringTextConverterTest.java (100%) rename {RTF Parser Kit/test => src/test/java}/com/rtfparserkit/parser/raw/ByteBufferTest.java (100%) rename {RTF Parser Kit/test => src/test/java}/com/rtfparserkit/parser/raw/RawRtfParserTest.java (100%) rename {RTF Parser Kit/test => src/test/java}/com/rtfparserkit/parser/standard/StandardRtfParserTest.java (100%) rename {RTF Parser Kit/test => 
src/test/java}/com/rtfparserkit/utils/TestUtilities.java (100%) rename {RTF Parser Kit/test => src/test/resources}/com/rtfparserkit/converter/text/data/testTextConversion.rtf (100%) rename {RTF Parser Kit/test => src/test/resources}/com/rtfparserkit/converter/text/data/testTextConversion.txt (100%) rename {RTF Parser Kit/test => src/test/resources}/com/rtfparserkit/parser/raw/data/testRawParse.rtf (100%) create mode 100644 src/test/resources/com/rtfparserkit/parser/raw/data/testRawParse.xml rename {RTF Parser Kit/test => src/test/resources}/com/rtfparserkit/parser/raw/data/testSpecialChars.rtf (100%) create mode 100644 src/test/resources/com/rtfparserkit/parser/raw/data/testSpecialChars.xml rename {RTF Parser Kit/test => src/test/resources}/com/rtfparserkit/parser/standard/data/test10001Encoding.rtf (100%) create mode 100644 src/test/resources/com/rtfparserkit/parser/standard/data/test10001Encoding.xml rename {RTF Parser Kit/test => src/test/resources}/com/rtfparserkit/parser/standard/data/test10007Encoding.rtf (100%) create mode 100644 src/test/resources/com/rtfparserkit/parser/standard/data/test10007Encoding.xml rename {RTF Parser Kit/test => src/test/resources}/com/rtfparserkit/parser/standard/data/test437Encoding.rtf (100%) create mode 100644 src/test/resources/com/rtfparserkit/parser/standard/data/test437Encoding.xml rename {RTF Parser Kit/test => src/test/resources}/com/rtfparserkit/parser/standard/data/test874Encoding.rtf (100%) create mode 100644 src/test/resources/com/rtfparserkit/parser/standard/data/test874Encoding.xml rename {RTF Parser Kit/test => src/test/resources}/com/rtfparserkit/parser/standard/data/test950Encoding.rtf (100%) create mode 100644 src/test/resources/com/rtfparserkit/parser/standard/data/test950Encoding.xml rename {RTF Parser Kit/test => src/test/resources}/com/rtfparserkit/parser/standard/data/testDefaultEncodingParse.rtf (100%) create mode 100644 src/test/resources/com/rtfparserkit/parser/standard/data/testDefaultEncodingParse.xml 
rename {RTF Parser Kit/test => src/test/resources}/com/rtfparserkit/parser/standard/data/testEncodingParse.rtf (100%) create mode 100644 src/test/resources/com/rtfparserkit/parser/standard/data/testEncodingParse.xml rename {RTF Parser Kit/test => src/test/resources}/com/rtfparserkit/parser/standard/data/testGitHubIssue6.rtf (100%) create mode 100644 src/test/resources/com/rtfparserkit/parser/standard/data/testGitHubIssue6.xml rename {RTF Parser Kit/test => src/test/resources}/com/rtfparserkit/parser/standard/data/testGreekEncoding.rtf (100%) create mode 100644 src/test/resources/com/rtfparserkit/parser/standard/data/testGreekEncoding.xml rename {RTF Parser Kit/test => src/test/resources}/com/rtfparserkit/parser/standard/data/testHex.rtf (100%) create mode 100644 src/test/resources/com/rtfparserkit/parser/standard/data/testHex.xml rename {RTF Parser Kit/test => src/test/resources}/com/rtfparserkit/parser/standard/data/testKoreanEncoding.rtf (100%) create mode 100644 src/test/resources/com/rtfparserkit/parser/standard/data/testKoreanEncoding.xml rename {RTF Parser Kit/test => src/test/resources}/com/rtfparserkit/parser/standard/data/testMultiByteHex.rtf (100%) create mode 100644 src/test/resources/com/rtfparserkit/parser/standard/data/testMultiByteHex.xml rename {RTF Parser Kit/test => src/test/resources}/com/rtfparserkit/parser/standard/data/testNegativeUnicode.rtf (100%) create mode 100644 src/test/resources/com/rtfparserkit/parser/standard/data/testNegativeUnicode.xml rename {RTF Parser Kit/test => src/test/resources}/com/rtfparserkit/parser/standard/data/testSpecialChars.rtf (100%) create mode 100644 src/test/resources/com/rtfparserkit/parser/standard/data/testSpecialChars.xml rename {RTF Parser Kit/test => src/test/resources}/com/rtfparserkit/parser/standard/data/testStyles.rtf (100%) create mode 100644 src/test/resources/com/rtfparserkit/parser/standard/data/testStyles.xml rename {RTF Parser Kit/test => 
src/test/resources}/com/rtfparserkit/parser/standard/data/testTurkishEncoding.rtf (100%) create mode 100644 src/test/resources/com/rtfparserkit/parser/standard/data/testTurkishEncoding.xml rename {RTF Parser Kit/test => src/test/resources}/com/rtfparserkit/parser/standard/data/testUnicode.rtf (100%) create mode 100644 src/test/resources/com/rtfparserkit/parser/standard/data/testUnicode.xml rename {RTF Parser Kit/test => src/test/resources}/com/rtfparserkit/parser/standard/data/testUpr.rtf (100%) create mode 100644 src/test/resources/com/rtfparserkit/parser/standard/data/testUpr.xml diff --git a/.classpath b/.classpath new file mode 100644 index 0000000..065c2a8 --- /dev/null +++ b/.classpath @@ -0,0 +1,33 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..b83d222 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/target/ diff --git a/RTF Parser Kit/.project b/.project similarity index 60% rename from RTF Parser Kit/.project rename to .project index cc33ff3..5f2e853 100644 --- a/RTF Parser Kit/.project +++ b/.project @@ -1,17 +1,23 @@ - - - RTF Parser Kit - - - - - - org.eclipse.jdt.core.javabuilder - - - - - - org.eclipse.jdt.core.javanature - - + + + rtfparserkit + + + + + + org.eclipse.jdt.core.javabuilder + + + + + org.eclipse.m2e.core.maven2Builder + + + + + + org.eclipse.jdt.core.javanature + org.eclipse.m2e.core.maven2Nature + + diff --git a/.settings/org.eclipse.jdt.core.prefs b/.settings/org.eclipse.jdt.core.prefs new file mode 100644 index 0000000..1e9dd93 --- /dev/null +++ b/.settings/org.eclipse.jdt.core.prefs @@ -0,0 +1,6 @@ +eclipse.preferences.version=1 +org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.7 +org.eclipse.jdt.core.compiler.compliance=1.7 +org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning +org.eclipse.jdt.core.compiler.release=disabled +org.eclipse.jdt.core.compiler.source=1.7 diff --git a/.settings/org.eclipse.m2e.core.prefs 
b/.settings/org.eclipse.m2e.core.prefs new file mode 100644 index 0000000..14b697b --- /dev/null +++ b/.settings/org.eclipse.m2e.core.prefs @@ -0,0 +1,4 @@ +activeProfiles= +eclipse.preferences.version=1 +resolveWorkspaceProjects=true +version=1 diff --git a/README.md b/README.md index 1a4df1c..1dfc7b3 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,18 @@ What's currently included? Getting Started =============== -You have a choice of two parsers to work with, the standard parser and the raw parser. The raw parser carries out minimal processing on the RTF, the standard parser handles character encodings, and translates commands which represent special characters into their Unicode equivalents. Most people will want to use the standard parser. +To install the library, you can either download the latest JAR directly from the GitHub releases page, +or you can add RTF Parser Kit as a dependency using Maven: + +```xml + + com.github.joniles + rtfparserkit + 1.12.0 + +``` + +Once you have the library, you have a choice of two parsers to work with, the standard parser and the raw parser. The raw parser carries out minimal processing on the RTF, the standard parser handles character encodings, and translates commands which represent special characters into their Unicode equivalents. Most people will want to use the standard parser. 
The parser is invoked like this: ```java diff --git a/RTF Parser Kit/.classpath b/RTF Parser Kit/.classpath deleted file mode 100644 index c0eeba5..0000000 --- a/RTF Parser Kit/.classpath +++ /dev/null @@ -1,8 +0,0 @@ - - - - - - - - diff --git a/RTF Parser Kit/.settings/org.eclipse.jdt.core.prefs b/RTF Parser Kit/.settings/org.eclipse.jdt.core.prefs deleted file mode 100644 index 5b29a22..0000000 --- a/RTF Parser Kit/.settings/org.eclipse.jdt.core.prefs +++ /dev/null @@ -1,380 +0,0 @@ -eclipse.preferences.version=1 -org.eclipse.jdt.core.compiler.annotation.inheritNullAnnotations=disabled -org.eclipse.jdt.core.compiler.annotation.missingNonNullByDefaultAnnotation=ignore -org.eclipse.jdt.core.compiler.annotation.nonnull=org.eclipse.jdt.annotation.NonNull -org.eclipse.jdt.core.compiler.annotation.nonnullbydefault=org.eclipse.jdt.annotation.NonNullByDefault -org.eclipse.jdt.core.compiler.annotation.nullable=org.eclipse.jdt.annotation.Nullable -org.eclipse.jdt.core.compiler.annotation.nullanalysis=disabled -org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled -org.eclipse.jdt.core.compiler.codegen.methodParameters=do not generate -org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.6 -org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve -org.eclipse.jdt.core.compiler.compliance=1.6 -org.eclipse.jdt.core.compiler.debug.lineNumber=generate -org.eclipse.jdt.core.compiler.debug.localVariable=generate -org.eclipse.jdt.core.compiler.debug.sourceFile=generate -org.eclipse.jdt.core.compiler.problem.annotationSuperInterface=warning -org.eclipse.jdt.core.compiler.problem.assertIdentifier=error -org.eclipse.jdt.core.compiler.problem.autoboxing=ignore -org.eclipse.jdt.core.compiler.problem.comparingIdentical=warning -org.eclipse.jdt.core.compiler.problem.deadCode=warning -org.eclipse.jdt.core.compiler.problem.deprecation=warning -org.eclipse.jdt.core.compiler.problem.deprecationInDeprecatedCode=disabled 
-org.eclipse.jdt.core.compiler.problem.deprecationWhenOverridingDeprecatedMethod=disabled -org.eclipse.jdt.core.compiler.problem.discouragedReference=warning -org.eclipse.jdt.core.compiler.problem.emptyStatement=ignore -org.eclipse.jdt.core.compiler.problem.enumIdentifier=error -org.eclipse.jdt.core.compiler.problem.explicitlyClosedAutoCloseable=warning -org.eclipse.jdt.core.compiler.problem.fallthroughCase=ignore -org.eclipse.jdt.core.compiler.problem.fatalOptionalError=disabled -org.eclipse.jdt.core.compiler.problem.fieldHiding=ignore -org.eclipse.jdt.core.compiler.problem.finalParameterBound=warning -org.eclipse.jdt.core.compiler.problem.finallyBlockNotCompletingNormally=warning -org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning -org.eclipse.jdt.core.compiler.problem.hiddenCatchBlock=warning -org.eclipse.jdt.core.compiler.problem.includeNullInfoFromAsserts=disabled -org.eclipse.jdt.core.compiler.problem.incompatibleNonInheritedInterfaceMethod=warning -org.eclipse.jdt.core.compiler.problem.incompleteEnumSwitch=warning -org.eclipse.jdt.core.compiler.problem.indirectStaticAccess=ignore -org.eclipse.jdt.core.compiler.problem.localVariableHiding=ignore -org.eclipse.jdt.core.compiler.problem.methodWithConstructorName=warning -org.eclipse.jdt.core.compiler.problem.missingDefaultCase=ignore -org.eclipse.jdt.core.compiler.problem.missingDeprecatedAnnotation=ignore -org.eclipse.jdt.core.compiler.problem.missingEnumCaseDespiteDefault=disabled -org.eclipse.jdt.core.compiler.problem.missingHashCodeMethod=ignore -org.eclipse.jdt.core.compiler.problem.missingOverrideAnnotation=warning -org.eclipse.jdt.core.compiler.problem.missingOverrideAnnotationForInterfaceMethodImplementation=enabled -org.eclipse.jdt.core.compiler.problem.missingSerialVersion=ignore -org.eclipse.jdt.core.compiler.problem.missingSynchronizedOnInheritedMethod=ignore -org.eclipse.jdt.core.compiler.problem.noEffectAssignment=warning 
-org.eclipse.jdt.core.compiler.problem.noImplicitStringConversion=warning -org.eclipse.jdt.core.compiler.problem.nonExternalizedStringLiteral=ignore -org.eclipse.jdt.core.compiler.problem.nonnullParameterAnnotationDropped=warning -org.eclipse.jdt.core.compiler.problem.nullAnnotationInferenceConflict=error -org.eclipse.jdt.core.compiler.problem.nullReference=warning -org.eclipse.jdt.core.compiler.problem.nullSpecViolation=error -org.eclipse.jdt.core.compiler.problem.nullUncheckedConversion=warning -org.eclipse.jdt.core.compiler.problem.overridingPackageDefaultMethod=warning -org.eclipse.jdt.core.compiler.problem.parameterAssignment=ignore -org.eclipse.jdt.core.compiler.problem.possibleAccidentalBooleanAssignment=ignore -org.eclipse.jdt.core.compiler.problem.potentialNullReference=ignore -org.eclipse.jdt.core.compiler.problem.potentiallyUnclosedCloseable=ignore -org.eclipse.jdt.core.compiler.problem.rawTypeReference=warning -org.eclipse.jdt.core.compiler.problem.redundantNullAnnotation=warning -org.eclipse.jdt.core.compiler.problem.redundantNullCheck=ignore -org.eclipse.jdt.core.compiler.problem.redundantSpecificationOfTypeArguments=warning -org.eclipse.jdt.core.compiler.problem.redundantSuperinterface=ignore -org.eclipse.jdt.core.compiler.problem.reportMethodCanBePotentiallyStatic=ignore -org.eclipse.jdt.core.compiler.problem.reportMethodCanBeStatic=ignore -org.eclipse.jdt.core.compiler.problem.specialParameterHidingField=disabled -org.eclipse.jdt.core.compiler.problem.staticAccessReceiver=warning -org.eclipse.jdt.core.compiler.problem.suppressOptionalErrors=disabled -org.eclipse.jdt.core.compiler.problem.suppressWarnings=enabled -org.eclipse.jdt.core.compiler.problem.syntacticNullAnalysisForFields=disabled -org.eclipse.jdt.core.compiler.problem.syntheticAccessEmulation=ignore -org.eclipse.jdt.core.compiler.problem.typeParameterHiding=warning -org.eclipse.jdt.core.compiler.problem.unavoidableGenericTypeProblems=enabled 
-org.eclipse.jdt.core.compiler.problem.uncheckedTypeOperation=warning -org.eclipse.jdt.core.compiler.problem.unclosedCloseable=warning -org.eclipse.jdt.core.compiler.problem.undocumentedEmptyBlock=ignore -org.eclipse.jdt.core.compiler.problem.unhandledWarningToken=warning -org.eclipse.jdt.core.compiler.problem.unnecessaryElse=ignore -org.eclipse.jdt.core.compiler.problem.unnecessaryTypeCheck=ignore -org.eclipse.jdt.core.compiler.problem.unqualifiedFieldAccess=ignore -org.eclipse.jdt.core.compiler.problem.unusedDeclaredThrownException=ignore -org.eclipse.jdt.core.compiler.problem.unusedDeclaredThrownExceptionExemptExceptionAndThrowable=enabled -org.eclipse.jdt.core.compiler.problem.unusedDeclaredThrownExceptionIncludeDocCommentReference=enabled -org.eclipse.jdt.core.compiler.problem.unusedDeclaredThrownExceptionWhenOverriding=disabled -org.eclipse.jdt.core.compiler.problem.unusedImport=warning -org.eclipse.jdt.core.compiler.problem.unusedLabel=warning -org.eclipse.jdt.core.compiler.problem.unusedLocal=warning -org.eclipse.jdt.core.compiler.problem.unusedObjectAllocation=ignore -org.eclipse.jdt.core.compiler.problem.unusedParameter=warning -org.eclipse.jdt.core.compiler.problem.unusedParameterIncludeDocCommentReference=enabled -org.eclipse.jdt.core.compiler.problem.unusedParameterWhenImplementingAbstract=disabled -org.eclipse.jdt.core.compiler.problem.unusedParameterWhenOverridingConcrete=disabled -org.eclipse.jdt.core.compiler.problem.unusedPrivateMember=warning -org.eclipse.jdt.core.compiler.problem.unusedTypeParameter=ignore -org.eclipse.jdt.core.compiler.problem.unusedWarningToken=warning -org.eclipse.jdt.core.compiler.problem.varargsArgumentNeedCast=warning -org.eclipse.jdt.core.compiler.source=1.6 -org.eclipse.jdt.core.formatter.align_type_members_on_columns=false -org.eclipse.jdt.core.formatter.alignment_for_arguments_in_allocation_expression=16 -org.eclipse.jdt.core.formatter.alignment_for_arguments_in_annotation=0 
-org.eclipse.jdt.core.formatter.alignment_for_arguments_in_enum_constant=16 -org.eclipse.jdt.core.formatter.alignment_for_arguments_in_explicit_constructor_call=16 -org.eclipse.jdt.core.formatter.alignment_for_arguments_in_method_invocation=16 -org.eclipse.jdt.core.formatter.alignment_for_arguments_in_qualified_allocation_expression=16 -org.eclipse.jdt.core.formatter.alignment_for_assignment=0 -org.eclipse.jdt.core.formatter.alignment_for_binary_expression=16 -org.eclipse.jdt.core.formatter.alignment_for_compact_if=20 -org.eclipse.jdt.core.formatter.alignment_for_conditional_expression=48 -org.eclipse.jdt.core.formatter.alignment_for_enum_constants=0 -org.eclipse.jdt.core.formatter.alignment_for_expressions_in_array_initializer=16 -org.eclipse.jdt.core.formatter.alignment_for_method_declaration=0 -org.eclipse.jdt.core.formatter.alignment_for_multiple_fields=16 -org.eclipse.jdt.core.formatter.alignment_for_parameters_in_constructor_declaration=16 -org.eclipse.jdt.core.formatter.alignment_for_parameters_in_method_declaration=16 -org.eclipse.jdt.core.formatter.alignment_for_resources_in_try=80 -org.eclipse.jdt.core.formatter.alignment_for_selector_in_method_invocation=16 -org.eclipse.jdt.core.formatter.alignment_for_superclass_in_type_declaration=64 -org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_enum_declaration=16 -org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_type_declaration=64 -org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_constructor_declaration=37 -org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_method_declaration=32 -org.eclipse.jdt.core.formatter.alignment_for_union_type_in_multicatch=16 -org.eclipse.jdt.core.formatter.blank_lines_after_imports=1 -org.eclipse.jdt.core.formatter.blank_lines_after_package=0 -org.eclipse.jdt.core.formatter.blank_lines_before_field=0 -org.eclipse.jdt.core.formatter.blank_lines_before_first_class_body_declaration=0 
-org.eclipse.jdt.core.formatter.blank_lines_before_imports=1 -org.eclipse.jdt.core.formatter.blank_lines_before_member_type=0 -org.eclipse.jdt.core.formatter.blank_lines_before_method=1 -org.eclipse.jdt.core.formatter.blank_lines_before_new_chunk=0 -org.eclipse.jdt.core.formatter.blank_lines_before_package=1 -org.eclipse.jdt.core.formatter.blank_lines_between_import_groups=1 -org.eclipse.jdt.core.formatter.blank_lines_between_type_declarations=1 -org.eclipse.jdt.core.formatter.brace_position_for_annotation_type_declaration=next_line -org.eclipse.jdt.core.formatter.brace_position_for_anonymous_type_declaration=next_line -org.eclipse.jdt.core.formatter.brace_position_for_array_initializer=end_of_line -org.eclipse.jdt.core.formatter.brace_position_for_block=next_line -org.eclipse.jdt.core.formatter.brace_position_for_block_in_case=next_line -org.eclipse.jdt.core.formatter.brace_position_for_constructor_declaration=next_line -org.eclipse.jdt.core.formatter.brace_position_for_enum_constant=next_line -org.eclipse.jdt.core.formatter.brace_position_for_enum_declaration=next_line -org.eclipse.jdt.core.formatter.brace_position_for_method_declaration=next_line -org.eclipse.jdt.core.formatter.brace_position_for_switch=next_line -org.eclipse.jdt.core.formatter.brace_position_for_type_declaration=next_line -org.eclipse.jdt.core.formatter.comment.clear_blank_lines_in_block_comment=false -org.eclipse.jdt.core.formatter.comment.clear_blank_lines_in_javadoc_comment=false -org.eclipse.jdt.core.formatter.comment.format_block_comments=false -org.eclipse.jdt.core.formatter.comment.format_header=false -org.eclipse.jdt.core.formatter.comment.format_html=true -org.eclipse.jdt.core.formatter.comment.format_javadoc_comments=false -org.eclipse.jdt.core.formatter.comment.format_line_comments=false -org.eclipse.jdt.core.formatter.comment.format_source_code=true -org.eclipse.jdt.core.formatter.comment.indent_parameter_description=true -org.eclipse.jdt.core.formatter.comment.indent_root_tags=true 
-org.eclipse.jdt.core.formatter.comment.insert_new_line_before_root_tags=insert -org.eclipse.jdt.core.formatter.comment.insert_new_line_for_parameter=do not insert -org.eclipse.jdt.core.formatter.comment.line_length=80 -org.eclipse.jdt.core.formatter.comment.new_lines_at_block_boundaries=true -org.eclipse.jdt.core.formatter.comment.new_lines_at_javadoc_boundaries=true -org.eclipse.jdt.core.formatter.comment.preserve_white_space_between_code_and_line_comments=false -org.eclipse.jdt.core.formatter.compact_else_if=false -org.eclipse.jdt.core.formatter.continuation_indentation=3 -org.eclipse.jdt.core.formatter.continuation_indentation_for_array_initializer=3 -org.eclipse.jdt.core.formatter.disabling_tag=@formatter\:off -org.eclipse.jdt.core.formatter.enabling_tag=@formatter\:on -org.eclipse.jdt.core.formatter.format_guardian_clause_on_one_line=false -org.eclipse.jdt.core.formatter.format_line_comment_starting_on_first_column=true -org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_annotation_declaration_header=true -org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_enum_constant_header=true -org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_enum_declaration_header=true -org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_type_header=true -org.eclipse.jdt.core.formatter.indent_breaks_compare_to_cases=true -org.eclipse.jdt.core.formatter.indent_empty_lines=false -org.eclipse.jdt.core.formatter.indent_statements_compare_to_block=true -org.eclipse.jdt.core.formatter.indent_statements_compare_to_body=true -org.eclipse.jdt.core.formatter.indent_switchstatements_compare_to_cases=true -org.eclipse.jdt.core.formatter.indent_switchstatements_compare_to_switch=true -org.eclipse.jdt.core.formatter.indentation.size=3 -org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_field=insert -org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_local_variable=insert 
-org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_method=insert -org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_package=insert -org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_parameter=insert -org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_type=insert -org.eclipse.jdt.core.formatter.insert_new_line_after_label=do not insert -org.eclipse.jdt.core.formatter.insert_new_line_after_opening_brace_in_array_initializer=do not insert -org.eclipse.jdt.core.formatter.insert_new_line_at_end_of_file_if_missing=do not insert -org.eclipse.jdt.core.formatter.insert_new_line_before_catch_in_try_statement=insert -org.eclipse.jdt.core.formatter.insert_new_line_before_closing_brace_in_array_initializer=do not insert -org.eclipse.jdt.core.formatter.insert_new_line_before_else_in_if_statement=insert -org.eclipse.jdt.core.formatter.insert_new_line_before_finally_in_try_statement=insert -org.eclipse.jdt.core.formatter.insert_new_line_before_while_in_do_statement=insert -org.eclipse.jdt.core.formatter.insert_new_line_in_empty_annotation_declaration=insert -org.eclipse.jdt.core.formatter.insert_new_line_in_empty_anonymous_type_declaration=insert -org.eclipse.jdt.core.formatter.insert_new_line_in_empty_block=insert -org.eclipse.jdt.core.formatter.insert_new_line_in_empty_enum_constant=insert -org.eclipse.jdt.core.formatter.insert_new_line_in_empty_enum_declaration=insert -org.eclipse.jdt.core.formatter.insert_new_line_in_empty_method_body=insert -org.eclipse.jdt.core.formatter.insert_new_line_in_empty_type_declaration=insert -org.eclipse.jdt.core.formatter.insert_space_after_and_in_type_parameter=insert -org.eclipse.jdt.core.formatter.insert_space_after_assignment_operator=insert -org.eclipse.jdt.core.formatter.insert_space_after_at_in_annotation=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_at_in_annotation_type_declaration=do not insert 
-org.eclipse.jdt.core.formatter.insert_space_after_binary_operator=insert -org.eclipse.jdt.core.formatter.insert_space_after_closing_angle_bracket_in_type_arguments=insert -org.eclipse.jdt.core.formatter.insert_space_after_closing_angle_bracket_in_type_parameters=insert -org.eclipse.jdt.core.formatter.insert_space_after_closing_brace_in_block=insert -org.eclipse.jdt.core.formatter.insert_space_after_closing_paren_in_cast=insert -org.eclipse.jdt.core.formatter.insert_space_after_colon_in_assert=insert -org.eclipse.jdt.core.formatter.insert_space_after_colon_in_case=insert -org.eclipse.jdt.core.formatter.insert_space_after_colon_in_conditional=insert -org.eclipse.jdt.core.formatter.insert_space_after_colon_in_for=insert -org.eclipse.jdt.core.formatter.insert_space_after_colon_in_labeled_statement=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_allocation_expression=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_annotation=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_array_initializer=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_constructor_declaration_parameters=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_constructor_declaration_throws=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_enum_constant_arguments=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_enum_declarations=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_explicitconstructorcall_arguments=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_for_increments=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_for_inits=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_declaration_parameters=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_declaration_throws=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_invocation_arguments=insert 
-org.eclipse.jdt.core.formatter.insert_space_after_comma_in_multiple_field_declarations=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_multiple_local_declarations=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_parameterized_type_reference=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_superinterfaces=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_type_arguments=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_type_parameters=insert -org.eclipse.jdt.core.formatter.insert_space_after_ellipsis=insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_parameterized_type_reference=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_type_arguments=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_type_parameters=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_brace_in_array_initializer=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_bracket_in_array_allocation_expression=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_bracket_in_array_reference=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_annotation=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_cast=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_catch=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_constructor_declaration=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_enum_constant=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_for=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_if=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_method_declaration=do not insert 
-org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_method_invocation=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_parenthesized_expression=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_switch=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_synchronized=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_try=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_while=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_postfix_operator=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_prefix_operator=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_question_in_conditional=insert -org.eclipse.jdt.core.formatter.insert_space_after_question_in_wildcard=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_semicolon_in_for=insert -org.eclipse.jdt.core.formatter.insert_space_after_semicolon_in_try_resources=insert -org.eclipse.jdt.core.formatter.insert_space_after_unary_operator=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_and_in_type_parameter=insert -org.eclipse.jdt.core.formatter.insert_space_before_assignment_operator=insert -org.eclipse.jdt.core.formatter.insert_space_before_at_in_annotation_type_declaration=insert -org.eclipse.jdt.core.formatter.insert_space_before_binary_operator=insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_parameterized_type_reference=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_type_arguments=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_type_parameters=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_brace_in_array_initializer=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_bracket_in_array_allocation_expression=do 
not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_bracket_in_array_reference=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_annotation=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_cast=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_catch=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_constructor_declaration=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_enum_constant=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_for=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_if=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_method_declaration=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_method_invocation=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_parenthesized_expression=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_switch=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_synchronized=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_try=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_while=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_colon_in_assert=insert -org.eclipse.jdt.core.formatter.insert_space_before_colon_in_case=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_colon_in_conditional=insert -org.eclipse.jdt.core.formatter.insert_space_before_colon_in_default=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_colon_in_for=insert -org.eclipse.jdt.core.formatter.insert_space_before_colon_in_labeled_statement=insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_allocation_expression=do not 
insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_annotation=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_array_initializer=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_constructor_declaration_parameters=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_constructor_declaration_throws=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_enum_constant_arguments=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_enum_declarations=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_explicitconstructorcall_arguments=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_for_increments=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_for_inits=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_parameters=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_throws=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_invocation_arguments=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_multiple_field_declarations=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_multiple_local_declarations=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_parameterized_type_reference=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_superinterfaces=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_type_arguments=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_type_parameters=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_ellipsis=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_parameterized_type_reference=do not insert 
-org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_type_arguments=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_type_parameters=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_annotation_type_declaration=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_anonymous_type_declaration=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_array_initializer=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_block=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_constructor_declaration=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_enum_constant=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_enum_declaration=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_method_declaration=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_switch=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_type_declaration=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_allocation_expression=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_reference=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_type_reference=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_annotation=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_annotation_type_member_declaration=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_catch=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_constructor_declaration=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_enum_constant=do not insert 
-org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_for=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_if=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_method_declaration=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_method_invocation=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_parenthesized_expression=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_switch=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_synchronized=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_try=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_while=insert -org.eclipse.jdt.core.formatter.insert_space_before_parenthesized_expression_in_return=insert -org.eclipse.jdt.core.formatter.insert_space_before_parenthesized_expression_in_throw=insert -org.eclipse.jdt.core.formatter.insert_space_before_postfix_operator=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_prefix_operator=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_question_in_conditional=insert -org.eclipse.jdt.core.formatter.insert_space_before_question_in_wildcard=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_semicolon=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_semicolon_in_for=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_semicolon_in_try_resources=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_unary_operator=do not insert -org.eclipse.jdt.core.formatter.insert_space_between_brackets_in_array_type_reference=do not insert -org.eclipse.jdt.core.formatter.insert_space_between_empty_braces_in_array_initializer=do not insert -org.eclipse.jdt.core.formatter.insert_space_between_empty_brackets_in_array_allocation_expression=do not insert 
-org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_annotation_type_member_declaration=do not insert -org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_constructor_declaration=do not insert -org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_enum_constant=do not insert -org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_method_declaration=do not insert -org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_method_invocation=do not insert -org.eclipse.jdt.core.formatter.join_lines_in_comments=true -org.eclipse.jdt.core.formatter.join_wrapped_lines=true -org.eclipse.jdt.core.formatter.keep_else_statement_on_same_line=false -org.eclipse.jdt.core.formatter.keep_empty_array_initializer_on_one_line=false -org.eclipse.jdt.core.formatter.keep_imple_if_on_one_line=false -org.eclipse.jdt.core.formatter.keep_then_statement_on_same_line=false -org.eclipse.jdt.core.formatter.lineSplit=999 -org.eclipse.jdt.core.formatter.never_indent_block_comments_on_first_column=false -org.eclipse.jdt.core.formatter.never_indent_line_comments_on_first_column=false -org.eclipse.jdt.core.formatter.number_of_blank_lines_at_beginning_of_method_body=0 -org.eclipse.jdt.core.formatter.number_of_empty_lines_to_preserve=1 -org.eclipse.jdt.core.formatter.put_empty_statement_on_new_line=false -org.eclipse.jdt.core.formatter.tabulation.char=space -org.eclipse.jdt.core.formatter.tabulation.size=3 -org.eclipse.jdt.core.formatter.use_on_off_tags=false -org.eclipse.jdt.core.formatter.use_tabs_only_for_leading_indentations=false -org.eclipse.jdt.core.formatter.wrap_before_binary_operator=true -org.eclipse.jdt.core.formatter.wrap_before_or_operator_multicatch=true -org.eclipse.jdt.core.formatter.wrap_outer_expressions_when_nested=true diff --git a/RTF Parser Kit/.settings/org.eclipse.jdt.ui.prefs b/RTF Parser Kit/.settings/org.eclipse.jdt.ui.prefs deleted file mode 100644 index 8af9ea7..0000000 --- a/RTF Parser 
Kit/.settings/org.eclipse.jdt.ui.prefs +++ /dev/null @@ -1,56 +0,0 @@ -eclipse.preferences.version=1 -editor_save_participant_org.eclipse.jdt.ui.postsavelistener.cleanup=true -formatter_profile=_RTFKit -formatter_settings_version=12 -sp_cleanup.add_default_serial_version_id=true -sp_cleanup.add_generated_serial_version_id=false -sp_cleanup.add_missing_annotations=true -sp_cleanup.add_missing_deprecated_annotations=true -sp_cleanup.add_missing_methods=false -sp_cleanup.add_missing_nls_tags=false -sp_cleanup.add_missing_override_annotations=true -sp_cleanup.add_missing_override_annotations_interface_methods=true -sp_cleanup.add_serial_version_id=false -sp_cleanup.always_use_blocks=true -sp_cleanup.always_use_parentheses_in_expressions=false -sp_cleanup.always_use_this_for_non_static_field_access=false -sp_cleanup.always_use_this_for_non_static_method_access=false -sp_cleanup.convert_to_enhanced_for_loop=false -sp_cleanup.correct_indentation=false -sp_cleanup.format_source_code=true -sp_cleanup.format_source_code_changes_only=false -sp_cleanup.make_local_variable_final=false -sp_cleanup.make_parameters_final=false -sp_cleanup.make_private_fields_final=true -sp_cleanup.make_type_abstract_if_missing_method=false -sp_cleanup.make_variable_declarations_final=true -sp_cleanup.never_use_blocks=false -sp_cleanup.never_use_parentheses_in_expressions=true -sp_cleanup.on_save_use_additional_actions=false -sp_cleanup.organize_imports=true -sp_cleanup.qualify_static_field_accesses_with_declaring_class=false -sp_cleanup.qualify_static_member_accesses_through_instances_with_declaring_class=true -sp_cleanup.qualify_static_member_accesses_through_subtypes_with_declaring_class=true -sp_cleanup.qualify_static_member_accesses_with_declaring_class=false -sp_cleanup.qualify_static_method_accesses_with_declaring_class=false -sp_cleanup.remove_private_constructors=true -sp_cleanup.remove_trailing_whitespaces=false -sp_cleanup.remove_trailing_whitespaces_all=true 
-sp_cleanup.remove_trailing_whitespaces_ignore_empty=false -sp_cleanup.remove_unnecessary_casts=true -sp_cleanup.remove_unnecessary_nls_tags=false -sp_cleanup.remove_unused_imports=false -sp_cleanup.remove_unused_local_variables=false -sp_cleanup.remove_unused_private_fields=true -sp_cleanup.remove_unused_private_members=false -sp_cleanup.remove_unused_private_methods=true -sp_cleanup.remove_unused_private_types=true -sp_cleanup.sort_members=false -sp_cleanup.sort_members_all=false -sp_cleanup.use_blocks=false -sp_cleanup.use_blocks_only_for_return_and_throw=false -sp_cleanup.use_parentheses_in_expressions=false -sp_cleanup.use_this_for_non_static_field_access=false -sp_cleanup.use_this_for_non_static_field_access_only_if_necessary=true -sp_cleanup.use_this_for_non_static_method_access=false -sp_cleanup.use_this_for_non_static_method_access_only_if_necessary=true diff --git a/RTF Parser Kit/RTF Parser Kit.jardesc b/RTF Parser Kit/RTF Parser Kit.jardesc deleted file mode 100644 index 8953e24..0000000 --- a/RTF Parser Kit/RTF Parser Kit.jardesc +++ /dev/null @@ -1,16 +0,0 @@ - - - - - - - - - - - - - - - - diff --git a/RTF Parser Kit/bin/.gitignore b/RTF Parser Kit/bin/.gitignore deleted file mode 100644 index c2d9872..0000000 --- a/RTF Parser Kit/bin/.gitignore +++ /dev/null @@ -1 +0,0 @@ -/com/ diff --git a/RTF Parser Kit/test/com/rtfparserkit/parser/raw/data/testRawParse.xml b/RTF Parser Kit/test/com/rtfparserkit/parser/raw/data/testRawParse.xml deleted file mode 100644 index 6c4f3d3..0000000 --- a/RTF Parser Kit/test/com/rtfparserkit/parser/raw/data/testRawParse.xml +++ /dev/null @@ -1,35 +0,0 @@ - - - - - - - - - - - - - - - Calibri; - - - - - - Msftedit 5.41.21.2510; - - - - - - - - - - - Test1 - - - \ No newline at end of file diff --git a/RTF Parser Kit/test/com/rtfparserkit/parser/raw/data/testSpecialChars.xml b/RTF Parser Kit/test/com/rtfparserkit/parser/raw/data/testSpecialChars.xml deleted file mode 100644 index f24a667..0000000 --- a/RTF Parser 
Kit/test/com/rtfparserkit/parser/raw/data/testSpecialChars.xml +++ /dev/null @@ -1,83 +0,0 @@ - - - - - - - - - - - - - - - Calibri; - - - - - - Msftedit 5.41.21.2510; - - - - - - - - - - - Tab A - - B - - Tab A - - B - - CR A - - B - - LF A - - B - - This is a line break. - - Here is the new line. - - Emdash: - - - Endash: - - - Emspace: - - - Enspace: - - - Qmspace: - - - Bullet: - - - Lquote: - - - Rquote: - - - Ldblquote: - - - Rdblquote: - - - - \ No newline at end of file diff --git a/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/test10001Encoding.xml b/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/test10001Encoding.xml deleted file mode 100644 index 428181c..0000000 --- a/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/test10001Encoding.xml +++ /dev/null @@ -1,107 +0,0 @@ - - - - - - - - - - - - HiraKakuPro-W6; - - - - Helvetica-Bold; - - - - Helvetica; - - - - HiraKakuPro-W3; - - - - ; - - - - ; - - - - - - - - - - - - - - - - - - - {disc} - - - - - .; - - - - ; - - - - - ; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - お読みください - - \ No newline at end of file diff --git a/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/test10007Encoding.xml b/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/test10007Encoding.xml deleted file mode 100644 index 37c7a39..0000000 --- a/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/test10007Encoding.xml +++ /dev/null @@ -1,53 +0,0 @@ - - - - - - - - - - - LucidaGrande; - - - - Georgia; - - - - Verdana; - - - - ; - - - - ; - - - - - - - - - - - - - - - - - - - - - - Комманда разработчиков - - - - \ No newline at end of file diff --git a/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/test437Encoding.xml b/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/test437Encoding.xml deleted file mode 100644 index 779aa46..0000000 --- a/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/test437Encoding.xml +++ 
/dev/null @@ -1,204 +0,0 @@ - - - - - - - - - - - - Times New Roman - - - Times - - ; - - - - - - Helvetica-Bold - - - Helvetica - - ; - - - - - - Helvetica; - - - - - - LucidaGrande - - - Lucida Grande - - ; - - - - - ; - - - - ; - - - - - - - - The quick brown fox jumped over the lazy dogs. - - - footnote reference; - - - - - - The quick brown fox jumped over the lazy dogs. - - - endnote reference; - - - - - - Some text goes here so you can see what your style will look like. - - - footnote text; - - - - - - Sample text for Foot/End Notes Style - - - endnote text; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Test Document - - - - - - Test Title: - - (Test Subtitle) - - - - - Test Heading: - - - - Test Text. - - - - - \ No newline at end of file diff --git a/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/test874Encoding.xml b/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/test874Encoding.xml deleted file mode 100644 index 0f1152b..0000000 --- a/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/test874Encoding.xml +++ /dev/null @@ -1,640 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - 02020603050405020304 - - Times New Roman; - - - - - - - - - 020b0604020202020204 - - Arial; - - - - - - - - - 02020603050405020304 - - Angsana New; - - - - - - - - - 00000000000000000000 - - SymbolMT; - - - - - - - Times New Roman CE; - - - - - - - Times New Roman Cyr; - - - - - - - Times New Roman Greek; - - - - - - - Times New Roman Tur; - - - - - - - Times New Roman (Hebrew); - - - - - - - Times New Roman (Arabic); - - - - - - - Times New Roman Baltic; - - - - - - - Times New Roman (Vietnamese); - - - - - - - Arial CE; - - - - - - - Arial Cyr; - - - - - - - Arial Greek; - - - - - - - Arial Tur; - - - - - - - Arial (Hebrew); - - - - - - - Arial (Arabic); - - - - - - - Arial Baltic; - - - - - - - Arial (Vietnamese); 
- - - - - - - Angsana New (Thai); - - - - - ; - - - - ; - - - - ; - - - - ; - - - - ; - - - - ; - - - - ; - - - - ; - - - - ; - - - - ; - - - - ; - - - - ; - - - - ; - - - - ; - - - - ; - - - - ; - - - - ; - - - - - - - - - - - - - - - - - - - - - - - - - - - - Normal; - - - - - - Default Paragraph Font; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Normal Table; - - - - - - - - - - - - - - - - Microsoft Word 11.0.5604; - - - - - - 3 - - - - Carestream Health Inc. - - - - SAMAK - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - . - - - - - - - - - - - . - - - - - - - - - - - . - - - - - - - - - - - ) - - - - - - - - - - - ( - - - - ) - - - - - - - - - - - ( - - - - ) - - - - - - - - - - - ( - - - - ) - - - - - - - - - - - ( - - - - ) - - - - - - - - - - - ( - - - - ) - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 1. 
Test Heading - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Some test text - - - - - - - - - - - - - - - - - - - - - - - - Some more test text - - - \ No newline at end of file diff --git a/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/test950Encoding.xml b/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/test950Encoding.xml deleted file mode 100644 index 2dc657c..0000000 --- a/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/test950Encoding.xml +++ /dev/null @@ -1,56 +0,0 @@ - - - - - - - - - - - - LucidaGrande; - - - - ; - - - - ; - - - - - - Title - - - - Test Author - - - - - - - - - - - - - - - - Test Text - - - - - - - - Copyright © 2004-2013 Test Project - - \ No newline at end of file diff --git a/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testDefaultEncodingParse.xml b/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testDefaultEncodingParse.xml deleted file mode 100644 index ad38a69..0000000 --- a/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testDefaultEncodingParse.xml +++ /dev/null @@ -1,8 +0,0 @@ - - - - - Test1 - - - \ No newline at end of file diff --git a/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testEncodingParse.xml b/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testEncodingParse.xml deleted file mode 100644 index 10d026b..0000000 --- a/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testEncodingParse.xml +++ /dev/null @@ -1,31 +0,0 @@ - - - - - - - - - - - - - Calibri; - - - - - Msftedit 5.41.21.2510; - - - - - - - - - - Test1 - - - \ No newline at end of file diff --git a/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testGitHubIssue6.xml b/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testGitHubIssue6.xml deleted file mode 100644 index f6a81d6..0000000 --- a/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testGitHubIssue6.xml +++ /dev/null @@ -1,253 +0,0 @@ - - - - - - - - - - - - Times 
New Roman; - - - - - - Arial; - - - - - - Courier; - - - - - - - - ; - - - - ; - - - - - - - - - - - - - Normal; - - - - - - - - - - - - heading 3; - - - - - - - - - - - - - heading 2; - - - - - - - - - - - - heading 1; - - - - - - - - - - - iText 2.1.7 by 1T3XT - - - - - - - - - - - - - - - - - - - - - - - - - - - PAGE - - - - - - - - - . - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - INNEN. KÜCHE - TAG - - - - - - - - - - - - - - - - - - - - - - - - Ein Absatz mit Line-Separator: - - Der geht hier auf einer neuen Zeile weiter. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - INNEN. KÜCHE - TAG - - - - - - - - - - - - - - - - - - - - - - - Hier ist die zweite Szene. - - - - \ No newline at end of file diff --git a/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testGreekEncoding.xml b/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testGreekEncoding.xml deleted file mode 100644 index cdfe3bd..0000000 --- a/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testGreekEncoding.xml +++ /dev/null @@ -1,32 +0,0 @@ - - - - - - - - - - - - - Tahoma; - - - - - - Tahoma; - - - - - - - Unicode € - - Ω - - - - \ No newline at end of file diff --git a/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testHex.xml b/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testHex.xml deleted file mode 100644 index e705a7e..0000000 --- a/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testHex.xml +++ /dev/null @@ -1,31 +0,0 @@ - - - - - - - - - - - - - Calibri; - - - - - Msftedit 5.41.21.2510; - - - - - - - - - - ABC - - - \ No newline at end of file diff --git a/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testKoreanEncoding.xml b/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testKoreanEncoding.xml deleted file mode 100644 index 717071d..0000000 --- a/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testKoreanEncoding.xml +++ /dev/null @@ -1,35 +0,0 @@ - - - - - 
- - - - - - - - - - - 맑은 고딕; - - - - - Riched20 14.0.4750.1000; - - - - - - - - - - - MS Project Addin ProjectPlus - - - \ No newline at end of file diff --git a/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testMultiByteHex.xml b/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testMultiByteHex.xml deleted file mode 100644 index 5f01281..0000000 --- a/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testMultiByteHex.xml +++ /dev/null @@ -1,38 +0,0 @@ - - - - - - - - - - - - - - - MS Pゴシック; - - - - - - - Tahoma; - - - - - Riched20 5.50.99.2014; - - - - - - - お元気ですか? - - - - \ No newline at end of file diff --git a/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testNegativeUnicode.xml b/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testNegativeUnicode.xml deleted file mode 100644 index fe0063c..0000000 --- a/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testNegativeUnicode.xml +++ /dev/null @@ -1,64 +0,0 @@ - - - - - - - - - - - - - Calibri; - - - - - ; - - - - ; - - - - Msftedit 5.41.21.2510; - - - - - - - - - - From: - - - - - - HYPERLINK "http://unicode-table.com/en/#private-use-area" - - - - - - - - http://unicode-table.com/en/#private-use-area - - - - - - - F700 - - F710 - - F720 - - - \ No newline at end of file diff --git a/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testSpecialChars.xml b/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testSpecialChars.xml deleted file mode 100644 index 6620f83..0000000 --- a/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testSpecialChars.xml +++ /dev/null @@ -1,69 +0,0 @@ - - - - - - - - - - - - - Calibri; - - - - - Msftedit 5.41.21.2510; - - - - - - - - - - Tab A - - B - - Tab A - - B - - CR A - - B - - LF A - - B - - This is a line break. - - Here is the new line. 
- - Emdash: — - - Endash: – - - Emspace:   - - Enspace:   - - Qmspace:   - - Bullet: • - - Lquote: ‘ - - Rquote: ’ - - Ldblquote: “ - - Rdblquote: ” - - - \ No newline at end of file diff --git a/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testStyles.xml b/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testStyles.xml deleted file mode 100644 index 0a1bc1a..0000000 --- a/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testStyles.xml +++ /dev/null @@ -1,64 +0,0 @@ - - - - - - - - - - - - - Calibri; - - - - - - Arial; - - - - - Msftedit 5.41.21.2510; - - - - - - - - - - Test 1 - normal. - - - Test 2 - italic. - - - - Test 3 - bold. - - - Test 4 -normal. - - Test 5 - Calibri 11. - - - Test 6 - Arial 11. - - - Test 7 - Calibri 11. - - - - Test 6 - Arial 12. - - - - Test 7 - Calibri 11. - - - - \ No newline at end of file diff --git a/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testTurkishEncoding.xml b/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testTurkishEncoding.xml deleted file mode 100644 index d6654af..0000000 --- a/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testTurkishEncoding.xml +++ /dev/null @@ -1,39 +0,0 @@ - - - - - - - - - - - - - - Segoe UI; - - - - - - Segoe UI; - - - - - Riched20 15.0.4567 - - - - - - - - - - Turkish Encoding. 
- - - - \ No newline at end of file diff --git a/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testUnicode.xml b/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testUnicode.xml deleted file mode 100644 index 256ffc6..0000000 --- a/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testUnicode.xml +++ /dev/null @@ -1,109 +0,0 @@ - - - - - - - - - - - - - - Sylfaen; - - - - - - Sylfaen; - - - - - - Sylfaen; - - - - - - Shonar Bangla; - - - - - - Microsoft Himalaya; - - - - - - DaunPenh; - - - - - - Euphemia; - - - - - - SimSun; - - - - - - Calibri; - - - - - Msftedit 5.41.21.2510; - - - - - - - - - - - Արամ - - - Johann Strauß - - - Belgi - - ë - - René Magritte - - - বাংলা - - - ་ཡུལ། - - - ប្រទេស​​​កម្ពុជា - - - ᓄᓇᕗᒻᒥᐅᑦ - - - 中国 - - - Česká republika - - - \ No newline at end of file diff --git a/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testUpr.xml b/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testUpr.xml deleted file mode 100644 index b4f31d3..0000000 --- a/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testUpr.xml +++ /dev/null @@ -1,1119 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - 02020603050405020304 - - Times New Roman; - - - - - - - - - - 02000500000000000000 - - Times; - - - - - - - - - - 02040503050406030204 - - Cambria; - - - - - - - - - - 02000503060000020004 - - Optima; - - - - - - - - Lucida Grande; - - - - - - - - - - - 020b0604020202020204 - - Arial; - - - - - - - - - - - 02020603050405020304 - - Times New Roman; - - - - - - - - - - - 020f0502020204030204 - - Calibri; - - - - - - - - - - - 02020603050405020304 - - Times New Roman; - - - - - - - - - - - 02020603050405020304 - - Times New Roman; - - - - - - - - - - - 02020603050405020304 - - Times New Roman; - - - - - - - - - - - 02040503050406030204 - - Cambria; - - - - - - - - - - - 02020603050405020304 - - Times New Roman; - - - - - ; - - - - ; - - - - ; - - - - ; - - - - ; - - - - ; - - - - ; - - - - ; - - - - ; 
- - - - ; - - - - ; - - - - ; - - - - ; - - - - ; - - - - ; - - - - ; - - - - ; - - - - ; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Normal; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - heading 1; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - heading 2; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - heading 3; - - - - - - Default Paragraph Font; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Normal Table; - - - - - - - - - - - - - - - - - - - - - - Heading 3 Char; - - - - - - - - - - - - - - - - - - - - - - - Heading 2 Char; - - - - - - - - - - - - - - - - - - - - - Heading 1 Char; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - header; - - - - - - - - - - - - - - Header Char; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Normal (Web); - - - - - - - - - - - - apple-converted-space; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Test - - - - Test - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - TEST - - - - - - - - - - - - - - - - http://schemas.microsoft.com/office/word/2003/wordml - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 013f - - - - - - - - - - - - - - - - - - - - - - - - . - - - - - - - - - - - . - - - - - - - - - - - . 
- - - - - - - - - - - ) - - - - - - - - - - - ( - - - - ) - - - - - - - - - - - ( - - - - ) - - - - - - - - - - - ( - - - - ) - - - - - - - - - - - ( - - - - ) - - - - - - - - - - - ( - - - - ) - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - TEST - - - - - - - - - - - - - - - - - - - - - - - End User License Agreement - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - TEST - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/RTF Parser Kit/licence.txt b/licence.txt similarity index 100% rename from RTF Parser Kit/licence.txt rename to licence.txt diff --git a/pom.xml b/pom.xml new file mode 100644 index 0000000..73adb78 --- /dev/null +++ b/pom.xml @@ -0,0 +1,140 @@ + + 4.0.0 + com.github.joniles + rtfparserkit + 1.12.0 + jar + + RTF Parser Kit + Modular RTF parser + https://github.com/joniles/rtfparserkit + 2013 + + + https://github.com/joniles/rtfparserkit/issues + GitHub Issues + + + + https://github.com/joniles/rtfparserkit + scm:git:git://github.com/joniles/rtfparserkit.git + scm:git:git@github.com:joniles/rtfparserkit.git + + + + + joniles + Jon Iles + jon.iles@bcs.org.uk + Packwood Software + + + + + + Apache License, Version 2.0 + http://www.apache.org/licenses/LICENSE-2.0.txt + repo + A business-friendly OSS license + + + + + + junit + junit + 4.11 + test + + + + + + + ossrh + https://oss.sonatype.org/content/repositories/snapshots + + + + + + + + + org.apache.maven.plugins + maven-compiler-plugin + 3.0 + + 1.7 + 1.7 + + + + + + org.sonatype.plugins + nexus-staging-maven-plugin + 1.6.3 + true + + ossrh + https://oss.sonatype.org/ + true + + + + + + org.apache.maven.plugins + maven-source-plugin + 2.2.1 + + + attach-sources + + jar-no-fork + + + + + + + + org.apache.maven.plugins + maven-javadoc-plugin + 2.9.1 + + -Xdoclint:none + true + + + + attach-javadocs + + jar + + + + + + + + 
org.apache.maven.plugins + maven-gpg-plugin + 1.5 + + + sign-artifacts + verify + + sign + + + + + + + + \ No newline at end of file diff --git a/RTF Parser Kit/src/com/rtfparserkit/converter/text/AbstractTextConverter.java b/src/main/java/com/rtfparserkit/converter/text/AbstractTextConverter.java similarity index 100% rename from RTF Parser Kit/src/com/rtfparserkit/converter/text/AbstractTextConverter.java rename to src/main/java/com/rtfparserkit/converter/text/AbstractTextConverter.java diff --git a/RTF Parser Kit/src/com/rtfparserkit/converter/text/StreamTextConverter.java b/src/main/java/com/rtfparserkit/converter/text/StreamTextConverter.java similarity index 100% rename from RTF Parser Kit/src/com/rtfparserkit/converter/text/StreamTextConverter.java rename to src/main/java/com/rtfparserkit/converter/text/StreamTextConverter.java diff --git a/RTF Parser Kit/src/com/rtfparserkit/converter/text/StringTextConverter.java b/src/main/java/com/rtfparserkit/converter/text/StringTextConverter.java similarity index 100% rename from RTF Parser Kit/src/com/rtfparserkit/converter/text/StringTextConverter.java rename to src/main/java/com/rtfparserkit/converter/text/StringTextConverter.java diff --git a/RTF Parser Kit/src/com/rtfparserkit/parser/IRtfListener.java b/src/main/java/com/rtfparserkit/parser/IRtfListener.java similarity index 100% rename from RTF Parser Kit/src/com/rtfparserkit/parser/IRtfListener.java rename to src/main/java/com/rtfparserkit/parser/IRtfListener.java diff --git a/RTF Parser Kit/src/com/rtfparserkit/parser/IRtfParser.java b/src/main/java/com/rtfparserkit/parser/IRtfParser.java similarity index 100% rename from RTF Parser Kit/src/com/rtfparserkit/parser/IRtfParser.java rename to src/main/java/com/rtfparserkit/parser/IRtfParser.java diff --git a/RTF Parser Kit/src/com/rtfparserkit/parser/IRtfSource.java b/src/main/java/com/rtfparserkit/parser/IRtfSource.java similarity index 100% rename from RTF Parser Kit/src/com/rtfparserkit/parser/IRtfSource.java 
rename to src/main/java/com/rtfparserkit/parser/IRtfSource.java diff --git a/RTF Parser Kit/src/com/rtfparserkit/parser/RtfListenerAdaptor.java b/src/main/java/com/rtfparserkit/parser/RtfListenerAdaptor.java similarity index 100% rename from RTF Parser Kit/src/com/rtfparserkit/parser/RtfListenerAdaptor.java rename to src/main/java/com/rtfparserkit/parser/RtfListenerAdaptor.java diff --git a/RTF Parser Kit/src/com/rtfparserkit/parser/RtfStreamSource.java b/src/main/java/com/rtfparserkit/parser/RtfStreamSource.java similarity index 100% rename from RTF Parser Kit/src/com/rtfparserkit/parser/RtfStreamSource.java rename to src/main/java/com/rtfparserkit/parser/RtfStreamSource.java diff --git a/RTF Parser Kit/src/com/rtfparserkit/parser/RtfStringSource.java b/src/main/java/com/rtfparserkit/parser/RtfStringSource.java similarity index 100% rename from RTF Parser Kit/src/com/rtfparserkit/parser/RtfStringSource.java rename to src/main/java/com/rtfparserkit/parser/RtfStringSource.java diff --git a/RTF Parser Kit/src/com/rtfparserkit/parser/raw/ByteBuffer.java b/src/main/java/com/rtfparserkit/parser/raw/ByteBuffer.java similarity index 100% rename from RTF Parser Kit/src/com/rtfparserkit/parser/raw/ByteBuffer.java rename to src/main/java/com/rtfparserkit/parser/raw/ByteBuffer.java diff --git a/RTF Parser Kit/src/com/rtfparserkit/parser/raw/RawRtfParser.java b/src/main/java/com/rtfparserkit/parser/raw/RawRtfParser.java similarity index 100% rename from RTF Parser Kit/src/com/rtfparserkit/parser/raw/RawRtfParser.java rename to src/main/java/com/rtfparserkit/parser/raw/RawRtfParser.java diff --git a/RTF Parser Kit/src/com/rtfparserkit/parser/standard/BinaryBytesEvent.java b/src/main/java/com/rtfparserkit/parser/standard/BinaryBytesEvent.java similarity index 100% rename from RTF Parser Kit/src/com/rtfparserkit/parser/standard/BinaryBytesEvent.java rename to src/main/java/com/rtfparserkit/parser/standard/BinaryBytesEvent.java diff --git a/RTF Parser 
Kit/src/com/rtfparserkit/parser/standard/CommandEvent.java b/src/main/java/com/rtfparserkit/parser/standard/CommandEvent.java similarity index 100% rename from RTF Parser Kit/src/com/rtfparserkit/parser/standard/CommandEvent.java rename to src/main/java/com/rtfparserkit/parser/standard/CommandEvent.java diff --git a/RTF Parser Kit/src/com/rtfparserkit/parser/standard/DefaultEventHandler.java b/src/main/java/com/rtfparserkit/parser/standard/DefaultEventHandler.java similarity index 100% rename from RTF Parser Kit/src/com/rtfparserkit/parser/standard/DefaultEventHandler.java rename to src/main/java/com/rtfparserkit/parser/standard/DefaultEventHandler.java diff --git a/RTF Parser Kit/src/com/rtfparserkit/parser/standard/DocumentEndEvent.java b/src/main/java/com/rtfparserkit/parser/standard/DocumentEndEvent.java similarity index 100% rename from RTF Parser Kit/src/com/rtfparserkit/parser/standard/DocumentEndEvent.java rename to src/main/java/com/rtfparserkit/parser/standard/DocumentEndEvent.java diff --git a/RTF Parser Kit/src/com/rtfparserkit/parser/standard/DocumentStartEvent.java b/src/main/java/com/rtfparserkit/parser/standard/DocumentStartEvent.java similarity index 100% rename from RTF Parser Kit/src/com/rtfparserkit/parser/standard/DocumentStartEvent.java rename to src/main/java/com/rtfparserkit/parser/standard/DocumentStartEvent.java diff --git a/RTF Parser Kit/src/com/rtfparserkit/parser/standard/Encoding.java b/src/main/java/com/rtfparserkit/parser/standard/Encoding.java similarity index 100% rename from RTF Parser Kit/src/com/rtfparserkit/parser/standard/Encoding.java rename to src/main/java/com/rtfparserkit/parser/standard/Encoding.java diff --git a/RTF Parser Kit/src/com/rtfparserkit/parser/standard/FontCharset.java b/src/main/java/com/rtfparserkit/parser/standard/FontCharset.java similarity index 100% rename from RTF Parser Kit/src/com/rtfparserkit/parser/standard/FontCharset.java rename to src/main/java/com/rtfparserkit/parser/standard/FontCharset.java 
diff --git a/RTF Parser Kit/src/com/rtfparserkit/parser/standard/GroupEndEvent.java b/src/main/java/com/rtfparserkit/parser/standard/GroupEndEvent.java similarity index 100% rename from RTF Parser Kit/src/com/rtfparserkit/parser/standard/GroupEndEvent.java rename to src/main/java/com/rtfparserkit/parser/standard/GroupEndEvent.java diff --git a/RTF Parser Kit/src/com/rtfparserkit/parser/standard/GroupStartEvent.java b/src/main/java/com/rtfparserkit/parser/standard/GroupStartEvent.java similarity index 100% rename from RTF Parser Kit/src/com/rtfparserkit/parser/standard/GroupStartEvent.java rename to src/main/java/com/rtfparserkit/parser/standard/GroupStartEvent.java diff --git a/RTF Parser Kit/src/com/rtfparserkit/parser/standard/IParserEvent.java b/src/main/java/com/rtfparserkit/parser/standard/IParserEvent.java similarity index 100% rename from RTF Parser Kit/src/com/rtfparserkit/parser/standard/IParserEvent.java rename to src/main/java/com/rtfparserkit/parser/standard/IParserEvent.java diff --git a/RTF Parser Kit/src/com/rtfparserkit/parser/standard/IParserEventHandler.java b/src/main/java/com/rtfparserkit/parser/standard/IParserEventHandler.java similarity index 100% rename from RTF Parser Kit/src/com/rtfparserkit/parser/standard/IParserEventHandler.java rename to src/main/java/com/rtfparserkit/parser/standard/IParserEventHandler.java diff --git a/RTF Parser Kit/src/com/rtfparserkit/parser/standard/ParserEventType.java b/src/main/java/com/rtfparserkit/parser/standard/ParserEventType.java similarity index 100% rename from RTF Parser Kit/src/com/rtfparserkit/parser/standard/ParserEventType.java rename to src/main/java/com/rtfparserkit/parser/standard/ParserEventType.java diff --git a/RTF Parser Kit/src/com/rtfparserkit/parser/standard/ParserState.java b/src/main/java/com/rtfparserkit/parser/standard/ParserState.java similarity index 100% rename from RTF Parser Kit/src/com/rtfparserkit/parser/standard/ParserState.java rename to 
src/main/java/com/rtfparserkit/parser/standard/ParserState.java diff --git a/RTF Parser Kit/src/com/rtfparserkit/parser/standard/StandardRtfParser.java b/src/main/java/com/rtfparserkit/parser/standard/StandardRtfParser.java similarity index 100% rename from RTF Parser Kit/src/com/rtfparserkit/parser/standard/StandardRtfParser.java rename to src/main/java/com/rtfparserkit/parser/standard/StandardRtfParser.java diff --git a/RTF Parser Kit/src/com/rtfparserkit/parser/standard/StringEvent.java b/src/main/java/com/rtfparserkit/parser/standard/StringEvent.java similarity index 100% rename from RTF Parser Kit/src/com/rtfparserkit/parser/standard/StringEvent.java rename to src/main/java/com/rtfparserkit/parser/standard/StringEvent.java diff --git a/RTF Parser Kit/src/com/rtfparserkit/parser/standard/UprHandler.java b/src/main/java/com/rtfparserkit/parser/standard/UprHandler.java similarity index 100% rename from RTF Parser Kit/src/com/rtfparserkit/parser/standard/UprHandler.java rename to src/main/java/com/rtfparserkit/parser/standard/UprHandler.java diff --git a/RTF Parser Kit/src/com/rtfparserkit/rtf/Command.java b/src/main/java/com/rtfparserkit/rtf/Command.java similarity index 100% rename from RTF Parser Kit/src/com/rtfparserkit/rtf/Command.java rename to src/main/java/com/rtfparserkit/rtf/Command.java diff --git a/RTF Parser Kit/src/com/rtfparserkit/rtf/CommandType.java b/src/main/java/com/rtfparserkit/rtf/CommandType.java similarity index 100% rename from RTF Parser Kit/src/com/rtfparserkit/rtf/CommandType.java rename to src/main/java/com/rtfparserkit/rtf/CommandType.java diff --git a/RTF Parser Kit/src/com/rtfparserkit/utils/HexUtils.java b/src/main/java/com/rtfparserkit/utils/HexUtils.java similarity index 100% rename from RTF Parser Kit/src/com/rtfparserkit/utils/HexUtils.java rename to src/main/java/com/rtfparserkit/utils/HexUtils.java diff --git a/RTF Parser Kit/src/com/rtfparserkit/utils/RtfDump.java b/src/main/java/com/rtfparserkit/utils/RtfDump.java 
similarity index 100% rename from RTF Parser Kit/src/com/rtfparserkit/utils/RtfDump.java rename to src/main/java/com/rtfparserkit/utils/RtfDump.java diff --git a/RTF Parser Kit/src/com/rtfparserkit/utils/RtfDumpListener.java b/src/main/java/com/rtfparserkit/utils/RtfDumpListener.java similarity index 88% rename from RTF Parser Kit/src/com/rtfparserkit/utils/RtfDumpListener.java rename to src/main/java/com/rtfparserkit/utils/RtfDumpListener.java index a610428..734b611 100644 --- a/RTF Parser Kit/src/com/rtfparserkit/utils/RtfDumpListener.java +++ b/src/main/java/com/rtfparserkit/utils/RtfDumpListener.java @@ -24,17 +24,12 @@ import com.rtfparserkit.parser.IRtfListener; import com.rtfparserkit.rtf.Command; -import com.sun.xml.internal.txw2.output.IndentingXMLStreamWriter; /** * Trivial class used to convert events generated by an RTF parser into an XML document. * The primary purpose of this code is to debug the parser output, and provide a * convenient method for comparing expected and actual parser behaviour in test cases. - * - * Note that we're using an internal Sun class to produce indented XML. Not strictly - * necessary, but it makes the output more readable. 
*/ -@SuppressWarnings("restriction") public class RtfDumpListener implements IRtfListener { /** @@ -43,7 +38,7 @@ public class RtfDumpListener implements IRtfListener public RtfDumpListener(OutputStream stream) throws XMLStreamException { - writer = new IndentingXMLStreamWriter(XMLOutputFactory.newInstance().createXMLStreamWriter(stream, "UTF-8")); + writer = XMLOutputFactory.newInstance().createXMLStreamWriter(stream, "UTF-8"); } /** diff --git a/RTF Parser Kit/test/com/rtfparserkit/converter/text/StreamTextConverterTest.java b/src/test/java/com/rtfparserkit/converter/text/StreamTextConverterTest.java similarity index 100% rename from RTF Parser Kit/test/com/rtfparserkit/converter/text/StreamTextConverterTest.java rename to src/test/java/com/rtfparserkit/converter/text/StreamTextConverterTest.java diff --git a/RTF Parser Kit/test/com/rtfparserkit/converter/text/StringTextConverterTest.java b/src/test/java/com/rtfparserkit/converter/text/StringTextConverterTest.java similarity index 100% rename from RTF Parser Kit/test/com/rtfparserkit/converter/text/StringTextConverterTest.java rename to src/test/java/com/rtfparserkit/converter/text/StringTextConverterTest.java diff --git a/RTF Parser Kit/test/com/rtfparserkit/parser/raw/ByteBufferTest.java b/src/test/java/com/rtfparserkit/parser/raw/ByteBufferTest.java similarity index 100% rename from RTF Parser Kit/test/com/rtfparserkit/parser/raw/ByteBufferTest.java rename to src/test/java/com/rtfparserkit/parser/raw/ByteBufferTest.java diff --git a/RTF Parser Kit/test/com/rtfparserkit/parser/raw/RawRtfParserTest.java b/src/test/java/com/rtfparserkit/parser/raw/RawRtfParserTest.java similarity index 100% rename from RTF Parser Kit/test/com/rtfparserkit/parser/raw/RawRtfParserTest.java rename to src/test/java/com/rtfparserkit/parser/raw/RawRtfParserTest.java diff --git a/RTF Parser Kit/test/com/rtfparserkit/parser/standard/StandardRtfParserTest.java b/src/test/java/com/rtfparserkit/parser/standard/StandardRtfParserTest.java 
similarity index 100% rename from RTF Parser Kit/test/com/rtfparserkit/parser/standard/StandardRtfParserTest.java rename to src/test/java/com/rtfparserkit/parser/standard/StandardRtfParserTest.java diff --git a/RTF Parser Kit/test/com/rtfparserkit/utils/TestUtilities.java b/src/test/java/com/rtfparserkit/utils/TestUtilities.java similarity index 100% rename from RTF Parser Kit/test/com/rtfparserkit/utils/TestUtilities.java rename to src/test/java/com/rtfparserkit/utils/TestUtilities.java diff --git a/RTF Parser Kit/test/com/rtfparserkit/converter/text/data/testTextConversion.rtf b/src/test/resources/com/rtfparserkit/converter/text/data/testTextConversion.rtf similarity index 100% rename from RTF Parser Kit/test/com/rtfparserkit/converter/text/data/testTextConversion.rtf rename to src/test/resources/com/rtfparserkit/converter/text/data/testTextConversion.rtf diff --git a/RTF Parser Kit/test/com/rtfparserkit/converter/text/data/testTextConversion.txt b/src/test/resources/com/rtfparserkit/converter/text/data/testTextConversion.txt similarity index 100% rename from RTF Parser Kit/test/com/rtfparserkit/converter/text/data/testTextConversion.txt rename to src/test/resources/com/rtfparserkit/converter/text/data/testTextConversion.txt diff --git a/RTF Parser Kit/test/com/rtfparserkit/parser/raw/data/testRawParse.rtf b/src/test/resources/com/rtfparserkit/parser/raw/data/testRawParse.rtf similarity index 100% rename from RTF Parser Kit/test/com/rtfparserkit/parser/raw/data/testRawParse.rtf rename to src/test/resources/com/rtfparserkit/parser/raw/data/testRawParse.rtf diff --git a/src/test/resources/com/rtfparserkit/parser/raw/data/testRawParse.xml b/src/test/resources/com/rtfparserkit/parser/raw/data/testRawParse.xml new file mode 100644 index 0000000..e1e8aef --- /dev/null +++ b/src/test/resources/com/rtfparserkit/parser/raw/data/testRawParse.xml @@ -0,0 +1 @@ +Calibri;Msftedit 5.41.21.2510;Test1 \ No newline at end of file diff --git a/RTF Parser 
Kit/test/com/rtfparserkit/parser/raw/data/testSpecialChars.rtf b/src/test/resources/com/rtfparserkit/parser/raw/data/testSpecialChars.rtf similarity index 100% rename from RTF Parser Kit/test/com/rtfparserkit/parser/raw/data/testSpecialChars.rtf rename to src/test/resources/com/rtfparserkit/parser/raw/data/testSpecialChars.rtf diff --git a/src/test/resources/com/rtfparserkit/parser/raw/data/testSpecialChars.xml b/src/test/resources/com/rtfparserkit/parser/raw/data/testSpecialChars.xml new file mode 100644 index 0000000..e5e1318 --- /dev/null +++ b/src/test/resources/com/rtfparserkit/parser/raw/data/testSpecialChars.xml @@ -0,0 +1 @@ +Calibri;Msftedit 5.41.21.2510;Tab ABTab ABCR ABLF ABThis is a line break.Here is the new line.Emdash: Endash: Emspace: Enspace: Qmspace: Bullet: Lquote: Rquote: Ldblquote: Rdblquote: \ No newline at end of file diff --git a/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/test10001Encoding.rtf b/src/test/resources/com/rtfparserkit/parser/standard/data/test10001Encoding.rtf similarity index 100% rename from RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/test10001Encoding.rtf rename to src/test/resources/com/rtfparserkit/parser/standard/data/test10001Encoding.rtf diff --git a/src/test/resources/com/rtfparserkit/parser/standard/data/test10001Encoding.xml b/src/test/resources/com/rtfparserkit/parser/standard/data/test10001Encoding.xml new file mode 100644 index 0000000..91a618f --- /dev/null +++ b/src/test/resources/com/rtfparserkit/parser/standard/data/test10001Encoding.xml @@ -0,0 +1 @@ +HiraKakuPro-W6;Helvetica-Bold;Helvetica;HiraKakuPro-W3;;;{disc}.;;;お読みください \ No newline at end of file diff --git a/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/test10007Encoding.rtf b/src/test/resources/com/rtfparserkit/parser/standard/data/test10007Encoding.rtf similarity index 100% rename from RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/test10007Encoding.rtf rename to 
src/test/resources/com/rtfparserkit/parser/standard/data/test10007Encoding.rtf diff --git a/src/test/resources/com/rtfparserkit/parser/standard/data/test10007Encoding.xml b/src/test/resources/com/rtfparserkit/parser/standard/data/test10007Encoding.xml new file mode 100644 index 0000000..af517a5 --- /dev/null +++ b/src/test/resources/com/rtfparserkit/parser/standard/data/test10007Encoding.xml @@ -0,0 +1 @@ +LucidaGrande;Georgia;Verdana;;;Комманда разработчиков \ No newline at end of file diff --git a/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/test437Encoding.rtf b/src/test/resources/com/rtfparserkit/parser/standard/data/test437Encoding.rtf similarity index 100% rename from RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/test437Encoding.rtf rename to src/test/resources/com/rtfparserkit/parser/standard/data/test437Encoding.rtf diff --git a/src/test/resources/com/rtfparserkit/parser/standard/data/test437Encoding.xml b/src/test/resources/com/rtfparserkit/parser/standard/data/test437Encoding.xml new file mode 100644 index 0000000..b137d5e --- /dev/null +++ b/src/test/resources/com/rtfparserkit/parser/standard/data/test437Encoding.xml @@ -0,0 +1 @@ +Times New RomanTimes;Helvetica-BoldHelvetica;Helvetica;LucidaGrandeLucida Grande;;;The quick brown fox jumped over the lazy dogs.footnote reference;The quick brown fox jumped over the lazy dogs.endnote reference;Some text goes here so you can see what your style will look like.footnote text;Sample text for Foot/End Notes Styleendnote text;Test DocumentTest Title: (Test Subtitle)Test Heading: Test Text. 
\ No newline at end of file diff --git a/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/test874Encoding.rtf b/src/test/resources/com/rtfparserkit/parser/standard/data/test874Encoding.rtf similarity index 100% rename from RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/test874Encoding.rtf rename to src/test/resources/com/rtfparserkit/parser/standard/data/test874Encoding.rtf diff --git a/src/test/resources/com/rtfparserkit/parser/standard/data/test874Encoding.xml b/src/test/resources/com/rtfparserkit/parser/standard/data/test874Encoding.xml new file mode 100644 index 0000000..350ee7d --- /dev/null +++ b/src/test/resources/com/rtfparserkit/parser/standard/data/test874Encoding.xml @@ -0,0 +1 @@ +02020603050405020304Times New Roman;020b0604020202020204Arial;02020603050405020304Angsana New;00000000000000000000SymbolMT;Times New Roman CE;Times New Roman Cyr;Times New Roman Greek;Times New Roman Tur;Times New Roman (Hebrew);Times New Roman (Arabic);Times New Roman Baltic;Times New Roman (Vietnamese);Arial CE;Arial Cyr;Arial Greek;Arial Tur;Arial (Hebrew);Arial (Arabic);Arial Baltic;Arial (Vietnamese);Angsana New (Thai);;;;;;;;;;;;;;;;;;Normal;Default Paragraph Font;Normal Table;Microsoft Word 11.0.5604;3Carestream Health Inc.SAMAK ...)()()()()()1. 
Test Heading Some test text Some more test text \ No newline at end of file diff --git a/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/test950Encoding.rtf b/src/test/resources/com/rtfparserkit/parser/standard/data/test950Encoding.rtf similarity index 100% rename from RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/test950Encoding.rtf rename to src/test/resources/com/rtfparserkit/parser/standard/data/test950Encoding.rtf diff --git a/src/test/resources/com/rtfparserkit/parser/standard/data/test950Encoding.xml b/src/test/resources/com/rtfparserkit/parser/standard/data/test950Encoding.xml new file mode 100644 index 0000000..19f010f --- /dev/null +++ b/src/test/resources/com/rtfparserkit/parser/standard/data/test950Encoding.xml @@ -0,0 +1 @@ +LucidaGrande;;;TitleTest AuthorTest TextCopyright © 2004-2013 Test Project \ No newline at end of file diff --git a/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testDefaultEncodingParse.rtf b/src/test/resources/com/rtfparserkit/parser/standard/data/testDefaultEncodingParse.rtf similarity index 100% rename from RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testDefaultEncodingParse.rtf rename to src/test/resources/com/rtfparserkit/parser/standard/data/testDefaultEncodingParse.rtf diff --git a/src/test/resources/com/rtfparserkit/parser/standard/data/testDefaultEncodingParse.xml b/src/test/resources/com/rtfparserkit/parser/standard/data/testDefaultEncodingParse.xml new file mode 100644 index 0000000..0071494 --- /dev/null +++ b/src/test/resources/com/rtfparserkit/parser/standard/data/testDefaultEncodingParse.xml @@ -0,0 +1 @@ +Test1 \ No newline at end of file diff --git a/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testEncodingParse.rtf b/src/test/resources/com/rtfparserkit/parser/standard/data/testEncodingParse.rtf similarity index 100% rename from RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testEncodingParse.rtf rename to 
src/test/resources/com/rtfparserkit/parser/standard/data/testEncodingParse.rtf diff --git a/src/test/resources/com/rtfparserkit/parser/standard/data/testEncodingParse.xml b/src/test/resources/com/rtfparserkit/parser/standard/data/testEncodingParse.xml new file mode 100644 index 0000000..bd8d6f6 --- /dev/null +++ b/src/test/resources/com/rtfparserkit/parser/standard/data/testEncodingParse.xml @@ -0,0 +1 @@ +Calibri;Msftedit 5.41.21.2510;Test1 \ No newline at end of file diff --git a/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testGitHubIssue6.rtf b/src/test/resources/com/rtfparserkit/parser/standard/data/testGitHubIssue6.rtf similarity index 100% rename from RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testGitHubIssue6.rtf rename to src/test/resources/com/rtfparserkit/parser/standard/data/testGitHubIssue6.rtf diff --git a/src/test/resources/com/rtfparserkit/parser/standard/data/testGitHubIssue6.xml b/src/test/resources/com/rtfparserkit/parser/standard/data/testGitHubIssue6.xml new file mode 100644 index 0000000..da7cb04 --- /dev/null +++ b/src/test/resources/com/rtfparserkit/parser/standard/data/testGitHubIssue6.xml @@ -0,0 +1 @@ +Times New Roman;Arial;Courier;;;Normal;heading 3;heading 2;heading 1;iText 2.1.7 by 1T3XTPAGE . INNEN. KÜCHE - TAGEin Absatz mit Line-Separator:Der geht hier auf einer neuen Zeile weiter.INNEN. KÜCHE - TAGHier ist die zweite Szene. 
\ No newline at end of file diff --git a/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testGreekEncoding.rtf b/src/test/resources/com/rtfparserkit/parser/standard/data/testGreekEncoding.rtf similarity index 100% rename from RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testGreekEncoding.rtf rename to src/test/resources/com/rtfparserkit/parser/standard/data/testGreekEncoding.rtf diff --git a/src/test/resources/com/rtfparserkit/parser/standard/data/testGreekEncoding.xml b/src/test/resources/com/rtfparserkit/parser/standard/data/testGreekEncoding.xml new file mode 100644 index 0000000..75a5ba8 --- /dev/null +++ b/src/test/resources/com/rtfparserkit/parser/standard/data/testGreekEncoding.xml @@ -0,0 +1 @@ +Tahoma;Tahoma;Unicode €Ω \ No newline at end of file diff --git a/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testHex.rtf b/src/test/resources/com/rtfparserkit/parser/standard/data/testHex.rtf similarity index 100% rename from RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testHex.rtf rename to src/test/resources/com/rtfparserkit/parser/standard/data/testHex.rtf diff --git a/src/test/resources/com/rtfparserkit/parser/standard/data/testHex.xml b/src/test/resources/com/rtfparserkit/parser/standard/data/testHex.xml new file mode 100644 index 0000000..61d8f4b --- /dev/null +++ b/src/test/resources/com/rtfparserkit/parser/standard/data/testHex.xml @@ -0,0 +1 @@ +Calibri;Msftedit 5.41.21.2510;ABC \ No newline at end of file diff --git a/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testKoreanEncoding.rtf b/src/test/resources/com/rtfparserkit/parser/standard/data/testKoreanEncoding.rtf similarity index 100% rename from RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testKoreanEncoding.rtf rename to src/test/resources/com/rtfparserkit/parser/standard/data/testKoreanEncoding.rtf diff --git a/src/test/resources/com/rtfparserkit/parser/standard/data/testKoreanEncoding.xml 
b/src/test/resources/com/rtfparserkit/parser/standard/data/testKoreanEncoding.xml new file mode 100644 index 0000000..d5dba01 --- /dev/null +++ b/src/test/resources/com/rtfparserkit/parser/standard/data/testKoreanEncoding.xml @@ -0,0 +1 @@ +맑은 고딕;Riched20 14.0.4750.1000;MS Project Addin ProjectPlus \ No newline at end of file diff --git a/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testMultiByteHex.rtf b/src/test/resources/com/rtfparserkit/parser/standard/data/testMultiByteHex.rtf similarity index 100% rename from RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testMultiByteHex.rtf rename to src/test/resources/com/rtfparserkit/parser/standard/data/testMultiByteHex.rtf diff --git a/src/test/resources/com/rtfparserkit/parser/standard/data/testMultiByteHex.xml b/src/test/resources/com/rtfparserkit/parser/standard/data/testMultiByteHex.xml new file mode 100644 index 0000000..2967adb --- /dev/null +++ b/src/test/resources/com/rtfparserkit/parser/standard/data/testMultiByteHex.xml @@ -0,0 +1 @@ +MS Pゴシック;Tahoma;Riched20 5.50.99.2014;お元気ですか? 
\ No newline at end of file diff --git a/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testNegativeUnicode.rtf b/src/test/resources/com/rtfparserkit/parser/standard/data/testNegativeUnicode.rtf similarity index 100% rename from RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testNegativeUnicode.rtf rename to src/test/resources/com/rtfparserkit/parser/standard/data/testNegativeUnicode.rtf diff --git a/src/test/resources/com/rtfparserkit/parser/standard/data/testNegativeUnicode.xml b/src/test/resources/com/rtfparserkit/parser/standard/data/testNegativeUnicode.xml new file mode 100644 index 0000000..38ce454 --- /dev/null +++ b/src/test/resources/com/rtfparserkit/parser/standard/data/testNegativeUnicode.xml @@ -0,0 +1 @@ +Calibri;;;Msftedit 5.41.21.2510;From: HYPERLINK "http://unicode-table.com/en/#private-use-area"http://unicode-table.com/en/#private-use-areaF700F710F720 \ No newline at end of file diff --git a/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testSpecialChars.rtf b/src/test/resources/com/rtfparserkit/parser/standard/data/testSpecialChars.rtf similarity index 100% rename from RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testSpecialChars.rtf rename to src/test/resources/com/rtfparserkit/parser/standard/data/testSpecialChars.rtf diff --git a/src/test/resources/com/rtfparserkit/parser/standard/data/testSpecialChars.xml b/src/test/resources/com/rtfparserkit/parser/standard/data/testSpecialChars.xml new file mode 100644 index 0000000..0fa5992 --- /dev/null +++ b/src/test/resources/com/rtfparserkit/parser/standard/data/testSpecialChars.xml @@ -0,0 +1 @@ +Calibri;Msftedit 5.41.21.2510;Tab ABTab ABCR ABLF ABThis is a line break.Here is the new line.Emdash: —Endash: –Emspace:  Enspace:  Qmspace:  Bullet: •Lquote: ‘Rquote: ’Ldblquote: “Rdblquote: ” \ No newline at end of file diff --git a/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testStyles.rtf 
b/src/test/resources/com/rtfparserkit/parser/standard/data/testStyles.rtf similarity index 100% rename from RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testStyles.rtf rename to src/test/resources/com/rtfparserkit/parser/standard/data/testStyles.rtf diff --git a/src/test/resources/com/rtfparserkit/parser/standard/data/testStyles.xml b/src/test/resources/com/rtfparserkit/parser/standard/data/testStyles.xml new file mode 100644 index 0000000..be68899 --- /dev/null +++ b/src/test/resources/com/rtfparserkit/parser/standard/data/testStyles.xml @@ -0,0 +1 @@ +Calibri;Arial;Msftedit 5.41.21.2510;Test 1 - normal.Test 2 - italic.Test 3 - bold.Test 4 -normal.Test 5 - Calibri 11.Test 6 - Arial 11.Test 7 - Calibri 11.Test 6 - Arial 12.Test 7 - Calibri 11. \ No newline at end of file diff --git a/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testTurkishEncoding.rtf b/src/test/resources/com/rtfparserkit/parser/standard/data/testTurkishEncoding.rtf similarity index 100% rename from RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testTurkishEncoding.rtf rename to src/test/resources/com/rtfparserkit/parser/standard/data/testTurkishEncoding.rtf diff --git a/src/test/resources/com/rtfparserkit/parser/standard/data/testTurkishEncoding.xml b/src/test/resources/com/rtfparserkit/parser/standard/data/testTurkishEncoding.xml new file mode 100644 index 0000000..913eb3d --- /dev/null +++ b/src/test/resources/com/rtfparserkit/parser/standard/data/testTurkishEncoding.xml @@ -0,0 +1 @@ +Segoe UI;Segoe UI;Riched20 15.0.4567Turkish Encoding. 
\ No newline at end of file diff --git a/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testUnicode.rtf b/src/test/resources/com/rtfparserkit/parser/standard/data/testUnicode.rtf similarity index 100% rename from RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testUnicode.rtf rename to src/test/resources/com/rtfparserkit/parser/standard/data/testUnicode.rtf diff --git a/src/test/resources/com/rtfparserkit/parser/standard/data/testUnicode.xml b/src/test/resources/com/rtfparserkit/parser/standard/data/testUnicode.xml new file mode 100644 index 0000000..9241d2b --- /dev/null +++ b/src/test/resources/com/rtfparserkit/parser/standard/data/testUnicode.xml @@ -0,0 +1 @@ +Sylfaen;Sylfaen;Sylfaen;Shonar Bangla;Microsoft Himalaya;DaunPenh;Euphemia;SimSun;Calibri;Msftedit 5.41.21.2510;ԱրամJohann StraußBelgiëRené Magritteবাংলা་ཡུལ།ប្រទេស​​​កម្ពុជាᓄᓇᕗᒻᒥᐅᑦ中国Česká republika \ No newline at end of file diff --git a/RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testUpr.rtf b/src/test/resources/com/rtfparserkit/parser/standard/data/testUpr.rtf similarity index 100% rename from RTF Parser Kit/test/com/rtfparserkit/parser/standard/data/testUpr.rtf rename to src/test/resources/com/rtfparserkit/parser/standard/data/testUpr.rtf diff --git a/src/test/resources/com/rtfparserkit/parser/standard/data/testUpr.xml b/src/test/resources/com/rtfparserkit/parser/standard/data/testUpr.xml new file mode 100644 index 0000000..aa23c5f --- /dev/null +++ b/src/test/resources/com/rtfparserkit/parser/standard/data/testUpr.xml @@ -0,0 +1 @@ +02020603050405020304Times New Roman;02000500000000000000Times;02040503050406030204Cambria;02000503060000020004Optima;Lucida Grande;020b0604020202020204Arial;02020603050405020304Times New Roman;020f0502020204030204Calibri;02020603050405020304Times New Roman;02020603050405020304Times New Roman;02020603050405020304Times New Roman;02040503050406030204Cambria;02020603050405020304Times New Roman;;;;;;;;;;;;;;;;;;;Normal;heading 1;heading 
2;heading 3;Default Paragraph Font;Normal Table;Heading 3 Char;Heading 2 Char;Heading 1 Char;header;Header Char;Normal (Web);apple-converted-space;TestTestTESThttp://schemas.microsoft.com/office/word/2003/wordml013f...)()()()()()TESTEnd User License AgreementTEST \ No newline at end of file From 16c286a3480e4ef6a58f61e6d4c24f75b47606f5 Mon Sep 17 00:00:00 2001 From: Jon Iles Date: Thu, 8 Nov 2018 00:56:24 +0000 Subject: [PATCH 20/30] Target Java 1.6 --- .classpath | 2 +- .settings/org.eclipse.jdt.core.prefs | 6 +++--- pom.xml | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.classpath b/.classpath index 065c2a8..b9a9319 100644 --- a/.classpath +++ b/.classpath @@ -19,7 +19,7 @@ - + diff --git a/.settings/org.eclipse.jdt.core.prefs b/.settings/org.eclipse.jdt.core.prefs index 1e9dd93..809653a 100644 --- a/.settings/org.eclipse.jdt.core.prefs +++ b/.settings/org.eclipse.jdt.core.prefs @@ -1,6 +1,6 @@ eclipse.preferences.version=1 -org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.7 -org.eclipse.jdt.core.compiler.compliance=1.7 +org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.6 +org.eclipse.jdt.core.compiler.compliance=1.6 org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning org.eclipse.jdt.core.compiler.release=disabled -org.eclipse.jdt.core.compiler.source=1.7 +org.eclipse.jdt.core.compiler.source=1.6 diff --git a/pom.xml b/pom.xml index 73adb78..1dc4c9b 100644 --- a/pom.xml +++ b/pom.xml @@ -4,7 +4,7 @@ 4.0.0 com.github.joniles rtfparserkit - 1.12.0 + 1.13.0 jar RTF Parser Kit @@ -67,8 +67,8 @@ maven-compiler-plugin 3.0 - 1.7 - 1.7 + 1.6 + 1.6 From d609b5c9b1305b1e5ebd24203fab1169e83c6259 Mon Sep 17 00:00:00 2001 From: Jon Iles Date: Sun, 29 Mar 2020 18:50:40 +0100 Subject: [PATCH 21/30] Handle cpg command. Handle implicit use of font 0. 
--- pom.xml | 2 +- .../parser/standard/ParserState.java | 2 ++ .../parser/standard/StandardRtfParser.java | 34 +++++++++++++++--- .../standard/StandardRtfParserTest.java | 12 +++++++ .../standard/data/testJapaneseJisEncoding.rtf | Bin 0 -> 134 bytes .../standard/data/testJapaneseJisEncoding.xml | 1 + .../data/testJapaneseUtf8Encoding.rtf | Bin 0 -> 143 bytes .../data/testJapaneseUtf8Encoding.xml | 1 + 8 files changed, 47 insertions(+), 5 deletions(-) create mode 100644 src/test/resources/com/rtfparserkit/parser/standard/data/testJapaneseJisEncoding.rtf create mode 100644 src/test/resources/com/rtfparserkit/parser/standard/data/testJapaneseJisEncoding.xml create mode 100644 src/test/resources/com/rtfparserkit/parser/standard/data/testJapaneseUtf8Encoding.rtf create mode 100644 src/test/resources/com/rtfparserkit/parser/standard/data/testJapaneseUtf8Encoding.xml diff --git a/pom.xml b/pom.xml index 73adb78..52109be 100644 --- a/pom.xml +++ b/pom.xml @@ -4,7 +4,7 @@ 4.0.0 com.github.joniles rtfparserkit - 1.12.0 + 1.14.0 jar RTF Parser Kit diff --git a/src/main/java/com/rtfparserkit/parser/standard/ParserState.java b/src/main/java/com/rtfparserkit/parser/standard/ParserState.java index c45a8c1..d7c310e 100644 --- a/src/main/java/com/rtfparserkit/parser/standard/ParserState.java +++ b/src/main/java/com/rtfparserkit/parser/standard/ParserState.java @@ -28,12 +28,14 @@ public ParserState() public ParserState(ParserState state) { + currentFontExplicitlySet = state.currentFontExplicitlySet; currentFont = state.currentFont; currentEncoding = state.currentEncoding; currentFontEncoding = state.currentFontEncoding; unicodeAlternateSkipCount = state.unicodeAlternateSkipCount; } + public boolean currentFontExplicitlySet = false; public int currentFont; public String currentEncoding = Encoding.ANSI_ENCODING; public String currentFontEncoding; diff --git a/src/main/java/com/rtfparserkit/parser/standard/StandardRtfParser.java 
b/src/main/java/com/rtfparserkit/parser/standard/StandardRtfParser.java index 7991d78..652503e 100644 --- a/src/main/java/com/rtfparserkit/parser/standard/StandardRtfParser.java +++ b/src/main/java/com/rtfparserkit/parser/standard/StandardRtfParser.java @@ -100,6 +100,12 @@ public void processCharacterBytes(byte[] data) */ private String currentEncoding() { + // Assume font 0 if a font has not been set explicitly + if (!state.currentFontExplicitlySet) + { + state.currentFontExplicitlySet = true; + state.currentFontEncoding = m_fontEncodings.get(Integer.valueOf(0)); + } return state.currentFontEncoding == null ? state.currentEncoding : state.currentFontEncoding; } @@ -275,6 +281,13 @@ public void processCommand(Command command, int parameter, boolean hasParameter, break; } + case cpg: + { + processFontCodepage(parameter); + handleCommand(command, parameter, hasParameter, optionalFlag); + break; + } + default: { handleCommand(command, parameter, hasParameter, optionalFlag); @@ -298,13 +311,26 @@ private void processFont(int parameter) */ private void processFontCharset(int parameter) { - String charset = FontCharset.getCharset(parameter); + setFontEncoding(FontCharset.getCharset(parameter)); + } + + private void processFontCodepage(int parameter) + { + setFontEncoding(Integer.toString(parameter)); + } + + private void setFontEncoding(String charset) + { if (charset != null) { - m_fontEncodings.put(Integer.valueOf(state.currentFont), Encoding.LOCALEID_MAPPING.get(charset)); - } + String encoding = Encoding.LOCALEID_MAPPING.get(charset); + if (encoding != null) + { + m_fontEncodings.put(Integer.valueOf(state.currentFont), encoding); + } + } } - + /** * Switch the encoding based on the RTF command received. 
*/ diff --git a/src/test/java/com/rtfparserkit/parser/standard/StandardRtfParserTest.java b/src/test/java/com/rtfparserkit/parser/standard/StandardRtfParserTest.java index b769a92..f4d2cef 100644 --- a/src/test/java/com/rtfparserkit/parser/standard/StandardRtfParserTest.java +++ b/src/test/java/com/rtfparserkit/parser/standard/StandardRtfParserTest.java @@ -130,4 +130,16 @@ public void testKoreanEncoding() throws Exception { TestUtilities.assertRtfParserDumpMatches(this, new StandardRtfParser(), "testKoreanEncoding"); } + + @Test + public void testJapaneseJisEncoding() throws Exception + { + TestUtilities.assertRtfParserDumpMatches(this, new StandardRtfParser(), "testJapaneseJisEncoding"); + } + + @Test + public void testJapaneseUtf8Encoding() throws Exception + { + TestUtilities.assertRtfParserDumpMatches(this, new StandardRtfParser(), "testJapaneseUtf8Encoding"); + } } diff --git a/src/test/resources/com/rtfparserkit/parser/standard/data/testJapaneseJisEncoding.rtf b/src/test/resources/com/rtfparserkit/parser/standard/data/testJapaneseJisEncoding.rtf new file mode 100644 index 0000000000000000000000000000000000000000..e8c8db4f678c3800ded9358d01a98182f01383a8 GIT binary patch literal 134 zcmW-Yu@1s85JQnpZkkR?Q9_oiHYwJ@q%4iwc>+H>;i{Arial Unicode MS;中國 \ No newline at end of file diff --git a/src/test/resources/com/rtfparserkit/parser/standard/data/testJapaneseUtf8Encoding.rtf b/src/test/resources/com/rtfparserkit/parser/standard/data/testJapaneseUtf8Encoding.rtf new file mode 100644 index 0000000000000000000000000000000000000000..8a08ab6891c8b808928034e4ffac7fa04dd81b60 GIT binary patch literal 143 zcmW-ZyAHx207adf^dHz*fr`O6^#h!Y+g*585`>20)b!s)mz$G&lCz_>z@hUN|6|vM z3n}1y2n@M=qAl-Lx8IR;UO1bk9iimbOzOZ8a!d&JrR(M!7YaRbk&JON*QgVD=ALk# b+#k2&7za>)1~m#)yUr5yYC)A5hb~WFh@3C8 literal 0 HcmV?d00001 diff --git a/src/test/resources/com/rtfparserkit/parser/standard/data/testJapaneseUtf8Encoding.xml 
b/src/test/resources/com/rtfparserkit/parser/standard/data/testJapaneseUtf8Encoding.xml new file mode 100644 index 0000000..d381adc --- /dev/null +++ b/src/test/resources/com/rtfparserkit/parser/standard/data/testJapaneseUtf8Encoding.xml @@ -0,0 +1 @@ +Arial Unicode MS;中國 \ No newline at end of file From c5f98dfcdd8722acf2fe990e8e0b4b41d4ac42e7 Mon Sep 17 00:00:00 2001 From: Jon Iles Date: Tue, 31 Mar 2020 17:24:13 +0100 Subject: [PATCH 22/30] Record when the font has been set explicitly. --- .../com/rtfparserkit/parser/standard/StandardRtfParser.java | 1 + .../rtfparserkit/parser/standard/StandardRtfParserTest.java | 6 ++++++ .../standard/data/testJapaneseJisEncodingTwoFonts.rtf | 3 +++ .../standard/data/testJapaneseJisEncodingTwoFonts.xml | 1 + 4 files changed, 11 insertions(+) create mode 100644 src/test/resources/com/rtfparserkit/parser/standard/data/testJapaneseJisEncodingTwoFonts.rtf create mode 100644 src/test/resources/com/rtfparserkit/parser/standard/data/testJapaneseJisEncodingTwoFonts.xml diff --git a/src/main/java/com/rtfparserkit/parser/standard/StandardRtfParser.java b/src/main/java/com/rtfparserkit/parser/standard/StandardRtfParser.java index 652503e..4f320e6 100644 --- a/src/main/java/com/rtfparserkit/parser/standard/StandardRtfParser.java +++ b/src/main/java/com/rtfparserkit/parser/standard/StandardRtfParser.java @@ -302,6 +302,7 @@ public void processCommand(Command command, int parameter, boolean hasParameter, */ private void processFont(int parameter) { + state.currentFontExplicitlySet = true; state.currentFont = parameter; state.currentFontEncoding = m_fontEncodings.get(Integer.valueOf(parameter)); } diff --git a/src/test/java/com/rtfparserkit/parser/standard/StandardRtfParserTest.java b/src/test/java/com/rtfparserkit/parser/standard/StandardRtfParserTest.java index f4d2cef..6fef53c 100644 --- a/src/test/java/com/rtfparserkit/parser/standard/StandardRtfParserTest.java +++ 
b/src/test/java/com/rtfparserkit/parser/standard/StandardRtfParserTest.java @@ -142,4 +142,10 @@ public void testJapaneseUtf8Encoding() throws Exception { TestUtilities.assertRtfParserDumpMatches(this, new StandardRtfParser(), "testJapaneseUtf8Encoding"); } + + @Test + public void testJapaneseJisEncodingTwoFonts() throws Exception + { + TestUtilities.assertRtfParserDumpMatches(this, new StandardRtfParser(), "testJapaneseJisEncodingTwoFonts"); + } } diff --git a/src/test/resources/com/rtfparserkit/parser/standard/data/testJapaneseJisEncodingTwoFonts.rtf b/src/test/resources/com/rtfparserkit/parser/standard/data/testJapaneseJisEncodingTwoFonts.rtf new file mode 100644 index 0000000..3ed21ea --- /dev/null +++ b/src/test/resources/com/rtfparserkit/parser/standard/data/testJapaneseJisEncodingTwoFonts.rtf @@ -0,0 +1,3 @@ +{\rtf1\ansi\ansicpg1252\deff0\nouicompat\deflang1033{\fonttbl{\f0\fnil\fcharset0 Times New Roman;}{\f1\fnil\fcharset128 Arial Unicode MS;}} +\f1\fs24\'92\'86\'9a\'a0 +} \ No newline at end of file diff --git a/src/test/resources/com/rtfparserkit/parser/standard/data/testJapaneseJisEncodingTwoFonts.xml b/src/test/resources/com/rtfparserkit/parser/standard/data/testJapaneseJisEncodingTwoFonts.xml new file mode 100644 index 0000000..502f074 --- /dev/null +++ b/src/test/resources/com/rtfparserkit/parser/standard/data/testJapaneseJisEncodingTwoFonts.xml @@ -0,0 +1 @@ +Times New Roman;Arial Unicode MS;中國 \ No newline at end of file From 5d6f9096cc52a8bf7a78f8209d0c101d50d3ca62 Mon Sep 17 00:00:00 2001 From: Jon Iles Date: Tue, 31 Mar 2020 17:25:30 +0100 Subject: [PATCH 23/30] Bump version --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 341a49c..afcd60f 100644 --- a/pom.xml +++ b/pom.xml @@ -4,7 +4,7 @@ 4.0.0 com.github.joniles rtfparserkit - 1.14.0 + 1.15.0 jar RTF Parser Kit From a3ae64012a4cf117316f42c1855ff68809b7930a Mon Sep 17 00:00:00 2001 From: Jon Iles Date: Wed, 10 Feb 2021 13:04:56 +0000 Subject: 
[PATCH 24/30] Update JUnit version --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index afcd60f..d61e25a 100644 --- a/pom.xml +++ b/pom.xml @@ -45,7 +45,7 @@ junit junit - 4.11 + 4.13.1 test From 3119eef05f1c517df8298b0cfc6c7b469739b025 Mon Sep 17 00:00:00 2001 From: Jon Iles Date: Wed, 10 Feb 2021 13:07:28 +0000 Subject: [PATCH 25/30] Use MS932 to support NEC special characters --- .../java/com/rtfparserkit/parser/standard/Encoding.java | 2 +- .../parser/standard/StandardRtfParserTest.java | 6 ++++++ .../parser/standard/data/testNecCharacters.rtf | 7 +++++++ .../parser/standard/data/testNecCharacters.xml | 1 + 4 files changed, 15 insertions(+), 1 deletion(-) create mode 100644 src/test/resources/com/rtfparserkit/parser/standard/data/testNecCharacters.rtf create mode 100644 src/test/resources/com/rtfparserkit/parser/standard/data/testNecCharacters.xml diff --git a/src/main/java/com/rtfparserkit/parser/standard/Encoding.java b/src/main/java/com/rtfparserkit/parser/standard/Encoding.java index 7c9269d..e29c48d 100644 --- a/src/main/java/com/rtfparserkit/parser/standard/Encoding.java +++ b/src/main/java/com/rtfparserkit/parser/standard/Encoding.java @@ -59,7 +59,7 @@ class Encoding // 870 IBM870 IBM EBCDIC Multilingual/ROECE (Latin 2); IBM EBCDIC Multilingual Latin 2 LOCALEID_MAPPING.put("874", "Cp874"); // windows-874 ANSI/OEM Thai (ISO 8859-11); Thai (Windows) // 875 cp875 IBM EBCDIC Greek Modern - LOCALEID_MAPPING.put("932", "SJIS"); // Japanese + LOCALEID_MAPPING.put("932", "MS932"); // Japanese LOCALEID_MAPPING.put("936", "Cp936"); // Simplified Chinese LOCALEID_MAPPING.put("949", "Cp949"); // Korean LOCALEID_MAPPING.put("950", "Cp950"); // ANSI/OEM Traditional Chinese (Taiwan; Hong Kong SAR, PRC); Chinese Traditional (Big5) diff --git a/src/test/java/com/rtfparserkit/parser/standard/StandardRtfParserTest.java b/src/test/java/com/rtfparserkit/parser/standard/StandardRtfParserTest.java index 6fef53c..014f3c9 100644 
--- a/src/test/java/com/rtfparserkit/parser/standard/StandardRtfParserTest.java +++ b/src/test/java/com/rtfparserkit/parser/standard/StandardRtfParserTest.java @@ -148,4 +148,10 @@ public void testJapaneseJisEncodingTwoFonts() throws Exception { TestUtilities.assertRtfParserDumpMatches(this, new StandardRtfParser(), "testJapaneseJisEncodingTwoFonts"); } + + @Test + public void testNecCharacters() throws Exception + { + TestUtilities.assertRtfParserDumpMatches(this, new StandardRtfParser(), "testNecCharacters"); + } } diff --git a/src/test/resources/com/rtfparserkit/parser/standard/data/testNecCharacters.rtf b/src/test/resources/com/rtfparserkit/parser/standard/data/testNecCharacters.rtf new file mode 100644 index 0000000..8247697 --- /dev/null +++ b/src/test/resources/com/rtfparserkit/parser/standard/data/testNecCharacters.rtf @@ -0,0 +1,7 @@ +{\rtf1\ansi\ansicpg932\deff0\deflang1033\deflangfe1041{\fonttbl{\f0\fnil\fcharset0 MS Sans Serif;} +{\f1\froman\fprq1\fcharset128 MS UI Gothic;}} +{\colortbl;\red255\green0\blue0;\red0\green0\blue255;} +\viewkind4\uc1\pard\cf1\lang1041\f0\fs17 BLC U=>L Splice \f1\fs18\'82\'c5U/W No.2 Dancer +\'82\'a9\'82\'e7\'83\'56\'83\'8f\'94\'ad\'90\'b6\'81\'42Set\'8e\'9e\'82\'c9\'95\'5c\'91\'7710\'87\'6f\'82\'d9\'82\'c7\'83\'80\'81\'5b\'83\'6a\'83\'93\'83\'4f\'81\'40 +pallet\'92\'ea\'82\'cc +Roll\cf2\f0\fs17 \par } \ No newline at end of file diff --git a/src/test/resources/com/rtfparserkit/parser/standard/data/testNecCharacters.xml b/src/test/resources/com/rtfparserkit/parser/standard/data/testNecCharacters.xml new file mode 100644 index 0000000..0b93523 --- /dev/null +++ b/src/test/resources/com/rtfparserkit/parser/standard/data/testNecCharacters.xml @@ -0,0 +1 @@ +MS Sans Serif;MS UI Gothic; ;;;BLC U=>L Splice でU/W No.2 Dancerからシワ発生。Set時に表層10㎜ほどムーニング pallet底のRoll \ No newline at end of file From cd420e7cca548fa70387c057614de12a07deddb8 Mon Sep 17 00:00:00 2001 From: Jon Iles Date: Wed, 10 Feb 2021 13:07:57 +0000 Subject: [PATCH 26/30] 
Update to 1.16.0 --- README.md | 112 +++++++++++++++++++++++++++--------------------------- pom.xml | 2 +- 2 files changed, 57 insertions(+), 57 deletions(-) diff --git a/README.md b/README.md index 1dfc7b3..6d69881 100644 --- a/README.md +++ b/README.md @@ -1,56 +1,56 @@ -RTF Parser Kit -============== - -I have often been frustrated by the lack of comprehensive support for working with RTF in Java, and the need to use RTF parsers which are incomplete and form part of larger projects whose libraries I don't want to import just to use the RTF parser. The RTF Parser Kit project is an attempt to address these points. - -The idea is to provide a "kit" of components which can either be used "as-is", for example to extract plain text or HTML from an RTF file, or can be used as a component in a larger application which requires the capability to parse RTF documents. - -What's currently included? --------------------------- -* Raw RTF Parser - parses RTF, sends events representing content to a listener. Performs minimal processing - you get the RTF commands and data exactly as they appear in the file. -* Standard RTF Parser - parses RTF, sends events representing content to a listener. Handles character encoding, Unicode and so on, so you don't have to. This is probably the parser you want to use. -* Text Converter - demonstrates very simple text extraction from an RTF file -* RTF Dump - another demonstration, this time writing the RTF file contents as XML - -Getting Started -=============== - -To install the library, you can either download the latest JAR directly from the GitHub releases page, -or you can add RTF Parser Kit as a dependency using Maven: - -```xml - - com.github.joniles - rtfparserkit - 1.12.0 - -``` - -Once you have the library, you have a choice of two parsers to work with, the standard parser and the raw parser. 
The raw parser carries out minimal processing on the RTF, the standard parser handles character encodings, and translates commands which represent special characters into their Unicode equivalents. Most people will want to use the standard parser. - -The parser is invoked like this: -```java -InputStream is = new FileInputStream("/path/to/my/file.rtf"); -IRtfSource source = new RtfStreamSource(is) -IRtfParser parser = new StandardRtfParser(); -MyRtfListener listener = new MyRtfListener(); -parser.parse(source, listener); -``` -You provide input to the parser via a class that implements the `IRtfSource` interface. Two implementations are provided for you, `RtfStreamSource`, for reading RTF from a stream, and `RtfStringSource` for reading RTF from a string. - -The other thing you need to provide the parser with is a listener class. The listener class implements the `IRtfListener` listener interface. The interface consists of a set of methods which are called by the parser to inform you of when it encounters different parts of the docuent structure. The set of method, along with some comments describing their purpose can be seen [here](https://github.com/joniles/rtfparserkit/blob/master/RTF%20Parser%20Kit/src/com/rtfparserkit/parser/IRtfListener.java). - -You don't need to implement all of the `IRtfListener` interface yourself, if you wish you can subclass `RtfListenerAdaptor` which provides empty methods for all of the `IRtfListener` methods. You can then just override the methods you are interested in. - -An example text extractor is provided, you can invoke it like this: -```java -new StreamTextConverter().convert(new RtfStreamSource(inputStream), outputStream, "UTF-8"); -``` -This code reads an RTF file from the `inputStream` and writes the resulting text to the `outputStream` in the encoding specified by the last argument. 
- -A second example text extractor is also provided, this one extracts text from the RTF file into a string: -```java -StringTextConverter converter = new StringTextConverter(); -converter.convert(new RtfStreamSource(inputStream)); -String extractedText = converter.getText(); -``` +RTF Parser Kit +============== + +I have often been frustrated by the lack of comprehensive support for working with RTF in Java, and the need to use RTF parsers which are incomplete and form part of larger projects whose libraries I don't want to import just to use the RTF parser. The RTF Parser Kit project is an attempt to address these points. + +The idea is to provide a "kit" of components which can either be used "as-is", for example to extract plain text or HTML from an RTF file, or can be used as a component in a larger application which requires the capability to parse RTF documents. + +What's currently included? +-------------------------- +* Raw RTF Parser - parses RTF, sends events representing content to a listener. Performs minimal processing - you get the RTF commands and data exactly as they appear in the file. +* Standard RTF Parser - parses RTF, sends events representing content to a listener. Handles character encoding, Unicode and so on, so you don't have to. This is probably the parser you want to use. +* Text Converter - demonstrates very simple text extraction from an RTF file +* RTF Dump - another demonstration, this time writing the RTF file contents as XML + +Getting Started +=============== + +To install the library, you can either download the latest JAR directly from the GitHub releases page, +or you can add RTF Parser Kit as a dependency using Maven: + +```xml + + com.github.joniles + rtfparserkit + 1.16.0 + +``` + +Once you have the library, you have a choice of two parsers to work with, the standard parser and the raw parser. 
The raw parser carries out minimal processing on the RTF, the standard parser handles character encodings, and translates commands which represent special characters into their Unicode equivalents. Most people will want to use the standard parser. + +The parser is invoked like this: +```java +InputStream is = new FileInputStream("/path/to/my/file.rtf"); +IRtfSource source = new RtfStreamSource(is); +IRtfParser parser = new StandardRtfParser(); +MyRtfListener listener = new MyRtfListener(); +parser.parse(source, listener); +``` +You provide input to the parser via a class that implements the `IRtfSource` interface. Two implementations are provided for you, `RtfStreamSource`, for reading RTF from a stream, and `RtfStringSource` for reading RTF from a string. + +The other thing you need to provide the parser with is a listener class. The listener class implements the `IRtfListener` listener interface. The interface consists of a set of methods which are called by the parser to inform you when it encounters different parts of the document structure. The set of methods, along with some comments describing their purpose, can be seen [here](https://github.com/joniles/rtfparserkit/blob/master/RTF%20Parser%20Kit/src/com/rtfparserkit/parser/IRtfListener.java). + +You don't need to implement all of the `IRtfListener` interface yourself, if you wish you can subclass `RtfListenerAdaptor` which provides empty methods for all of the `IRtfListener` methods. You can then just override the methods you are interested in. + +An example text extractor is provided, you can invoke it like this: +```java +new StreamTextConverter().convert(new RtfStreamSource(inputStream), outputStream, "UTF-8"); +``` +This code reads an RTF file from the `inputStream` and writes the resulting text to the `outputStream` in the encoding specified by the last argument.
+ +A second example text extractor is also provided, this one extracts text from the RTF file into a string: +```java +StringTextConverter converter = new StringTextConverter(); +converter.convert(new RtfStreamSource(inputStream)); +String extractedText = converter.getText(); +``` diff --git a/pom.xml b/pom.xml index d61e25a..1be76e8 100644 --- a/pom.xml +++ b/pom.xml @@ -4,7 +4,7 @@ 4.0.0 com.github.joniles rtfparserkit - 1.15.0 + 1.16.0 jar RTF Parser Kit From 5dbd607c21247a010e2ece1e206f947ca94ec46e Mon Sep 17 00:00:00 2001 From: joniles Date: Thu, 21 Apr 2022 19:26:01 +0100 Subject: [PATCH 27/30] Add IDEA project --- .idea/.gitignore | 3 +++ .idea/compiler.xml | 13 +++++++++++++ .idea/jarRepositories.xml | 20 ++++++++++++++++++++ .idea/misc.xml | 12 ++++++++++++ .idea/vcs.xml | 6 ++++++ 5 files changed, 54 insertions(+) create mode 100644 .idea/.gitignore create mode 100644 .idea/compiler.xml create mode 100644 .idea/jarRepositories.xml create mode 100644 .idea/misc.xml create mode 100644 .idea/vcs.xml diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..26d3352 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,3 @@ +# Default ignored files +/shelf/ +/workspace.xml diff --git a/.idea/compiler.xml b/.idea/compiler.xml new file mode 100644 index 0000000..984ff77 --- /dev/null +++ b/.idea/compiler.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/jarRepositories.xml b/.idea/jarRepositories.xml new file mode 100644 index 0000000..712ab9d --- /dev/null +++ b/.idea/jarRepositories.xml @@ -0,0 +1,20 @@ + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..d5cd614 --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,12 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..35eb1dd --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at 
end of file From 2f015b9f9f37f7ce9911577bc8bfa3f5fd76149f Mon Sep 17 00:00:00 2001 From: joniles Date: Thu, 21 Apr 2022 19:26:35 +0100 Subject: [PATCH 28/30] Add FUNDING.yml --- .github/FUNDING.yml | 1 + 1 file changed, 1 insertion(+) create mode 100644 .github/FUNDING.yml diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml new file mode 100644 index 0000000..0d76043 --- /dev/null +++ b/.github/FUNDING.yml @@ -0,0 +1 @@ +github: joniles \ No newline at end of file From 4f76c667b9e86c2e6d5d18f6f152933a13c917ed Mon Sep 17 00:00:00 2001 From: joniles Date: Thu, 20 Oct 2022 16:57:28 +0100 Subject: [PATCH 29/30] Add image dump example --- .../com/rtfparserkit/utils/ImageDump.java | 35 +++++++++++ .../com/rtfparserkit/utils/ImageListener.java | 60 +++++++++++++++++++ 2 files changed, 95 insertions(+) create mode 100644 src/main/java/com/rtfparserkit/utils/ImageDump.java create mode 100644 src/main/java/com/rtfparserkit/utils/ImageListener.java diff --git a/src/main/java/com/rtfparserkit/utils/ImageDump.java b/src/main/java/com/rtfparserkit/utils/ImageDump.java new file mode 100644 index 0000000..9d754ee --- /dev/null +++ b/src/main/java/com/rtfparserkit/utils/ImageDump.java @@ -0,0 +1,35 @@ + +package com.rtfparserkit.utils; + +import com.rtfparserkit.parser.IRtfParser; +import com.rtfparserkit.parser.RtfStreamSource; +import com.rtfparserkit.parser.standard.StandardRtfParser; + +import java.io.FileInputStream; +import java.io.InputStream; +import java.util.Map; + +public class ImageDump +{ + public static void main(String[] argv) + { + try + { + InputStream is = new FileInputStream(argv[0]); + IRtfParser parser = new StandardRtfParser(); + ImageListener listener = new ImageListener() { + @Override + public void handleImageData(Map data) { + // Handle image data here + System.out.println(data); + } + }; + parser.parse(new RtfStreamSource(is), listener); + } + + catch (Exception ex) + { + ex.printStackTrace(); + } + } +} diff --git 
a/src/main/java/com/rtfparserkit/utils/ImageListener.java b/src/main/java/com/rtfparserkit/utils/ImageListener.java new file mode 100644 index 0000000..2df3184 --- /dev/null +++ b/src/main/java/com/rtfparserkit/utils/ImageListener.java @@ -0,0 +1,60 @@ +package com.rtfparserkit.utils; + +import com.rtfparserkit.parser.RtfListenerAdaptor; +import com.rtfparserkit.rtf.Command; + +import java.util.HashMap; +import java.util.Map; + +public abstract class ImageListener extends RtfListenerAdaptor +{ + public abstract void handleImageData(Map data); + + public void processGroupStart() + { + ++groupDepth; + } + + /** + * Write a group end tag. + */ + @Override + public void processGroupEnd() + { + --groupDepth; + if (pictData != null && groupDepth < pictGroupDepth) + { + handleImageData(pictData); + pictData = null; + } + } + + public void processCommand(Command command, int parameter, boolean hasParameter, boolean optional) + { + if (pictData != null) + { + Integer value = hasParameter ? Integer.valueOf(parameter) : null; + pictData.put(command.getCommandName(), value); + } + else + { + if (command == Command.pict) + { + pictGroupDepth = groupDepth; + pictData = new HashMap(); + } + } + } + + public void processString(String string) + { + if (pictData != null) + { + pictData.put("data", string); + } + } + + private int groupDepth; + private int pictGroupDepth; + private Map pictData; +} From 26c80c8700eef4c127a7a12ec4af35f190993b21 Mon Sep 17 00:00:00 2001 From: joniles Date: Thu, 20 Oct 2022 16:59:33 +0100 Subject: [PATCH 30/30] Tidy up example --- .../java/com/rtfparserkit/utils/ImageListener.java | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/src/main/java/com/rtfparserkit/utils/ImageListener.java b/src/main/java/com/rtfparserkit/utils/ImageListener.java index 2df3184..cb13d7a 100644 --- a/src/main/java/com/rtfparserkit/utils/ImageListener.java +++ b/src/main/java/com/rtfparserkit/utils/ImageListener.java @@ -10,16 +10,12 @@ public 
abstract class ImageListener extends RtfListenerAdaptor { public abstract void handleImageData(Map data); - public void processGroupStart() + @Override public void processGroupStart() { ++groupDepth; } - /** - * Write a group end tag. - */ - @Override - public void processGroupEnd() + @Override public void processGroupEnd() { --groupDepth; if (pictData != null && groupDepth < pictGroupDepth) @@ -29,7 +25,7 @@ public void processGroupEnd() } } - public void processCommand(Command command, int parameter, boolean hasParameter, boolean optional) + @Override public void processCommand(Command command, int parameter, boolean hasParameter, boolean optional) { if (pictData != null) { @@ -46,7 +42,7 @@ public void processCommand(Command command, int parameter, boolean hasParameter, } } - public void processString(String string) + @Override public void processString(String string) { if (pictData != null) {