Skip to content

Commit

Permalink
Restructuration of the code and adding the JSON annotation files as p…
Browse files Browse the repository at this point in the history
…arameters
  • Loading branch information
reboutli-crim committed Oct 24, 2017
1 parent f310823 commit fddc1c7
Show file tree
Hide file tree
Showing 12,817 changed files with 723 additions and 30 deletions.
The diff you're trying to view is too large. We only load the first 3000 changed files.
Empty file modified .Rhistory
100644 → 100755
Empty file.
Empty file modified COPYING
100644 → 100755
Empty file.
Empty file modified README.md
100644 → 100755
Empty file.
Empty file modified conf/config.props
100644 → 100755
Empty file.
Empty file modified desc/annotator/AllLanguagesTokenizer.xml
100644 → 100755
Empty file.
Empty file modified desc/annotator/HeidelTime.xml
100644 → 100755
Empty file.
Empty file modified desc/annotator/HeidelTimeStyleMap.xml
100644 → 100755
Empty file.
Empty file modified desc/annotator/HunPosTaggerWrapper.xml
100644 → 100755
Empty file.
Empty file modified desc/annotator/IntervalTagger.xml
100644 → 100755
Empty file.
Empty file modified desc/annotator/JVnTextProWrapper.xml
100644 → 100755
Empty file.
Empty file modified desc/annotator/StanfordPOSTaggerWrapper.xml
100644 → 100755
Empty file.
Empty file modified desc/annotator/TreeTaggerWrapper.xml
100644 → 100755
Empty file.
Empty file modified desc/comsumer/ACETernWriter.xml
100644 → 100755
Empty file.
Empty file modified desc/comsumer/Eventi2014Writer.xml
100644 → 100755
Empty file.
Empty file modified desc/comsumer/Tempeval2Writer.xml
100644 → 100755
Empty file.
Empty file modified desc/comsumer/Tempeval3Writer.xml
100644 → 100755
Empty file.
Empty file modified desc/reader/ACETernReader.xml
100644 → 100755
Empty file.
Empty file modified desc/reader/Eventi2014Reader.xml
100644 → 100755
Empty file.
Empty file modified desc/reader/Tempeval2Reader.xml
100644 → 100755
Empty file.
Empty file modified desc/reader/Tempeval3Reader.xml
100644 → 100755
Empty file.
Empty file modified desc/type/HeidelTime_TypeSystem.xml
100644 → 100755
Empty file.
Empty file modified desc/type/HeidelTime_TypeSystemStyleMap.xml
100644 → 100755
Empty file.
Empty file modified doc/howToWriteRules.txt
100644 → 100755
Empty file.
Empty file modified doc/readme.txt
100644 → 100755
Empty file.
Empty file modified lib/uima-core.jar
100644 → 100755
Empty file.
Empty file modified metadata/adaptDKProDescriptors.sh
100644 → 100755
Empty file.
Empty file modified metadata/install.xml
100644 → 100755
Empty file.
Empty file modified metadata/jvntextpro-pom.xml
100644 → 100755
Empty file.
Empty file modified metadata/setenv
100644 → 100755
Empty file.
Empty file modified metadata/setenv.bat
100644 → 100755
Empty file.
Empty file modified metadata/standalone/pom.xml
100644 → 100755
Empty file.
Empty file modified metadata/webui/pom.xml
100644 → 100755
Empty file.
8 changes: 4 additions & 4 deletions pom.xml
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

<groupId>ca.crim.nlp</groupId>
<artifactId>crim-heideltime</artifactId>
<version>1.0.0-SNAPSHOT</version>
<version>3.0.0-SNAPSHOT</version>

<name>HeidelTime</name>
<description>This version of HeidelTime extends the well-known multilingual cross-domain temporal tagger (com.github.heideltime) that extracts temporal expressions from documents and normalizes them according to the TIMEX3 annotation standard.</description>
Expand Down Expand Up @@ -54,7 +54,7 @@
</scm>

<build>
<sourceDirectory>src</sourceDirectory>
<sourceDirectory>src/main/java</sourceDirectory>
<outputDirectory>${basedir}/class</outputDirectory>
<resources>
<resource>
Expand All @@ -64,7 +64,7 @@
</includes>
</resource>
<resource>
<directory>resources/</directory>
<directory>${basedir}/main/resources/</directory>
<includes>
<include>**/*.txt</include>
</includes>
Expand Down Expand Up @@ -182,7 +182,7 @@
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.11</version>
<scope>test</scope>
<scope>test</scope>
</dependency>
<dependency>
<groupId>com.github.stefanbirkner</groupId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ public enum CLISwitch {
LOCALE ("Locale", "-locale", null),
POSTAGGER ("Part of Speech tagger", "-pos", POSTagger.TREETAGGER),
INTERVALS ("Interval Tagger", "-it"),
POSFILE ("Path to the JSON-file describing the POS", "-pf"),
SENTENCEFILE("Path to the JSON-file describing the sentences", "-sf"),
HELP ("This screen", "-h"),
;

Expand Down
File renamed without changes.
File renamed without changes.
72 changes: 60 additions & 12 deletions ...time/standalone/HeidelTimeStandalone.java → ...time/standalone/HeidelTimeStandalone.java
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

package de.unihd.dbs.heideltime.standalone;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
Expand Down Expand Up @@ -332,9 +333,9 @@ private void provideDocumentCreationTime(JCas jcas,
* @param jcas
* @throws Exception
*/
private void establishHeidelTimePreconditions(JCas jcas)throws Exception {
private void establishHeidelTimePreconditions(JCas jcas, String pos_file, String sentence_file)throws Exception {
// Token information & sentence structure
establishPartOfSpeechInformation(jcas);
establishPartOfSpeechInformation(jcas, pos_file, sentence_file);
}

/**
Expand All @@ -343,7 +344,7 @@ private void establishHeidelTimePreconditions(JCas jcas)throws Exception {
* @param jcas
* @throws Exception
*/
private void establishPartOfSpeechInformation(JCas jcas) throws Exception {
private void establishPartOfSpeechInformation(JCas jcas, String pos_file, String sentence_file) throws Exception {
logger.log(Level.FINEST, "Establishing part of speech information...");

PartOfSpeechTagger partOfSpeechTagger = null;
Expand Down Expand Up @@ -437,7 +438,10 @@ private void establishPartOfSpeechInformation(JCas jcas) throws Exception {
}
}
partOfSpeechTagger.initialize(settings);
partOfSpeechTagger.process(jcas);
if (!POSTagger.JSON.equals(posTagger))
partOfSpeechTagger.process(jcas);
else
((JSONTaggerWrapper)partOfSpeechTagger).process(jcas, pos_file, sentence_file);
partOfSpeechTagger.reset();

logger.log(Level.FINEST, "Part of speech information established");
Expand Down Expand Up @@ -466,7 +470,7 @@ private ResultFormatter getFormatter() {
*/
public String process(String document)
        throws DocumentCreationTimeMissingException {
    // No document creation time and no JSON annotation files are available on
    // this entry point, so all three are passed as null.
    return process(document, null, getFormatter(), null, null);
}

/**
Expand All @@ -480,9 +484,9 @@ public String process(String document)
* {@link #process(String, Date)} instead to provide document
* creation time!
*/
public String process(String document, Date documentCreationTime)
public String process(String document, Date documentCreationTime, String pos_file, String sentence_file)
throws DocumentCreationTimeMissingException {
return process(document, documentCreationTime, getFormatter());
return process(document, documentCreationTime, getFormatter(), pos_file, sentence_file);
}

/**
Expand All @@ -498,7 +502,7 @@ public String process(String document, Date documentCreationTime)
*/
public String process(String document, ResultFormatter resultFormatter)
        throws DocumentCreationTimeMissingException {
    // Caller chooses the formatter; creation time and JSON annotation files
    // are not provided on this entry point and are passed as null.
    return process(document, null, resultFormatter, null, null);
}

/**
Expand All @@ -513,7 +517,7 @@ public String process(String document, ResultFormatter resultFormatter)
* If document creation time is missing when processing a
* document of type {@link DocumentType#NEWS}
*/
public String process(String document, Date documentCreationTime, ResultFormatter resultFormatter)
public String process(String document, Date documentCreationTime, ResultFormatter resultFormatter, String pos_file, String sentence_file)
throws DocumentCreationTimeMissingException {
logger.log(Level.INFO, "Processing started");

Expand All @@ -533,7 +537,7 @@ public String process(String document, Date documentCreationTime, ResultFormatte
try {
logger.log(Level.FINER, "Establishing preconditions...");
provideDocumentCreationTime(jcas, documentCreationTime);
establishHeidelTimePreconditions(jcas);
establishHeidelTimePreconditions(jcas, pos_file, sentence_file);
logger.log(Level.FINER, "Preconditions established");

heidelTime.process(jcas);
Expand Down Expand Up @@ -756,8 +760,52 @@ public static void main(String[] args) {
} else {
// Type not found
posTagger = (POSTagger) CLISwitch.POSTAGGER.getValue();
logger.log(Level.INFO, "POS Tagger '-pos': NOT FOUND OR RECOGNIZED; set to "+posTagger.toString().toUpperCase());
logger.log(Level.INFO, "POS Tagger '-pos': NOT FOUND OR RECOGNIZED; set to "+ posTagger.toString().toUpperCase());
}

// If POS tagger is JSONTagger, make sure you have a path for the sentence annotations and the POS annotations
String pos_file = "";
String sentence_file = "";
if (posTagger == POSTagger.JSON) {
if(CLISwitch.POSFILE.getIsActive()) {
try {
pos_file = CLISwitch.POSFILE.getValue().toString();
File f = new File(pos_file);
if( !f.exists() || f.isDirectory()) {
throw new IllegalArgumentException();
}

} catch(IllegalArgumentException e) {
logger.log(Level.WARNING, "Given part-of-speech JSON file doesn't exist.");
System.exit(-1);
}
logger.log(Level.INFO, "Part-of-speech JSON file '-pf': "+ pos_file);
} else {
// Path to part-of-speech JSON file needed
logger.log(Level.INFO, "Path to the part-of-speech JSON file needed");
System.exit(-1);
}

if(CLISwitch.SENTENCEFILE.getIsActive()) {
try {
sentence_file = CLISwitch.SENTENCEFILE.getValue().toString();
File f = new File(sentence_file);
if( !f.exists() || f.isDirectory()) {
throw new IllegalArgumentException();
}

} catch(IllegalArgumentException e) {
logger.log(Level.WARNING, "Given sentence JSON file doesn't exist.");
System.exit(-1);
}
logger.log(Level.INFO, "Sentence JSON file '-sf': "+ sentence_file);
} else {
// Path to the sentence JSON file needed
logger.log(Level.INFO, "Path to the sentence JSON file needed");
System.exit(-1);
}
}


// Set whether or not to use the Interval Tagger
Boolean doIntervalTagging = false;
Expand Down Expand Up @@ -799,7 +847,7 @@ public static void main(String[] args) {
String input = new String(new String(inArr, encodingType).getBytes("UTF-8"), "UTF-8");

HeidelTimeStandalone standalone = new HeidelTimeStandalone(language, type, outputType, null, posTagger, doIntervalTagging);
String out = standalone.process(input, dct);
String out = standalone.process(input, dct, pos_file, sentence_file);

// Print output always as UTF-8
pwOut = new PrintWriter(new OutputStreamWriter(System.out, "UTF-8"));
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
48 changes: 37 additions & 11 deletions ...ne/components/impl/JSONTaggerWrapper.java → ...ne/components/impl/JSONTaggerWrapper.java
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import de.unihd.dbs.uima.types.heideltime.Sentence;
import de.unihd.dbs.uima.types.heideltime.Token;


/**
* The JSON tagger wrapper reads JSON files containing the sentence annotations and the POS annotations
* The path to these two files should be indicated in the environment
Expand All @@ -29,12 +30,16 @@ public class JSONTaggerWrapper implements PartOfSpeechTagger {

private String configFile;
private Hashtable<String, List<String>> defs = new Hashtable<String, List<String>>();



public String SENTENCES = "sentences";
public String SENTENCE_BEGIN = "sentence_begin";
public String SENTENCE_END = "sentence_end";
public String TOKENS = "tokens";
public String TOKEN_BEGIN = "token_begin";
public String TOKEN_END = "token_end";
public String TOKEN_POS = "token_pos";


@Override
public void initialize(Properties settings) throws IllegalArgumentException, IOException {
Expand Down Expand Up @@ -64,31 +69,52 @@ public void initialize(Properties settings) throws IllegalArgumentException, IOE
|| defs.get(SENTENCE_END)== null
|| defs.get(TOKEN_BEGIN)== null
|| defs.get(TOKEN_END)== null
||defs.get(TOKEN_POS)== null)
||defs.get(TOKEN_POS)== null
||defs.get(TOKENS)== null
||defs.get(SENTENCES)== null)
throw new IOException(String.format("The JSON configfile '%s' is not valid.", configFile));
}
}

/**
 * Always fails: this tagger cannot run without the two annotation file
 * paths. Call {@link #process(JCas, String, String)} instead.
 *
 * @param jcas
 *            Not used
 * @throws UnsupportedOperationException
 *             Always, because the annotation file paths are missing
 */
@Override
public void process(JCas jcas) throws Exception {
    // Unchecked, but still an Exception subtype, so callers that catch
    // Exception behave as before.
    throw new UnsupportedOperationException("Missing arguments : sentence annotation file path and part-of-speech annotation file path.");
}

/**
 * Reads the sentence and token annotations from two JSON files and adds
 * them to the CAS indexes.
 *
 * Where each value sits inside the JSON is taken from the configured paths
 * in {@code defs}. A token whose part-of-speech value is null or empty is
 * skipped.
 *
 * @param jcas
 *            CAS that receives the Sentence and Token annotations
 * @param token_annotation_filepath
 *            Path to the JSON file holding the token / POS annotations
 * @param sentence_annotation_filepath
 *            Path to the JSON file holding the sentence annotations
 * @throws Exception
 *             If a file cannot be read or parsed, or a value does not have
 *             the expected type
 */
public void process(JCas jcas, String token_annotation_filepath, String sentence_annotation_filepath) throws Exception {
    JSONParser parser = new JSONParser();

    // getting sentences
    JSONArray sentences = readAnnotationArray(parser, sentence_annotation_filepath, defs.get(SENTENCES));
    for (Object o : sentences) {
        Sentence s = new Sentence(jcas, (int)(long)getInfo(o, defs.get(SENTENCE_BEGIN)), (int)(long)getInfo(o, defs.get(SENTENCE_END)));
        s.addToIndexes();
    }

    // getting tokens
    JSONArray tokens = readAnnotationArray(parser, token_annotation_filepath, defs.get(TOKENS));
    for (Object o : tokens) {
        String pos = (String) getInfo(o, defs.get(TOKEN_POS));
        // Only tokens that actually carry a POS value are indexed.
        if (pos != null && pos.length() > 0) {
            Token t = new Token(jcas, (int)(long)getInfo(o, defs.get(TOKEN_BEGIN)), (int)(long)getInfo(o, defs.get(TOKEN_END)));
            t.setPos(pos);
            t.addToIndexes();
        }
    }
}

/**
 * Parses one JSON annotation file and returns the array of annotations,
 * closing the file even when parsing fails.
 *
 * @param parser
 *            Parser used to read the file
 * @param filepath
 *            Path to the JSON file
 * @param pathToArray
 *            Configured path to the array inside the document; a first
 *            element of "None" (case-insensitive) means the document root
 *            is itself the array
 * @return The annotation array
 * @throws Exception
 *             If the file cannot be read or parsed
 */
private JSONArray readAnnotationArray(JSONParser parser, String filepath, List<String> pathToArray) throws Exception {
    // Check the configured path before opening the file, so a broken
    // configuration fails before any file access.
    boolean rootIsArray = pathToArray.get(0).equalsIgnoreCase("None");

    Object root;
    try (FileReader reader = new FileReader(filepath)) {
        root = parser.parse(reader);
    }

    if (rootIsArray)
        return (JSONArray) root;
    return (JSONArray) getInfo(root, pathToArray);
}

@Override
public void reset() {
// TODO Auto-generated method stub
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Loading

0 comments on commit fddc1c7

Please sign in to comment.