Skip to content

Commit

Permalink
Merge pull request #10 from lfoppiano/update-dropwizard
Browse files Browse the repository at this point in the history
Update to Dropwizard 4
  • Loading branch information
kermitt2 authored Sep 8, 2024
2 parents 9d85b17 + a983954 commit acea820
Show file tree
Hide file tree
Showing 16 changed files with 188 additions and 138 deletions.
47 changes: 47 additions & 0 deletions .github/workflows/ci-build-manual.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
name: Build and push a development version on docker

# Manual trigger only: started from the Actions tab (or the API).
on:
  workflow_dispatch:

jobs:
  # Compile the project first; the image job below only runs if this succeeds.
  build:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4
      - name: Set up JDK 11
        uses: actions/setup-java@v4
        with:
          java-version: '11'
          distribution: 'temurin'
          cache: 'gradle'
      - name: Build with Gradle
        # Tests are skipped here (-x test).
        run: ./gradlew build -x test

  docker-build:
    needs: [build]
    runs-on: ubuntu-latest

    steps:
      - name: Create more disk space
        # Remove large preinstalled toolchains before building the image.
        run: |
          sudo rm -rf /usr/share/dotnet
          sudo rm -rf /opt/ghc
          sudo rm -rf "/usr/local/share/boost"
          sudo rm -rf "$AGENT_TOOLSDIRECTORY"
          sudo rm -rf /opt/hostedtoolcache
      - uses: actions/checkout@v4
      - name: Build and push
        id: docker_build
        uses: mr-smithers-excellent/docker-build-push@v6
        with:
          dockerfile: Dockerfile
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}
          # Docker repository names must be lowercase; "Pub2TEI" is rejected
          # as an invalid reference.
          image: lfoppiano/pub2tei
          registry: docker.io
          pushImage: true
          tags: latest-develop
      - name: Image name and tags
        # This action exposes imageFullName / imageName / tags (it has no
        # "digest" output). Values are passed through env rather than being
        # interpolated into the script text.
        env:
          IMAGE_FULL_NAME: ${{ steps.docker_build.outputs.imageFullName }}
          IMAGE_TAGS: ${{ steps.docker_build.outputs.tags }}
        run: |
          echo "image: ${IMAGE_FULL_NAME}"
          echo "tags: ${IMAGE_TAGS}"
27 changes: 27 additions & 0 deletions .github/workflows/ci-build.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
name: Build unstable

# Triggered by every push.
on: push

jobs:
  build:
    runs-on: ubuntu-latest

    steps:
      # GROBID is checked out first; Pub2TEI is then placed inside that
      # checkout at ./grobid/Pub2TEI.
      - name: Checkout grobid home
        uses: actions/checkout@v4
        with:
          repository: kermitt2/grobid
          path: ./grobid

      - name: Checkout Pub2TEI
        uses: actions/checkout@v4
        with:
          path: ./grobid/Pub2TEI

      - name: Set up JDK 11
        uses: actions/setup-java@v4
        with:
          distribution: 'temurin'
          java-version: '11'
          cache: 'gradle'

      # Run the Gradle test task from the nested Pub2TEI checkout.
      - name: Build and run integration tests
        working-directory: ./grobid/Pub2TEI
        run: ./gradlew test --stacktrace --info
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
## Docker Pub2TEI image using Grobid deep learning models and/or CRF models for transformation enhancements

# this is the full GROBID image using NVIDIA Container Toolkit to automatically recognize possible GPU drivers on the host machine
FROM grobid/grobid:0.8.0
FROM lfoppiano/grobid:0.8.0-full-slim

# Add Tini
ENV TINI_VERSION v0.19.0
Expand Down
65 changes: 34 additions & 31 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,10 @@ repositories {
maven {
url new File(rootProject.rootDir, "localLibs")
}
maven { url "https://grobid.s3.eu-west-1.amazonaws.com/repo/" }
flatDir {
dirs 'localLibs'
}
// maven { url "https://grobid.s3.eu-west-1.amazonaws.com/repo/" }
}

apply plugin: 'application'
Expand All @@ -39,8 +42,8 @@ version '0.2'

description = """transform the myriad of scientific publisher XML into the same TEI XML format, common to GROBID"""

sourceCompatibility = 1.8
targetCompatibility = 1.8
// Compile for Java 11, matching the JDK 11 set up by the CI workflows.
// The "1.x" spelling is the legacy pre-Java-9 scheme and "1.11" is not a real
// version name, so use the JavaVersion constant instead.
sourceCompatibility = JavaVersion.VERSION_11
targetCompatibility = JavaVersion.VERSION_11

tasks.withType(JavaCompile) {
options.encoding = 'UTF-8'
Expand Down Expand Up @@ -77,8 +80,10 @@ dependencies {
testImplementation group: 'junit', name: 'junit', version: '4.12'
testImplementation "org.hamcrest:hamcrest-all:1.3"
testImplementation "org.easymock:easymock:3.5"
testImplementation "org.xmlunit:xmlunit-matchers:2.10.0"
testImplementation "org.xmlunit:xmlunit-legacy:2.10.0"

// packaging local libs
// packaging local libs
implementation fileTree(dir: new File(rootProject.rootDir, 'localLibs'), include: localLibs)

implementation(group: 'xml-apis', name: 'xml-apis') {
Expand All @@ -96,28 +101,39 @@ dependencies {
implementation group: 'org.grobid', name: 'grobid-core', version: '0.8.0'
implementation "black.ninia:jep:4.0.2"

implementation "io.dropwizard:dropwizard-core:1.3.29"
implementation "io.dropwizard:dropwizard-assets:1.3.29"
implementation "com.hubspot.dropwizard:dropwizard-guicier:1.3.5.2"
implementation "io.dropwizard:dropwizard-testing:1.3.29"
implementation "io.dropwizard:dropwizard-forms:1.3.29"
implementation "io.dropwizard:dropwizard-client:1.3.29"
implementation "io.dropwizard:dropwizard-auth:1.3.29"
implementation "io.dropwizard.metrics:metrics-core:4.0.5"
implementation "io.dropwizard.metrics:metrics-servlets:4.0.5"
// Dropwizard 4 stack plus the Guicey integration library.
implementation 'ru.vyarus:dropwizard-guicey:7.0.0'
// A BOM carries version constraints, not classes: import it as a platform so
// those constraints are actually applied to the Dropwizard modules below.
implementation platform('io.dropwizard:dropwizard-bom:4.0.0')
implementation 'io.dropwizard:dropwizard-core:4.0.0'
implementation 'io.dropwizard:dropwizard-assets:4.0.0'
// NOTE(review): dropwizard-testing is usually a test-only dependency; consider
// testImplementation if no main-source class uses it.
implementation 'io.dropwizard:dropwizard-testing:4.0.0'
implementation 'io.dropwizard:dropwizard-forms:4.0.0'
implementation 'io.dropwizard:dropwizard-client:4.0.0'
implementation 'io.dropwizard:dropwizard-auth:4.0.0'
implementation 'io.dropwizard.metrics:metrics-core:4.2.22'
implementation 'io.dropwizard.metrics:metrics-servlets:4.2.22'

implementation "xerces:xercesImpl:2.12.0"
implementation "net.arnx:jsonic:1.3.10"
implementation "net.sf.saxon:Saxon-HE:9.6.0-9"
implementation "xom:xom:1.3.2"
implementation 'javax.xml.bind:jaxb-api:2.3.0'
implementation 'org.apache.opennlp:opennlp-tools:1.9.1'
implementation 'black.ninia:jep:4.0.2'
implementation "org.apache.httpcomponents:httpclient:4.5.3"
implementation "org.apache.lucene:lucene-analyzers-common:4.5.1"
implementation group: 'org.jruby', name: 'jruby-complete', version: '9.2.13.0'

implementation 'org.slf4j:slf4j-api:1.7.30'
implementation 'ch.qos.logback:logback-classic:1.2.3'

implementation "com.rockymadden.stringmetric:stringmetric-core_2.10:0.27.3"

//Parsing XML/JSON
//implementation group: 'org.codehaus.woodstox', name: 'stax2-api', version: '4.0.0'
//implementation group: 'org.codehaus.woodstox', name: 'woodstox-core-asl', version: '4.4.1'
implementation "com.fasterxml.jackson.core:jackson-core:2.10.1"
implementation "com.fasterxml.jackson.core:jackson-databind:2.10.1"
implementation "com.fasterxml.jackson.module:jackson-module-afterburner:2.10.1"
implementation "com.fasterxml.jackson.dataformat:jackson-dataformat-yaml:2.10.1"
implementation "com.fasterxml.jackson.core:jackson-core:2.14.3"
implementation "com.fasterxml.jackson.core:jackson-databind:2.14.3"
// implementation "com.fasterxml.jackson.module:jackson-module-afterburner:2.14.3"
implementation "com.fasterxml.jackson.dataformat:jackson-dataformat-yaml:2.14.3"

// compile group: 'org.apache.httpcomponents', name: 'httpmime', version: '4.5.3'
implementation 'org.apache.commons:commons-collections4:4.1'
Expand All @@ -127,19 +143,6 @@ dependencies {
}


/*def libraries = ""
if (Os.isFamily(Os.FAMILY_MAC)) {
if (Os.OS_ARCH.equals("aarch64")) {
libraries = "${file("../grobid-home/lib/mac_arm-64").absolutePath}"
} else {
libraries = "${file("../grobid-home/lib/mac-64").absolutePath}"
}
} else if (Os.isFamily(Os.FAMILY_UNIX)) {
libraries = "${file("../grobid-home/lib/lin-64/jep").absolutePath}:" +
"${file("../grobid-home/lib/lin-64").absolutePath}:"
} else {
throw new RuntimeException("Unsupported platform!")
}*/

task mainJar(type: ShadowJar) {
zip64 true
Expand Down
1 change: 0 additions & 1 deletion client/pub2tei_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
Pub2TEI Python Client
"""
import os
import io
import json
import argparse
import time
Expand Down
Binary file added localLibs/grobid-core-0.8.0.jar
Binary file not shown.
5 changes: 3 additions & 2 deletions resources/config/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,17 +37,18 @@ corsAllowedHeaders: "X-Requested-With,Content-Type,Accept,Origin"

server:
type: custom
idleTimeout: 180 seconds
applicationConnectors:
- type: http
port: 8060
idleTimeout: 180 seconds
acceptQueueSize: 1024
adminConnectors:
- type: http
port: 8061
registerDefaultExceptionMappers: false
maxThreads: 1024
maxQueuedRequests: 1024
acceptQueueSize: 1024

#requestLog:
# appenders: []

Expand Down
1 change: 1 addition & 0 deletions settings.gradle
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
rootProject.name = "Pub2TEI"
20 changes: 11 additions & 9 deletions src/main/java/org/pub2tei/service/HealthCheck.java
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
package org.pub2tei.service;

import javax.inject.Inject;
import javax.inject.Singleton;
import javax.ws.rs.GET;
import javax.ws.rs.Path;
import javax.ws.rs.Produces;
import javax.ws.rs.core.Response;

import static javax.ws.rs.core.MediaType.APPLICATION_JSON;
import com.google.inject.Inject;
import com.google.inject.Singleton;
import jakarta.ws.rs.GET;
import jakarta.ws.rs.Path;
import jakarta.ws.rs.Produces;
import jakarta.ws.rs.core.Response;

import static jakarta.ws.rs.core.MediaType.APPLICATION_JSON;

@Path("health")
@Singleton
Expand All @@ -17,8 +18,9 @@ public class HealthCheck extends com.codahale.metrics.health.HealthCheck {
@Inject
private ServiceConfiguration configuration;

@Inject
public HealthCheck() {
/**
 * Creates the health check and stores the given service configuration.
 *
 * NOTE(review): the {@code configuration} field is also annotated with
 * {@code @Inject}, so it looks like it is injected twice (field and
 * constructor); confirm whether the field annotation is still needed.
 *
 * @param configuration the service configuration kept on this instance
 */
@jakarta.inject.Inject
public HealthCheck(ServiceConfiguration configuration) {
this.configuration = configuration;
}

@GET
Expand Down
11 changes: 2 additions & 9 deletions src/main/java/org/pub2tei/service/ProcessFile.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,20 +7,13 @@

import org.pub2tei.document.DocumentProcessor;

import javax.ws.rs.core.MediaType;
import javax.ws.rs.core.Response;
import javax.ws.rs.core.HttpHeaders;
import javax.ws.rs.core.Response.Status;
import javax.ws.rs.core.StreamingOutput;

/*import jakarta.ws.rs.core.HttpHeaders;
import jakarta.ws.rs.core.HttpHeaders;
import jakarta.ws.rs.core.MediaType;
import jakarta.ws.rs.core.Response;
import jakarta.ws.rs.core.Response.Status;*/
import jakarta.ws.rs.core.Response.Status;

import java.io.*;
import java.nio.charset.Charset;
import java.util.*;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Expand Down
38 changes: 16 additions & 22 deletions src/main/java/org/pub2tei/service/ProcessString.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,22 +2,16 @@

import com.google.inject.Inject;
import com.google.inject.Singleton;

import jakarta.ws.rs.core.MediaType;
import jakarta.ws.rs.core.Response;
import org.apache.commons.lang3.StringUtils;
import org.pub2tei.document.DocumentProcessor;

import java.util.List;
import java.util.ArrayList;
import java.util.NoSuchElementException;
import java.io.*;

import javax.ws.rs.core.MediaType;
import javax.ws.rs.core.Response;
import javax.ws.rs.core.Response.Status;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.commons.lang3.StringUtils;
import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.util.NoSuchElementException;

/**
*
Expand All @@ -41,17 +35,17 @@ public ProcessString() {
* @param text the raw string to process
* @return a response object containing the structured xml representation
*/
public static Response processText(String text,
final boolean segmentSentences,
final boolean refine,
final int consolidateReferences,
ServiceConfiguration serviceConfiguration) {
public static Response processText(String text,
final boolean segmentSentences,
final boolean refine,
final int consolidateReferences,
ServiceConfiguration serviceConfiguration) {
LOGGER.debug(methodLogIn());
Response response = null;

if (text == null || text.length() == 0) {
LOGGER.warn("Empty text input");
response = Response.status(Status.BAD_REQUEST).build();
response = Response.status(Response.Status.BAD_REQUEST).build();
LOGGER.debug(methodLogOut());
return response;
}
Expand All @@ -64,16 +58,16 @@ public static Response processText(String text,
String retValString = documentProcessor.processXML(inputStream, segmentSentences, refine, consolidateReferences);

if (!isResultOK(retValString)) {
response = Response.status(Status.NO_CONTENT).build();
response = Response.status(Response.Status.NO_CONTENT).build();
} else {
response = Response.status(Status.OK).entity(retValString).type(MediaType.TEXT_PLAIN).build();
response = Response.status(Response.Status.OK).entity(retValString).type(MediaType.TEXT_PLAIN).build();
}
} catch (NoSuchElementException nseExp) {
LOGGER.error("Could not get an instance of converter. Sending service unavailable.");
response = Response.status(Status.SERVICE_UNAVAILABLE).build();
response = Response.status(Response.Status.SERVICE_UNAVAILABLE).build();
} catch (Exception e) {
LOGGER.error("An unexpected exception occurs. ", e);
response = Response.status(Status.INTERNAL_SERVER_ERROR).build();
response = Response.status(Response.Status.INTERNAL_SERVER_ERROR).build();
}
LOGGER.debug(methodLogOut());
return response;
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/org/pub2tei/service/Pub2TEIPaths.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ public interface Pub2TEIPaths {
String PATH_IS_ALIVE = "isalive";

/**
* path extension for procssing an XML text
* path extension for processing an XML text
*/
public static final String PATH_TEXT = "processText";

Expand Down
Loading

0 comments on commit acea820

Please sign in to comment.