-
Notifications
You must be signed in to change notification settings - Fork 460
/
Dockerfile.crf
94 lines (70 loc) · 2.8 KB
/
Dockerfile.crf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
## Docker GROBID image
## Docker GROBID image using CRF models only - NOTE: you should preferably use the Deep Learning image
## See https://grobid.readthedocs.io/en/latest/Grobid-docker/
## docker build -t grobid/grobid:GROBID_VERSION --build-arg GROBID_VERSION=GROBID_VERSION --file Dockerfile.crf .
## docker run -t --rm -p 8080:8070 -p 8081:8071 {image_name}
# To connect to the container with a bash shell
# > docker exec -i -t {container_name} /bin/bash
# -------------------
# build builder image
# -------------------
# Builder stage: compiles GROBID from the copied sources with the Gradle
# wrapper, then unpacks the generated distribution archives into /opt/grobid.
# /opt/grobid is the only directory the runtime stage copies from this stage.
FROM openjdk:17-jdk-slim AS builder

USER root

# unzip is required further down to unpack the distribution archives.
# The apt package index is removed in the same layer that downloaded it.
RUN apt-get update && \
    apt-get -y upgrade && \
    apt-get -y --no-install-recommends install unzip && \
    rm -rf /var/lib/apt/lists/*

WORKDIR /opt/grobid-source

# gradle
COPY gradle/ ./gradle/
COPY gradlew gradle.properties build.gradle settings.gradle ./

# source
COPY grobid-home/ ./grobid-home/
COPY grobid-core/ ./grobid-core/
COPY grobid-service/ ./grobid-service/
COPY grobid-trainer/ ./grobid-trainer/

# cleaning unused native libraries before packaging
# (single layer: this is one conceptual clean-up step)
RUN rm -rf \
      grobid-home/pdf2xml \
      grobid-home/pdfalto/lin-32 \
      grobid-home/pdfalto/mac-64 \
      grobid-home/pdfalto/mac_arm-64 \
      grobid-home/pdfalto/win-* \
      grobid-home/lib/lin-32 \
      grobid-home/lib/win-* \
      grobid-home/lib/mac-64

# cleaning Delft models
RUN rm -rf grobid-home/models/*-BidLSTM_CRF*

ENV GROBID_SERVICE_OPTS="-Djava.library.path=grobid-home/lib/lin-64:grobid-home/lib/lin-64/jep"

RUN ./gradlew clean assemble --no-daemon --info --stacktrace

WORKDIR /opt/grobid

# Unpack the service distribution and drop the suffix from its directory name
# so the runtime stage can use the fixed path ./grobid-service.
RUN unzip -o /opt/grobid-source/grobid-service/build/distributions/grobid-service-*.zip && \
    mv grobid-service* grobid-service

# Unpack grobid-home and make the bundled pdfalto files executable.
RUN unzip -o /opt/grobid-source/grobid-home/build/distributions/grobid-home-*.zip && \
    chmod -R 755 /opt/grobid/grobid-home/pdfalto

# NOTE(review): WORKDIR is /opt/grobid here, so this targets
# /opt/grobid/grobid-source, not the /opt/grobid-source build tree. Nothing
# visible in this file creates that path, so this looks like a no-op - confirm
# the intent before removing or repointing it.
RUN rm -rf grobid-source
# -------------------
# build runtime image
# -------------------
# Runtime stage: JRE base image plus the native libraries installed below,
# with the GROBID tree unpacked by the builder stage copied into /opt/grobid.
FROM openjdk:17-slim

# apt-get upgrade is kept on purpose: dropping it would change which package
# versions end up in the image. The package index is removed in the same layer.
RUN apt-get update && \
    apt-get -y upgrade && \
    apt-get -y --no-install-recommends install \
      libfontconfig \
      libxml2 && \
    rm -rf /var/lib/apt/lists/*

# Add Tini
# NOTE(review): the binary is fetched without integrity verification; consider
# `ADD --checksum=sha256:<digest>` once the digest for this release is confirmed.
ENV TINI_VERSION=v0.19.0
ADD https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini /tini
RUN chmod +x /tini
ENTRYPOINT ["/tini", "-s", "--"]

WORKDIR /opt/grobid

COPY --from=builder /opt/grobid .

# Native library path plus the --add-opens flags handed to the service launcher.
ENV GROBID_SERVICE_OPTS="-Djava.library.path=grobid-home/lib/lin-64:grobid-home/lib/lin-64/jep --add-opens java.base/java.lang=ALL-UNNAMED --add-opens java.base/sun.nio.ch=ALL-UNNAMED --add-opens java.base/java.io=ALL-UNNAMED"

# Container ports mapped by the `docker run` example in the header of this file
# (-p 8080:8070 -p 8081:8071). EXPOSE is documentation only; it publishes nothing.
EXPOSE 8070 8071

CMD ["./grobid-service/bin/grobid-service"]

# Declared as late as possible so changing the version does not invalidate
# the layers above.
ARG GROBID_VERSION

# The version value is quoted so a build arg containing whitespace cannot
# break LABEL parsing.
LABEL \
    authors="The contributors" \
    org.label-schema.name="GROBID" \
    org.label-schema.description="Image with GROBID service" \
    org.label-schema.url="https://github.com/kermitt2/grobid" \
    org.label-schema.version="${GROBID_VERSION}"