-
Notifications
You must be signed in to change notification settings - Fork 0
/
make-seq2tei.sh
executable file
·95 lines (78 loc) · 2.02 KB
/
make-seq2tei.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
#!/bin/bash
# make-seq2tei.sh
# Export script for DSV05 data in TEI format.
# NOTE(review): this header used to read "make-seq2ead.sh ... export to
# Kalliope in EAD format", which looks like a leftover from a sibling
# script -- confirm the intended description.
#
# author:
#
# history:

# Stage switches: a stage runs only when its switch is set to 1.
DO_DOWNLOAD=1
DO_TRANSFORM=0
DO_SPLIT=0
DO_FINISH=0

# Working directory of the export. All relative paths used by the stages are
# resolved against it. NOTE: HOME is normally an exported variable, so this
# assignment also changes HOME for every child process started below.
HOME=/home/basil/catmandu/han_seq2tei_kuratorium/

# Glob for the split files. It is stored unexpanded here and only expanded
# where $FILES is used unquoted (the loop of the finish stage).
FILES=tmp/split/*

# Separator line for the console log.
LINE='------------------------------------------------'

echo "${LINE}"
echo "Exporting DSV05 data for TEI"
echo "START: $(date)"
echo "${LINE}"
echo "${LINE}"

# Abort when the working directory is unreachable: the later stages delete and
# overwrite files through relative paths and must never run anywhere else.
cd "${HOME}" || {
  echo "ERROR: cannot change to working directory ${HOME}" >&2
  exit 1
}
#######################################
# Download stage: runs the helper script download-dsv05-sequential.sh located
# in $HOME and, only if it succeeds, moves $HOME/dsv05.seq (presumably the
# file written by the helper -- confirm) into $HOME/input/.
# Globals:   HOME (read)
# Arguments: none
# Returns:   0 on success; the failing command's status otherwise
#######################################
download_dsv05() {
  "${HOME}/download-dsv05-sequential.sh" || return
  mv -- "${HOME}/dsv05.seq" "${HOME}/input/"
}

if [[ "${DO_DOWNLOAD}" == "1" ]]; then
  echo "${LINE}"
  echo "*Downloading DSV05 data"
  echo "${LINE}"
  # Stop here on failure instead of continuing with a missing or stale input.
  if ! download_dsv05; then
    echo "ERROR: downloading DSV05 data failed" >&2
    exit 1
  fi
fi
#######################################
# Transform stage: runs seq2tei.pl on input/dsv05.seq and has it write
# tmp/tei.xml. All paths are relative to the current directory.
# Arguments: none
# Returns:   exit status of the perl run
#######################################
transform_to_tei() {
  perl seq2tei.pl input/dsv05.seq tmp/tei.xml
}

if [[ "${DO_TRANSFORM}" == "1" ]]; then
  echo "${LINE}"
  echo "*Transforming DSV05 data to tei"
  echo "${LINE}"
  # A failed transformation must not be handed on to the later stages.
  if ! transform_to_tei; then
    echo "ERROR: transforming input/dsv05.seq to tmp/tei.xml failed" >&2
    exit 1
  fi
fi
#######################################
# Removes the results of a previous split/finish run, relative to the current
# directory:
#   - output/validation_errors.txt and output/validation_ok.txt
#   - every *.xml below tmp/split, output/validation and output/no_validation
# Each directory is passed to find explicitly, so a missing directory is
# skipped with a warning and can never widen the delete to the whole tree.
# Arguments: none
# Returns:   0 if every removal succeeded, 1 otherwise
#######################################
clean_split_outputs() {
  local dir
  local status=0

  # -f: an absent report file is not an error.
  rm -f -- output/validation_errors.txt output/validation_ok.txt || status=1

  for dir in tmp/split output/validation output/no_validation; do
    if [[ ! -d "${dir}" ]]; then
      echo "WARNING: ${dir} does not exist, nothing to clean" >&2
      continue
    fi
    # -exec ... {} + does not invoke rm at all when nothing matches.
    find "${dir}" -name '*.xml' ! -type d -exec rm -f -- {} + || status=1
  done

  return "${status}"
}

if [[ "${DO_SPLIT}" == "1" ]]; then
  echo "${LINE}"
  echo "*Splitting tei-file"
  echo "${LINE}"
  cd "${HOME}" || {
    echo "ERROR: cannot change to working directory ${HOME}" >&2
    exit 1
  }
  clean_split_outputs || echo "WARNING: cleanup of old split files incomplete" >&2
  if ! perl split.pl tmp/tei.xml tmp/split; then
    echo "ERROR: splitting tmp/tei.xml failed" >&2
    exit 1
  fi
fi
#######################################
# Finishes one split file in place:
#   1. xsltproc applies sanitise.xsl,
#   2. xmllint --format pretty-prints the result,
#   3. sed rewrites every "<TEI>" tag to carry the TEI namespace and version,
#   4. xmllint --noout checks the result; the file is copied to
#      output/no_validation/ when the message matches "fails to validate",
#      otherwise to output/validation/, and the message is appended as one
#      line to the matching report file.
# Steps 1-3 work on temporary files (<file>.san, <file>.valid); the original
# is only replaced once all three succeeded, so a failing tool cannot
# truncate it.
# Arguments: $1 - path of the file to finish (relative to the current dir)
# Returns:   0 when the file was processed, 1 when steps 1-3 failed
#######################################
finish_tei_file() {
  local file=$1
  local sanitised="${file}.san"
  local formatted="${file}.valid"
  local tei_ns_fix='s|<TEI>|<TEI xmlns="http://www.tei-c.org/ns/1.0" version="5.0">|g'
  local report

  if xsltproc sanitise.xsl "${file}" > "${sanitised}" \
    && xmllint --format "${sanitised}" > "${formatted}" \
    && sed "${tei_ns_fix}" "${formatted}" > "${sanitised}" \
    && mv -- "${sanitised}" "${file}"; then
    rm -f -- "${formatted}"
  else
    rm -f -- "${sanitised}" "${formatted}"
    echo "ERROR: could not sanitise/format ${file}; file left unchanged" >&2
    return 1
  fi

  # Schema validation is currently disabled; the original call was:
  #   xmllint --noout --schema tei_all.xsd "$file"
  # NOTE(review): without --schema the "fails to validate" message presumably
  # never appears, so every file ends up in output/validation -- confirm.
  report=$(xmllint --noout "${file}" 2>&1)

  # Newlines are folded into spaces to keep one report line per file; quoting
  # keeps characters such as '*' in the message from being glob-expanded.
  if [[ ${report} =~ fails.to.validate ]]; then
    cp -- "${file}" output/no_validation/
    printf '%s\n' "${report//$'\n'/ }" >> output/validation_errors.txt
  else
    cp -- "${file}" output/validation/
    printf '%s\n' "${report//$'\n'/ }" >> output/validation_ok.txt
  fi
}

if [[ "${DO_FINISH}" == "1" ]]; then
  echo "${LINE}"
  echo "*Finishing tei-files"
  echo "${LINE}"
  # $FILES holds a glob and is deliberately left unquoted so it expands here.
  # shellcheck disable=SC2086
  for f in ${FILES}; do
    # Skips the literal pattern when nothing matched, and anything that is
    # not a regular file.
    [[ -f "${f}" ]] || continue
    echo "Finishing file ${f}"
    finish_tei_file "${f}" || echo "WARNING: skipped ${f}" >&2
  done
fi
# Closing log line with the time the script finished.
printf 'END: %s\n' "$(date)"