-
Notifications
You must be signed in to change notification settings - Fork 0
/
split.pl
71 lines (54 loc) · 1.46 KB
/
split.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#!/usr/bin/env perl
#
# split.pl - split a TEI XML file into several files.
#
# Usage: split.pl <tei-xml file> <output path>
#
# The input is read line by line and accumulated in a buffer.  Every line
# that consists of a single ">" ends the current chunk: the buffer is written
# to "<output path>/<signature>.xml" and then emptied.  The signature is taken
# from the first line ending in "</idno" that follows a line equal to
# "><msIdentifier", and is sanitised so that it can serve as a file name.
#
# Chunks for which no signature was found are written to "unnamed_<n>.xml"
# (n = running chunk number) and reported on STDERR, instead of silently
# reusing the previous chunk's file name and overwriting that file.
use strict;
use warnings;
use Encode qw(encode decode);
# Data::Dumper for debugging
use Data::Dumper;
# Unicode support inside the Perl script / for output
use utf8;
binmode STDOUT, ":utf8";

# Encoding assumed for file names when they are shown on STDOUT.
# NOTE(review): the input itself is copied as raw bytes and never decoded.
my $enc = 'utf-8';

die "Argumente: $0 tei-xml file | outputpath \n" unless @ARGV == 2;
my ($teifile, $outputpath) = @ARGV;

# Turn a line of the form ">SIGNATURE</idno" into a file-name-safe string.
#   - strips the leading ">" and the trailing "</idno"
#   - replaces whitespace, ":", ",", "/" and "-" by "_" (" : " becomes one "_")
#   - removes ( ) [ ] { } *
#   - drops a trailing "__" and then a trailing "_" (this also removes the
#     underscores produced by the line terminator)
# Returns the sanitised string; the argument is not modified.
sub _signature_from_idno_line {
    my ($line) = @_;
    my $sig = $line;
    $sig =~ s/^>//;
    $sig =~ s/<\/idno$//;
    $sig =~ s/\s:\s/_/g;        # must run before the single-character mappings
    $sig =~ s/\s/_/g;
    $sig =~ tr{-:,/}{_};
    $sig =~ tr/()[]{}*//d;
    $sig =~ s/__$//;
    $sig =~ s/_$//;
    return $sig;
}

open(my $teidata, '<', $teifile) or die "Could not open '$teifile' $!\n";

my $buffer    = '';     # lines collected since the last exported chunk
my $signature = '';     # sanitised signature of the current chunk
my $sigpos    = 0;      # true between "><msIdentifier" and the next "</idno" line
my $i         = 1;      # running number of the current chunk
my %seen;               # export paths already written during this run

while (my $line = <$teidata>) {
    $buffer .= $line;

    # Only the first "</idno" line after "><msIdentifier" names the chunk.
    if ($sigpos && $line =~ /<\/idno$/) {
        $signature = _signature_from_idno_line($line);
        $sigpos    = 0;
    }

    if ($line =~ /^><msIdentifier$/) {
        $sigpos = 1;
    }

    # A line holding nothing but ">" terminates the current chunk.
    if ($line =~ /^>$/) {
        if ($signature eq '') {
            $signature = "unnamed_$i";
            warn "Warning: no signature found for chunk $i, using '$signature'\n";
        }

        my $exportfile = "$outputpath/$signature.xml";
        if ($seen{$exportfile}++) {
            warn "Warning: '$exportfile' was already written in this run and is overwritten\n";
        }

        open(my $export, '>', $exportfile) or die "Could not open file '$exportfile' $!";
        print {$export} $buffer or die "Could not write to file '$exportfile' $!";
        # Buffered write errors (e.g. disk full) only surface at close.
        close($export) or die "Could not close file '$exportfile' $!";

        # The path is a byte string; decode it so the ":utf8" layer on STDOUT
        # does not double-encode non-ASCII characters.
        print "File splitted: ", decode($enc, $exportfile), "\n";

        $buffer    = '';
        $signature = '';    # never carry a name over into the next chunk
        $i++;
    }
}
close($teidata);

# Anything after the last ">" line has not been exported; make that visible.
if ($buffer =~ /\S/) {
    warn "Warning: content after the last '>' line was not written to any file\n";
}