-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfilter_alephseq.pl
executable file
·115 lines (100 loc) · 2.61 KB
/
filter_alephseq.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
#!/usr/bin/perl
use strict;
use Getopt::Long;
use File::Basename;
my $PROG=File::Basename::basename($0);
sub usage {
print STDERR<<EOD;
$PROG - 26.07.2010 (c) andres.vonarx\@unibas.ch
usage: perl $PROG [options]
Reads in an Aleph sequential file and outputs all records where the content
of a certain MARC field and subfield matches a given regular expression.
options:
--help show this text and quit
--version show version information and quit
--input=filename Aleph sequential input file (default: stdin)
--output=filename Aleph sequential output file (default: stdout)
--marctag=string where to look for: MARC field[+indicator] [subfield]
--regex=regex what to look for (regular expression)
--[no]ignorecase ignore case in pattern matching (default:ignorecase)
example:
\$ perl $PROG --in=dsv05.seq --out=gosteli.seq --marc='852 b' --regex='Gosteli' --noignorecase
EOD
exit;
}
my $help;
my $infile = '-';
my $outfile = '-';
my $marctag = '';
my $filter = '';
my $regex = '';
my $ignorecase = 1;
GetOptions(
'help' => \$help,
'version' => \$help,
'input=s' => \$infile,
'output=s' => \$outfile,
'marctag=s' => \$marctag,
'regex=s' => \$regex,
'ignorecase!' => \$ignorecase,
) or usage;
($help) and usage;
($infile) or usage;
($outfile) or usage;
($marctag) or usage;
($regex) or usage;
# -- Where to search: extract MARC field and subfield
$_ = $marctag;
my ( $Field, $SubField ) = split;
# -- What to search: generate and validate regular expression
if ( $ignorecase ) {
eval { $regex = qr/$regex/i } ;
( $@ ) and die "$PROG: bad regular expression: $regex\n";
} else {
eval { $regex = qr/$regex/ } ;
( $@ ) and die "$PROG: bad regular expression: $regex\n";
}
open(IN, "<$infile") or die("cannot read $infile: $!");
open(OUT, ">$outfile") or die("cannot write $outfile: $!");
my $prev_sysno=0;
my $Rec='';
my $ok=0;
while ( <IN> ) {
my($sysno)= /^(\d+)/;
if ( $sysno ne $prev_sysno ) {
checkrec();
$prev_sysno=$sysno;
}
$Rec .= $_;
}
checkrec();
close IN;
close OUT;
sub checkrec {
( $Rec ) or return;
my $ok=0;
local $_;
my @a = split(/\n/, $Rec);
while ( @a ) {
$_ = shift @a;
s/^\d+ (.....)...//;
my $tag = trim($1);
if ( $tag eq $Field ) {
if ( $SubField ) {
s|^.*\$\$$SubField||;
s|\$\$.*$||;
}
if ( m/$regex/ ) {
print OUT $Rec;
last;
}
}
}
$Rec='';
}
sub trim {
local $_ = shift;
s/^\s+//;
s/\s+$//;
$_;
}