-
Notifications
You must be signed in to change notification settings - Fork 0
/
logengine.pl
executable file
·3638 lines (3188 loc) · 114 KB
/
logengine.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/usr/bin/perl
#######################################################################
# logengine.pl, ABr, 5/30/01
#
# Engine that allows us to analyze any log file for patterns, and then
# to take actions based on the patterns.
#
# This code is copyrighted by Andy Bruce ("the author"). It is freely
# available for use in any application, but the source code must be
# released as-is. No warranty, either express or implied, is provided
# with this code. No implied warranty, nor implication of fitness for
# use or purpose, is provided with this code. This code is provided to
# the computing community in the hope that it will be useful. While the
# author is interested in hearing of defects or suggested improvements,
# the author makes no provision or promise to implement any suggestions
# or corrections.
use strict ;
require "debug.pl" ;
require "os.pl" ;
require "paths.pl" ;
require "utils.pl" ;
#######################################################################
# STRUCTURES
#
# A handy structure for holding name/value pairs. Used all over the
# place in this script.
# NAMEVALUEOBJ:
# NAME => name of the object
# VALUE => value associated with the object
#
# A handy structure is the "hash-array". A hash-array allows you to
# store an item indexed by a key (for fast lookups) as well as an
# array (to maintain original order).
# HASHARRAY:
# ARRAY => array entries
# HASH => hash entries
#
# Handy structure for the log files we open and process.
# LOGFILE:
# NAME => file name
# KEEP_OPEN => should this file continue to be read forever?
# HANDLE => the open file handle for this file
# LINENO => current line number
#
# This script can read any number of rule scripts. When we parse the
# command line, we store each rule script to read as one of these.
# RULESCRIPT:
# NAME => file name
# RULES => rules specific to this script
# PROCESSED => has this script file been processed?
#
# The RULEVAR structure defines a NAME/VALUE pair with an additional
# "PAREN" field. The PAREN field gives the numeric index of the match
# that we need to extract if the variable is a run-time variable.
# RULEVAR:
# NAME => name of the variable
# VALUE => for compile-time fields, always set. for run-time
# vars, determined at run-time
# PAREN => see above
# IS_ARRAY => true if this is an ARRAY variable
# IS_RUNTIME => true if this is a runtime variable
#
# The RUNTIMEEXTRACT structure defines the meta-data necessary to
# bind information from one variable to another variable at runtime.
# A specific example:
# BEGIN = $$RUNTIME_DATA([0-9]+)
# BEGIN = $RUNTIME_DATA
# In this example, the first BEGIN defines a variable called
# RUNTIME_DATA. The value of this variable is known only after a
# match is made (in this case, reading a line with one or more
# numerics in it). The second BEGIN gets $RUNTIME_DATA replaced
# *at run-time* with the value retrieved from the first match.
# Assume two input lines like the following:
# "Hello, World! 12345 is the number!"
# "For the second line, 12345 is what we have!"
# We process the first line. It matches the first BEGIN. Because
# the first BEGIN specifies a run-time variable, we take the
# appropriate data ("12345", the numeric) and save it into
# RUNTIME_DATA.
# Then we process the second line. Because the first BEGIN
# already matched, we check the second line against the second
# BEGIN. Before we do the regex, we replace the reference to
# RUNTIME_DATA with the value we retrieved from the first line.
# Thus, the second BEGIN becomes simply:
# BEGIN = 12345
# The second line matches, because the pattern "12345" occurs in
# it.
#
# RUNTIMEEXTRACT:
# VARNAME => name of the variable to bind to
# MATCH_IDX => number of the match, as in "$1" or "$2" in a
# straight Perl program
# IS_ARRAY => true if this variable is an array
#
# The RUNTIMEINSERT structure is the inverse of the RUNTIMEEXTRACT
# structure. For a particular match item, it may be necessary to
# insert a data value extracted by a different match in the same
# rule. In this case, we need to know the character position
# where the insertion should occur.
# RUNTIMEINSERT:
# VARNAME => name of the variable to bind from
# CHAR_POS => index where the data should be inserted
# LENGTH => length of the item to replace in the string
#
# A compiled regex (for speed; we can precompile if no run-time
# vars are specified in the regex).
# COMPILED_SUB:
# SRC_CODE => source code to be compiled
# COMPILED_CODE => compiled source code
#
# CACHEMATCH: This structure exists to *speed up* rule processing.
# Basically, if we detect that a regular expression is being executed
# twice, we save off the results from the first comparison.
# CACHEMATCH:
# REGEX => regex expression
# NUM_REGEXES => number of regexes for all rules that use this cache
# LAST_LINEID => last ID for which we have a match
# RESULTS => array of matched items from last regex
# NUM_MATCHES => number of matches made through this cache item
# COMPILED_SUB => COMPILED_SUB object
#
# The BEGIN/END match data associated with a rule has the following
# structure. Keep in mind that RUNTIME_VARS are basically deferred
# macros that get replaced just prior to doing a regex on a particular
# match item.
# MATCHDATA:
# ORIG_VALUE => unexpanded, original value
# REGEX_OPTIONS => regex options
# VALUE => expanded value (including all macros)
# RUNTIME_INSERT => array of RUNTIMEINSERT entries
# RUNTIME_EXTRACT => array of RUNTIMEEXTRACT entries
# IS_PRE => is this a "pre" match?
# IS_BEGIN => is this a "BEGIN" match?
# IS_END => is this an "END" match?
# IS_ACCUM => flag if this is an "accumulation"
# IS_CODE => does this match actually refer to code or a regex?
# MATCH_TIMEOUT => # lines within which this match must occur
# CACHEMATCH => if set, then this is the reference to the cached regex
# COMPILED_SUB => if IS_CODE true, then this is the compiled routine
#
# ACTIONOBJ defines a single action object
# NAME => name of the action
# VALUE => source code to execute
# CODE => compiled code (created after first execution)
#
# The RULE structure defines a single atomic rule that we can process
# RULEOBJ:
# NAME => name of the rule
# FULLNAME => fully-qualified rule name (for script context)
# IS_MACRO => is this a "real" rule or simply a macro?
# ENABLED => is this rule to be processed or not?
# SCRIPT => RULESCRIPT object that we read this rule from
# STARTLINE => beginning line for this rule from the script
# STOPLINE => ending line for this rule from the script
# ACTION => hash of ACTIONOBJ actions for this rule
# PRE_MATCH => array of MATCHDATA entries
# BEGIN_MATCH => array of MATCHDATA entries
# OPTIONAL_MATCH => array of MATCHDATA entries
# END_MATCH => array of MATCHDATA entries
# MATCHES => array of MATCHDATA entries, in order
# VARS => hash of RULEVAR entries associated with this rule
# FOUND => set to non-zero if this rule ever got executed
# RULE_TIMEOUT => if no matches in this # of lines, destroy instance
#
# The RULEINST structure defines a *potential* match for a rule as we
# scan through the log file(s).
# RULEINST:
# FIRST_TIME => flag
# RULE => pointer back to rule
# AR_INDEX => array index in global variable
# HASH_INDEX => array index of this instance in global hash
# MATCH_IDX => index number of current match
# MATCH_CNT => number of array entries (for quick reference)
# STARTLINE => line number where the match began
# STOPLINE => line number where the match ended
# RTVARS => hash of RUNTIMEVARS
# LAST_MATCH_LINE => line # of the last match (for timeout)
# RULES_CREATED => hash of rule names created from this rule
# LOGFILE => LOGFILE structure where the first match occurred
#
# The RUNTIMEVAR structure allows us to store extracted run-time
# data separately for each rule instance.
# RUNTIMEVAR:
# RULEINST => instance of the rule
# RULEVAR => RULEVAR entry from the rule vars hash
# VALUE => actual value extracted at run-time
#######################################################################
# GLOBALS
# Process return codes. These are the values returned by the command
# line parser and handed to exit() by the top-level driver.
my $gRC_SUCCESS = 0 ; # no error
my $gRC_HELP = 1 ; # help requested
my $gRC_CMD_LINE_SWITCH = 2 ; # invalid switch passed
my $gRC_CMD_LINE_INTERNAL = 3 ; # internal cmd line parse err
my $gRC_CMD_LINE_ARGS = 4 ; # error with cmd line args
my $gRC_NO_SCRIPTS = 5 ; # no scripts specified
my $gRC_SCRIPT = 6 ; # general error in script
my $gRC_ERROR = 7 ; # general purpose error
# for sorting logfile names read from disk (selected with -sort)
my $gSORT_NONE = 0 ;
my $gSORT_ASC = 1 ;
my $gSORT_DESC = 2 ;
# Settings filled in by parseCmdLine. Note that several of these hold
# the numeric argument of their switch rather than a simple flag.
my $gFOREVER = 0 ; # read next log file forever? (-forever / -noforever)
my $gSORT = $gSORT_NONE ; # sort logfile names from disk?
my $gDUMP = 0 ; # dump the loaded file? (-dump)
my $gSTATUS = 0 ; # numeric argument of -status (number of lines)
my $gSTUDY = 0 ; # numeric argument of -study (line length)
my $gBUFFER = 0 ; # numeric argument of -buffer (KB; fileOpen multiplies by 1024)
my $gFAST = 1 ; # fast simple rule processing? (on by default; -nofast clears it)
my $gMAN = 0 ; # show add'l usage notes? (-man)
my $gPrintedTitle = 0 ; # only print the title once
my $gPrintedUsage = 0 ; # only print the help once
my %gUserOptions ; # user options from cmd line: name => array ref of values
my %gUserOptionsUsed ; # helper to save ref'ed options
# the list of shared code variables. each one is eval'ed at the
# global scope.
my @gSharedCode ;
# the list of termination code variables; gets run at program end
my @gTerminationCode ;
# this is the list of all loaded rules for all scripts. it is a
# HASHARRAY: ARRAY keeps the load order, HASH gives lookup by name.
my $gRules = {
ARRAY => [],
HASH => {},
} ;
# this is the list of rules that we need to process as we read each
# line from the log file(s). it's a subset of the gRules above
my @gRulesToProcess ;
# this is the actual set of currently active rule instances. the
# hash is a hash of rule instance arrays, which allows me to
# support multiple concurrent instances of a rule. the array is
# simply an array of rule instances.
my %gRuleInstances ;
my @gRuleInstances ;
my $gInstanceIndex = 0 ;
my $gNumInstances = 0 ;
# default used for a new rule's RULE_TIMEOUT (see scriptNewRule)
my $gRuleTimeoutDefault = 15000 ;
my $gMatchTimeoutDefault = 0 ;
# we save the last occurrence of each instance as it fires.
my %gRulePrevInstances ;
my $gRuleInstCurrent ;
my @gWinningRulesForInstanceCreation ;
# the last set of matches from a regex
my @gMatches ;
my @gScripts ; # rule scripts from cmd line (in order given)
my %gScripts ; # hash of the scripts, keyed by script NAME
my @gLogFiles ; # log files to process
my $gLogFile ; # current log file entry processed
# allowed actions (space-separated list of action names)
my $gActionTypes =
"CREATE COMPLETE DESTROY TIMEOUT " .
"MATCH_TIMEOUT MISSING INCOMPLETE" ;
# buffer for output
my %gBuffers ;
# should logengine continue processing?
my $gLogengineQuitFlag = 0 ;
# CACHEMATCH buffer and data
my @gCacheMatches ; # array of CACHEMATCH items
# line last read (available to scripts)
my $LINE_ID ; # unique number for each line read
my $LINE_LASTREAD = "" ;
# set some variables available for the scripts
my $LINENUMBER_CURRENT = 0 ;
my $LINENUMBER_START = 0 ;
my $LINENUMBER_STOP = 0 ;
my $LINENUMBER_RANGE = "0,0" ;
# handy file access options (for optimized file i/o).
# the OPENMODE values select the strategy used by fileOpen:
# SIMPLE => plain open(); the raw handle is returned
# NORMAL => open() with a mode prefix; a handle object is returned
# INTERNAL_BUFFER => sysopen(); lines are split out of our own buffer
my $gFILE_OPENMODE_SIMPLE = -1 ;
my $gFILE_OPENMODE_NORMAL = 0 ;
my $gFILE_OPENMODE_INTERNAL_BUFFER = 1 ;
# the MODE values select the direction of the open
my $gFILE_MODE_INPUT = 0 ;
my $gFILE_MODE_OUTPUT = 1 ;
my $gFILE_MODE_APPEND = 2 ;
# open mode currently in effect; -buffer switches it to INTERNAL_BUFFER
my $gFileMode = $gFILE_OPENMODE_NORMAL ;
#######################################################################
# a little different here; usually I do "exit( main() ) ;" here.
# however, i want the user to have the ability to define variables and
# so on at the global level.
# initialize
# Top-level driver. The work is done at file scope (rather than in a
# main() routine) so that macros and shared code from the rule scripts
# are eval'ed at the global level. Steps, in order:
#   1. init(); exit immediately on failure
#   2. define one global scalar per macro rule
#   3. eval every SHARED_CODE entry
#   4. reject -user options that were never referenced
#   5. run(), then done(), and exit with the resulting code
my $rc = init() ;
exit( $rc ) if( $gRC_SUCCESS != $rc ) ;

# first, we pre-declare global macros. each macro rule becomes one
# assignment statement; all statements are eval'ed together below.
my $macros = "" ;
my $rule ;
foreach $rule (@{$gRules->{ARRAY}}) {
    if( $rule->{IS_MACRO} ) {
        # a macro is a pseudo-rule with no actions; its value is the
        # first BEGIN match of the rule
        my $arBeginMatch = $rule->{BEGIN_MATCH} ;
        my $match = ${$arBeginMatch}[0] ;
        my $newResult = {
            EXPANDED => "",
            ERROR => "",
            RESOLVED_ITEMS => {},
            OPEN_PARENS => 0,
            RUNTIME => 0,
            IDX => 0,
        } ;
        # NOTE(review): the return code is not checked; only the
        # EXPANDED member of the result is used.
        my $rc = resolveExpandItem(
            $rule, 'BEGIN_MATCH', 0, $match, $newResult ) ;
        my $expanded = $newResult->{EXPANDED} ;

        # a trailing '$' would otherwise interpolate inside the
        # double-quoted string we generate
        $expanded =~ s/\$$/\\\$/ ;

        # add to our list
        $macros .= " " if( length( $macros ) ) ;
        $macros .= "\$$rule->{NAME}=\"$expanded\" ;" ;
    } #if
} #foreach
ABR::verboseprint( "Executing user-defined macros: '$macros'\n" ) ;
no strict ;
# double every backslash so it survives the double-quoted strings
$macros =~ s/(\\)/\\$1/g ;
#print "macros='$macros'\n" ;
eval "$macros" ;
my $result = $@ ;
use strict ;
if( length( $result ) ) {
    print "Macro Definition failure: '$result'\n" ;
} #if

# we now use this opportunity to declare any user variables at the
# global level.
my $sharedCode = "" ;
my $codeEntry ;
foreach $codeEntry (@gSharedCode) {
    my $code = $codeEntry->{VALUE} ;
    # trace the code that is about to run (this used to print the
    # always-empty $sharedCode, so the trace showed nothing)
    ABR::verboseprint( "Executing user-defined shared-code: '$code'\n" ) ;
    no strict ;
    eval "$code" ;
    my $result = $@ ;
    use strict ;
    if( length( $result ) ) {
        print "Shared Code '$codeEntry->{NAME}' failure: '$result'\n" ;
        print " Code: '$code'\n" ;
    } #if
} #foreach

# now, we ensure that every user-defined option passed on the command
# line was referenced
my $key ;
my $userOptError = 0 ;
foreach $key (sort( keys( %gUserOptions ) )) {
    if( !$gUserOptionsUsed{$key} ) {
        title() ;
        $userOptError = 1 ;
        print "Error: unreferenced user option '$key'\n" ;
    } #if
} #foreach
if( $userOptError ) {
    print "\n" ;
    usage() ;
    exit( $gRC_CMD_LINE_SWITCH ) ;
} #if

# if successful init, run program
if( !$rc ) {
    $rc = run() ;

    # terminate the program
    my $donerc = done( $rc ) ;

    # if we didn't have an error from above, use the return code
    # from the termination routine
    $rc = $donerc if( !$rc ) ;
} #if

# the final result
exit( $rc ) ;
#######################################################################
# PROGRAM IMPLEMENTATION
############################################################
# utilities
# Print the program banner (name, version, copyright, feature blurb)
# to standard output. The banner is shown at most once per run; the
# $gPrintedTitle flag remembers that it has already been printed.
sub title {
    if( $gPrintedTitle ) {
        return ;
    } #if
    $gPrintedTitle = 1 ;

    # one print per banner line, in display order
    my @banner = (
        "$0, v1.0\n",
        "Copyright (c) 2001-2005 Andy Bruce\n",
        "\n",
        "With optimized caching, pre-compiled regex,\n",
        "and multi-state match support\n",
        "\n",
    ) ;
    my $text ;
    foreach $text (@banner) {
        print $text ;
    } #foreach
} #title
############################################################
# display usage screen
# Print the usage screen to standard output, preceded by the title.
# The screen is shown at most once per run ($gPrintedUsage). When
# $gMAN is set, the extended "manual page" notes follow the summary.
sub usage {
    if( $gPrintedUsage ) {
        return ;
    } #if
    $gPrintedUsage = 1 ;
    title() ;

    # the short summary that is always shown
    my @screen = (
        "Usage:\n",
        " $0 -r(ules) <rules-script-name>\n",
        " -stdin -(no)forever -sort <asc/desc/none>\n",
        " -log(file) <logfile-spec> -status <lines>\n",
        " -study <length> -user \"name=value ...\"\n",
        " -buffer <KB to buffer> -title -version -verbose -debug -?\n",
        "Options:\n",
        " -rules - Can be repeated for multiple scripts\n",
        " -stdin - Read rules from stdin\n",
        " -forever - indicates the next log file should be read forever\n",
        " -sort - sorts the next log file names read from disk\n",
        " -logfile - Can be repeated to process multiple logs\n",
        " -status - display info on files as we process them\n",
        " -(no)fast - optimizes rules processing for simple rules\n",
        " -study - tweak that *may* speed up processing\n",
        " -user - allows you to pass cmd line args to rules scripts\n",
        " -buffer - use buffered I/O\n",
        " -title - force the title to print\n",
        " -version - prints only version information\n",
        " -verbose - verbose mode--prints detailed load information\n",
        " -debug - debug mode--prints numerous messages\n",
        " -dump - print loaded script file and exit\n",
        " -? - prints this usage screen (also -help or -usage)\n",
        " -man - prints usage screen with many notes\n",
    ) ;

    # the long notes, appended only for -man
    if( $gMAN ) {
        push( @screen,
            "\n",
            "MANUAL PAGE NOTES:\n",
            " -buffer\n",
            " This switch allows the caller to indicate how big the\n",
            " internal buffer used to read data files should be. The\n",
            " default value is 16 (16,384 bytes). Many tests show that\n",
            " this default provides the best program performance. Use a\n",
            " value of zero to use the native I/O buffering provided by\n",
            " Perl.\n",
            "\n",
            " -fast\n",
            " This switch is turned ON by default, which means that\n",
            " *simple* rule matches don't fire the CREATE or DESTROY\n",
            " actions. Only the COMPLETE action gets fired. This greatly\n",
            " improves the program's performance, but gives the rule\n",
            " definer less notifications.\n",
            "\n",
            " A *simple* rule is one which has only a single BEGIN\n",
            " statement defined for it. Use '-nofast' to ensure that\n",
            " CREATE/DESTROY actions get fired for every rule.\n",
            " Example:\n",
            " [SIMPLE_RULE]\n",
            " BEGIN=^Hello, World!\n",
            " Action.COMPLETE=\"Another match\@\$LINENUMBER_START\\n\"\n",
            "\n",
            " -forever\n",
            " Use this switch to indicate that a file should be read\n",
            " continuously. This allows you to emulate 'tail' function-\n",
            " ality in the logengine. Use it when you're scanning a file\n",
            " that's being updated as you scan it.\n",
            "\n",
            " -sort\n",
            " When you specify an argument for the -logfile option, you\n",
            " may specify glob wildcards (e.g. 'mylog.*'). By default,\n",
            " the logengine loads these files in the order it reads them\n",
            " from the disk (not necessarily sorted on all OS's). You can\n",
            " control the sort order by using ASC, DESC, or NONE. Keep in\n",
            " mind that you can use multiple -logfile options on the command\n",
            " line; the last -sort option read controls how the next set of\n",
            " log files gets read from the disk.\n",
            "\n",
            " -study\n",
            " This switch submits each line with a length equal to\n",
            " or greater than the specified value to extra analysis.\n",
            " This extra analysis rarely improves the overall program\n",
            " performance, but where speed is of the essence it's worth\n",
            " investigating this switch with different values. Using a\n",
            " value of 100 or more generally provides the best results.\n",
            "\n",
            " -user\n",
            " This switch allows you to pass cmd line arguments directly\n",
            " to a loaded rules script. The next argument should always be\n",
            " wrapped in quotes, and should have the form name=value:\n",
            " -user \"myoption=myvalue\"\n",
            " The logengine saves these user variables, and rule scripts\n",
            " can access them by using the LOGENGINE_GET_USER_OPT function.\n",
            "\n",
            " The logengine allows you to specify multiple occurrences of\n",
            " the same option; the LOGENGINE_GET_USER_OPT function returns\n",
            " an array of all the values that the user specified for an\n",
            " option on the command line.\n",
        ) ;
    } #if

    # emit the screen one piece at a time, in order
    my $text ;
    foreach $text (@screen) {
        print $text ;
    } #foreach
} #usage
############################################################
# display error and exit
# Report a fatal error and terminate the process.
#   $rc  - exit code for the process
#   $msg - text shown after the "Error: " prefix
# The title is printed first (if it has not been shown yet). This
# routine does not return.
sub _errorExit {
    my $rc = shift ;
    my $msg = shift ;

    title() ;
    print "Error: $msg\n" ;
    print "\n" ;

    exit( $rc ) ;
} #_errorExit
############################################################
# display error and usage screen
# Report an error, show the usage screen, and hand the return code
# back so the caller can write "return _errorHelp( ... )".
#   $rc  - return code to pass through unchanged
#   $msg - text shown after the "Error: " prefix
# Returns: $rc
sub _errorHelp {
    my $rc = shift ;
    my $msg = shift ;

    # title first, then the message, then the help text
    title() ;
    print "Error: $msg\n" ;
    print "\n" ;
    usage() ;

    return $rc ;
} #_errorHelp
############################################################
# file handle operations
# Open a file using one of three strategies.
#   $fname    - name of the file to open
#   $openMode - one of the $gFILE_OPENMODE_* values:
#                 SIMPLE          => plain two-argument open(); the raw
#                                    file handle (a glob) is returned
#                 NORMAL          => open() with a mode prefix
#                 anything else   => sysopen() plus binmode(); lines are
#                                    later split out of our own buffer
#   $mode     - one of the $gFILE_MODE_* values (INPUT, OUTPUT, APPEND);
#               ignored for SIMPLE. An unrecognized value opens for input.
# Returns: undef if the open fails. For SIMPLE, the raw handle. Otherwise
# a hash reference (the "file object") consumed by fileReady,
# fileReadLine and fileClose.
sub fileOpen {
    my( $fname, $openMode, $mode ) = @_ ;

    # localizing the glob gives every call its own file handle
    local( *FILEHANDLE ) ;
    my $error ;
    my $maxBufferSize = $gBUFFER * 1024 ;

    if( $gFILE_OPENMODE_SIMPLE == $openMode ) {
        # two-argument open is deliberate here: the name may carry its
        # own mode characters
        return undef if( !open( FILEHANDLE, "$fname" ) ) ;
        return *FILEHANDLE ;
    } elsif( $gFILE_OPENMODE_NORMAL == $openMode ) {
        # open the file using normal perl "open"; the mode is passed
        # as a prefix of the file name
        my $_mode = "" ;
        if( $gFILE_MODE_INPUT == $mode ) {
            $_mode = "< " ;
        } elsif( $gFILE_MODE_OUTPUT == $mode ) {
            $_mode = "> " ;
        } elsif( $gFILE_MODE_APPEND == $mode ) {
            $_mode = ">> " ;
        } #if

        # do the open
        return undef if( !open( FILEHANDLE, "$_mode$fname" ) ) ;
    } else {
        # Use the real Fcntl constants. The bare words O_RDONLY etc.
        # are only strings (numerically zero) unless Fcntl was imported
        # before this point, which silently made every open read-only.
        require Fcntl ;
        my $_mode = Fcntl::O_RDONLY() ;
        if( $gFILE_MODE_OUTPUT == $mode ) {
            # same effect as "> " above: create if missing, truncate
            $_mode = Fcntl::O_WRONLY() | Fcntl::O_CREAT() |
                Fcntl::O_TRUNC() ;
        } elsif( $gFILE_MODE_APPEND == $mode ) {
            # same effect as ">> " above: create if missing, append
            $_mode = Fcntl::O_WRONLY() | Fcntl::O_CREAT() |
                Fcntl::O_APPEND() ;
        } #if
        return undef if( !sysopen( FILEHANDLE, "$fname", $_mode ) ) ;

        # do this so we get proper bytes
        binmode( FILEHANDLE ) ;

        # a zero-sized buffer would make every sysread return 0 bytes,
        # which looks like end-of-file; fall back to the 16 KB default
        # described in the usage notes
        $maxBufferSize = 16 * 1024 if( $maxBufferSize <= 0 ) ;
    } #if

    # create object for user
    my $handle = {
        HANDLE => *FILEHANDLE,
        OPEN_MODE => $openMode,
        MAX_BUFFER_SIZE => $maxBufferSize,
        BUFFER => undef,
        OFS => 0,       # offset of the next unread byte in BUFFER
        LEN => 0,       # number of valid bytes in BUFFER
        BYTES_READ => 0,
        BYTES_WRITTEN => 0,
        ERROR => $error,
    } ;
    return $handle ;
} #fileOpen
# Ask whether a file object has data ready to read right now.
#   $handle - file object (hash reference with a HANDLE member)
# Returns: -1 when no file object is given; 1 on Windows (where
# select is not used); otherwise the count reported by a
# zero-timeout select() on the handle.
sub fileReady {
    my $handle = shift() ;

    # sanity
    if( !defined( $handle ) ) {
        return -1 ;
    } #if

    # on Windows we can't use select: an open file counts as ready
    return 1 if( $ABR::gOsIsWindows ) ;

    # build the bitmasks: we watch our descriptor for reading and for
    # exceptions; nothing is watched for writing
    my $readBits = "" ;
    my $writeBits = "" ;
    vec( $readBits, fileno( $handle->{HANDLE} ), 1 ) = 1 ;
    my $errorBits = $readBits | $writeBits ;

    # poll with a timeout of zero (is data ready now?)
    my( $numReady, $timeLeft ) =
        select( $readBits, $writeBits, $errorBits, 0 ) ;

    return $numReady ;
} #fileReady
# Read the next line from a file object, without its line terminator.
#   $handle - file object (hash reference) as built by fileOpen
# Returns: the line with trailing LF / CR-LF removed ("" for an empty
# line), or undef when no more data is available.
#
# For objects opened in NORMAL mode the line comes from Perl's own
# buffered read. For every other mode the data is pulled in with
# sysread() in chunks of MAX_BUFFER_SIZE bytes and lines are split out
# of BUFFER; OFS is the offset of the next unread byte and LEN the
# number of valid bytes.
sub fileReadLine {
    my $handle = shift() ;

    if( $gFILE_OPENMODE_NORMAL == $handle->{OPEN_MODE} ) {
        my $fileHandle = $handle->{HANDLE} ;
        my $line = <$fileHandle> ;
        return undef if( !defined( $line ) ) ;
        chomp( $line ) ;
        $line =~ s/[\n\r]+$// ;
        return $line ;
    } #if

    my $line ;
    while( 1 ) {
        # refill the buffer once everything in it has been consumed
        if( $handle->{OFS} >= $handle->{LEN} ) {
            my $len = sysread( $handle->{HANDLE},
                $handle->{BUFFER}, $handle->{MAX_BUFFER_SIZE} ) ;
            if( !$len ) {
                # end of data (or read error). a final line that has no
                # line feed is handed back instead of being dropped;
                # $line is still undef when nothing was collected.
                if( defined( $line ) && substr( $line, -1 ) eq chr( 0x0D ) ) {
                    chop( $line ) ;
                } #if
                return $line ;
            } #if

            # save data
            $handle->{OFS} = 0 ;
            $handle->{LEN} = $len ;
            $handle->{BYTES_READ} += $len ;
        } #if

        # find first 0x0A (line feed) in the unread part
        my $idx = index( $handle->{BUFFER}, chr( 0x0A ),
            $handle->{OFS} ) ;

        # if not found, take only the *unread* remainder of the buffer
        # (bytes before OFS were already returned by earlier calls) and
        # reloop; this is how lines longer than the buffer are handled
        if( $idx < 0 ) {
            $line .= substr( $handle->{BUFFER}, $handle->{OFS} ) ;
            $handle->{OFS} = $handle->{LEN} ;
            next ;
        } #if

        # append everything up to the line feed and step past it
        $line = "" if( !defined( $line ) ) ;
        $line .= substr( $handle->{BUFFER},
            $handle->{OFS}, $idx - $handle->{OFS} ) ;
        $handle->{OFS} = $idx + 1 ;

        # get rid of the CR if necessary. test the last character of
        # the whole line: the CR may have arrived in an earlier chunk.
        if( length( $line ) && substr( $line, -1 ) eq chr( 0x0D ) ) {
            chop( $line ) ;
        } #if
        return $line ;
    } #while
} #fileReadLine
# Close a file object and clear the caller's variable.
#   $refHandle - REFERENCE to the variable holding the file object.
#                Taking a reference lets us set the caller's variable
#                to undef after the close, so a later call on the same
#                variable is a harmless no-op.
# Returns: 0 when there is nothing to close (no reference given, or the
# referenced variable is already undef).
sub fileClose {
    my( $refHandle ) = @_ ;

    # nothing to do without an open file object
    unless( defined( $refHandle ) && defined( ${$refHandle} ) ) {
        return 0 ;
    } #unless

    my $fileObj = ${$refHandle} ;
    close( $fileObj->{HANDLE} ) if( defined( $fileObj->{HANDLE} ) ) ;

    # mark the caller's variable as closed
    ${$refHandle} = undef ;
} #fileClose
############################################################
# validate the command line
# Parse @ARGV and fill in the global settings.
#
# The parser is a small state machine: a switch that takes a value
# (-rules, -logfile, -status, -study, -buffer, -sort, -user) moves the
# parser into a state in which the NEXT argument is consumed as that
# value; every other switch is handled on the spot.
#
# Returns: $gRC_SUCCESS when parsing succeeded, $gRC_HELP when the help
# screen was requested, or one of the error codes (after printing the
# error and the usage screen). "-version" prints the title and exits
# the process directly.
sub parseCmdLine {
    my $rc = $gRC_SUCCESS ;

    # valid states
    my $stateNone = 0 ;     # expecting a switch
    my $stateScript = 1 ;   # expecting the value of -rules
    my $stateLogFile = 2 ;  # expecting the value of -logfile
    my $stateStatus = 3 ;   # expecting the value of -status
    my $stateStudy = 4 ;    # expecting the value of -study
    my $stateBuffer = 5 ;   # expecting the value of -buffer
    my $stateSort = 6 ;     # expecting the value of -sort
    my $stateUser = 7 ;     # expecting the value of -user
    my $state = $stateNone ;

    # iterate over arguments
    my $arg ;
    foreach $arg (@ARGV) {
        if( $state == $stateNone ) {
            # look for each of our options
            if( $arg =~ /^--?vers(ion)?$/i ) {
                # show title/version only; then exit
                title() ;
                exit( $gRC_SUCCESS ) ;
            } elsif( $arg =~ /^--?title$/i ) {
                # show title/version and keep going
                title() ;
            } elsif( $arg =~ /^--?verb(ose)?$/i ) {
                $ABR::gVERBOSE = 1 ;
            } elsif( $arg =~ /^--?d(ebug)?$/i ) {
                $ABR::gDEBUG = 1 ;
            } elsif( $arg =~ /^(--?h(elp)?|--?\?|--?u(sage)?)$/i ) {
                usage() ;
                return $gRC_HELP ;
            } elsif( $arg =~ /^--?man$/i ) {
                $gMAN = 1 ;
                usage() ;
                return $gRC_HELP ;
            } elsif( $arg =~ /^--?r(ules)?$/i ) {
                # next arg should be a rules script
                $state = $stateScript ;
            } elsif( $arg =~ /^--?stdin$/i ) {
                # the rules are read from stdin; we make an entry
                my $script = {
                    NAME => 'STDIN',
                    RULES => undef,
                } ;
                push( @gScripts, $script ) ;
                $gScripts{$script->{NAME}} = $script ;
            } elsif( $arg =~ /^--?dump$/i ) {
                $gDUMP = 1 ;
            } elsif( $arg =~ /^--?(no)?forever$/i ) {
                # should next log file be kept open forever? the
                # captured "no" prefix decides, whatever its case
                $gFOREVER = defined( $1 ) ? 0 : 1 ;
            } elsif( $arg =~ /^--?l(og)?(file)?$/i ) {
                # next arg should be a log file to process
                $state = $stateLogFile ;
            } elsif( $arg =~ /^--?status$/i ) {
                # next arg should be the number lines
                $state = $stateStatus ;
            } elsif( $arg =~ /^--?study$/i ) {
                # next arg should be the length for a study to pop
                $state = $stateStudy ;
            } elsif( $arg =~ /^--?buffer$/i ) {
                # next arg should be the buffer size in KB
                $state = $stateBuffer ;
            } elsif( $arg =~ /^--?sort$/i ) {
                # next arg should be the type of sorting to occur
                $state = $stateSort ;
            } elsif( $arg =~ /^--?user$/i ) {
                # next arg should be a "name=value" user option
                $state = $stateUser ;
            } elsif( $arg =~ /^--?(no)?fast$/i ) {
                # turn fast processing on/off; the captured "no"
                # prefix decides, whatever its case
                $gFAST = defined( $1 ) ? 0 : 1 ;
            } else {
                return _errorHelp( $gRC_CMD_LINE_SWITCH,
                    "Invalid switch: $arg" ) ;
            } #if
        } elsif( $stateScript == $state ) {
            my $script = {
                NAME => $arg,
                RULES => undef,
            } ;
            push( @gScripts, $script ) ;
            $gScripts{$script->{NAME}} = $script ;
            $state = $stateNone ;
        } elsif( $stateLogFile == $state ) {
            # update the file name passed (use forward slashes)
            if( $ABR::gOsIsWindows ) {
                $arg =~ s/\\/\//g ;
            } #if

            # a lone '-' means STDIN and is taken as-is; anything else
            # is a file spec that is expanded from the disk
            my @files ;
            if( $arg ne '-' ) {
                # split into directory and file spec. a spec directly
                # below the root ("/name") has its slash at index 0.
                my $idx = rindex( $arg, "/" ) ;
                my $dir = "." ;
                my $file = $arg ;
                if( $idx >= 0 ) {
                    $dir = $idx ? substr( $arg, 0, $idx ) : "/" ;
                    $file = substr( $arg, $idx + 1 ) ;
                } #if

                # load the log files
                @files = ABR::path_readDirSpec( $dir, $file ) ;
                if( !scalar( @files ) ) {
                    return _errorHelp( $gRC_CMD_LINE_SWITCH,
                        "Error reading logfile '$arg'" ) ;
                } #if
            } else {
                push( @files, $arg ) ;
            } #if

            # sort appropriately ($gSORT_NONE keeps the disk order)
            if( $gSORT == $gSORT_ASC ) {
                @files = sort( @files ) ;
            } elsif( $gSORT == $gSORT_DESC ) {
                @files = sort( {$b cmp $a} @files ) ;
            } #if

            # now create the logfile entries; each one remembers the
            # -forever setting in effect at this point
            my $file ;
            foreach $file (@files) {
                my $logfile = {
                    NAME => $file,
                    KEEP_OPEN => $gFOREVER,
                } ;
                push( @gLogFiles, $logfile ) ;
            } #foreach
            $state = $stateNone ;
        } elsif( $stateStatus == $state ) {
            $gSTATUS = $arg ;
            if( !( $gSTATUS =~ m/^[0-9]+$/ ) ) {
                return _errorHelp( $gRC_CMD_LINE_SWITCH,
                    "\"-status\" requires number of lines" ) ;
            } #if
            $state = $stateNone ;
        } elsif( $stateStudy == $state ) {
            $gSTUDY = $arg ;
            if( !( $gSTUDY =~ m/^[0-9]+$/ ) ) {
                return _errorHelp( $gRC_CMD_LINE_SWITCH,
                    "\"-study\" requires a line length" ) ;
            } #if
            $state = $stateNone ;
        } elsif( $stateBuffer == $state ) {
            $gBUFFER = $arg ;
            if( !( $gBUFFER =~ m/^[0-9]+$/ ) ) {
                return _errorHelp( $gRC_CMD_LINE_SWITCH,
                    "\"-buffer\" requires KB to buffer" ) ;
            } #if

            # zero means "use the native I/O buffering provided by
            # Perl"; an internal buffer of zero bytes could never
            # read anything
            $gFileMode = ( $gBUFFER > 0 )
                ? $gFILE_OPENMODE_INTERNAL_BUFFER
                : $gFILE_OPENMODE_NORMAL ;
            $state = $stateNone ;
        } elsif( $stateSort == $state ) {
            if( $arg =~ m/^asc((end)?ing)?$/i ) {
                $gSORT = $gSORT_ASC ;
            } elsif( $arg =~ m/^desc((end)?ing)?$/i ) {
                $gSORT = $gSORT_DESC ;
            } elsif( $arg =~ m/^none$/i ) {
                $gSORT = $gSORT_NONE ;
            } else {
                return _errorHelp( $gRC_CMD_LINE_SWITCH,
                    "\"-sort\" requires none, asc, desc modifier" ) ;
            } #if
            $state = $stateNone ;
        } elsif( $stateUser == $state ) {
            # extract name=value. the result of the match itself is
            # tested: after a failed match $1 and $2 still hold the
            # values of an earlier successful match.
            if( $arg !~ m/^([^=]+)=(.*)$/ ) {
                return _errorHelp( $gRC_CMD_LINE_SWITCH,
                    "\"-user\" requires argument in form \"name=value\"" ) ;
            } #if
            my( $optName, $optValue ) = ( $1, $2 ) ;

            # an option may be repeated; all values are kept in an
            # array (created on first use)
            push( @{$gUserOptions{$optName}}, $optValue ) ;
            $state = $stateNone ;
        } else {
            return _errorHelp( $gRC_CMD_LINE_INTERNAL,
                "Invalid parse state: $state" ) ;
        } #if
    } #foreach

    # we shouldn't be in a state: the last switch is missing its value
    if( $stateNone != $state ) {
        return _errorHelp( $gRC_CMD_LINE_SWITCH,
            "Improperly terminated switch" ) ;
    } #if

    # we must have at least one script file to read
    if( !scalar( @gScripts ) ) {
        return _errorHelp( $gRC_NO_SCRIPTS,
            "Must specify a rules script to read" ) ;
    } #if
    return $rc ;
} #parseCmdLine
############################################################
# load/compile a script file
sub scriptNewRule {
my( $name, $state ) = @_ ;
# we always clear out the RULE pointer since we're in a
# new section.
$state->{IN_DEFINE_MACRO} = 0 ;
$state->{RULE} = undef ;
# empty section name is invalid. trim the name and test.
$name =~ s/^\s*(.*?)\s*$/$1/ ;
if( !length( $name ) ) {
$state->{ERROR} = "Empty rule name" ;
return $gRC_SCRIPT ;
} #if
# reset rule-specific state info
$state->{RULE} = undef ;
$state->{REGEX_OPTIONS} = "" ;
$state->{IN_DEFINE_MACRO} = 0 ;
$state->{IN_SHARED_CODE} = 0 ;
$state->{MATCH_TIMEOUT} = 0 ;
$state->{MATCH_NEXT_LINE} = 0 ;
# if we have the special keyword DEFINE_MACRO, then we
# set our state and return
$state->{IN_DEFINE_MACRO} = ( $name =~ m/^DEFINE_MACRO$/i ) ;
return $gRC_SUCCESS if( $state->{IN_DEFINE_MACRO} ) ;
# if we have the special keyword SHARED_CODE, then we
# set our state and return
$state->{IN_SHARED_CODE} = ( $name =~ m/^SHARED_CODE$/i ) ;
return $gRC_SUCCESS if( $state->{IN_SHARED_CODE} ) ;
# if we have the special keyword TERMINATION_CODE, then we
# set our state and return
$state->{IN_TERMINATION_CODE} = ( $name =~ m/^TERMINATION_CODE$/i ) ;
return $gRC_SUCCESS if( $state->{IN_TERMINATION_CODE} ) ;
# get the full, qualified name of the rule. this allows
# the same rule to be defined in multiple files; each rule
# is independent.
#my $fullName = uc( "$state->{FNAME}:$name" ) ;
my $fullName = uc( "$name" ) ;
# we are defining a new rule with this section. make sure
# we don't have a duplicate.
my $isDupe = exists( ${$gRules->{HASH}}{$fullName} ) ;
if( $isDupe ) {
$state->{ERROR} = "Duplicate rule '$name'" ;
return $gRC_SCRIPT ;
} #if
# create a new entry
my $rule = {
NAME => $name,
FULLNAME => $fullName,
IS_MACRO => 0,
ENABLED => 1,
SCRIPT => $state->{SCRIPT},
STARTLINE => $state->{LINE_NO},
STOPLINE => $state->{LINE_NO},
ACTION => undef,
PRE_MATCH => [],
BEGIN_MATCH => [],
OPTIONAL_MATCH => [],
END_MATCH => [],
MATCHES => [],
VARS => {},
FOUND => 0,
RULE_TIMEOUT => $gRuleTimeoutDefault,
} ;