-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 67ad053
Showing
9 changed files
with
218 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
dist | ||
dist-* | ||
cabal-dev | ||
*.o | ||
*.hi | ||
*.hie | ||
*.chi | ||
*.chs.h | ||
*.dyn_o | ||
*.dyn_hi | ||
.hpc | ||
.hsenv | ||
.cabal-sandbox/ | ||
cabal.sandbox.config | ||
*.prof | ||
*.aux | ||
*.hp | ||
*.eventlog | ||
.stack-work/ | ||
cabal.project.local | ||
cabal.project.local~ | ||
.HTF/ | ||
.ghc.environment.* |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
# Revision history for hlogsearch | ||
|
||
## 0.1.0.0 -- 2021-04-18 | ||
|
||
* First version. Implemented basic search for a log entry's timestamp using an offset in the file as an index |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
module Data.Time.UTCTimes ( | ||
compareLogTimes, | ||
mkUTCTime, | ||
zeroUTCJulianDay | ||
) where | ||
|
||
import Data.Time.Clock (UTCTime (UTCTime), diffUTCTime, NominalDiffTime) | ||
import Data.Time.ISO8601 ( parseISO8601 ) | ||
import Data.Fixed (Pico) | ||
import Data.Time.Calendar (fromGregorian) | ||
import Data.Time.LocalTime (LocalTime, ZonedTime, timeOfDayToTime, TimeOfDay(TimeOfDay)) | ||
|
||
-- | Compare two log timestamps, treating them as equal when they differ by
-- no more than @epsilon@.
--
-- Returns 'GT' when the first timestamp is later than the second by more
-- than @epsilon@, 'LT' when it is earlier by more than @epsilon@, and 'EQ'
-- in every other case. A difference of exactly @epsilon@ (in either
-- direction) therefore counts as 'EQ', which keeps the function total.
compareLogTimes :: UTCTime -> UTCTime -> NominalDiffTime -> Ordering
compareLogTimes lhs rhs epsilon
  | diff > epsilon    = GT
  | diff < (-epsilon) = LT
  -- Everything inside the closed interval [-epsilon, epsilon] is a match.
  | otherwise         = EQ
  where
    diff = diffUTCTime lhs rhs
|
||
-- | Build a 'UTCTime' from a calendar date and a time of day.
--
-- The first tuple is @(year, month, day)@, the second is
-- @(hour, minute, second)@.
mkUTCTime :: (Integer, Int, Int)
          -> (Int, Int, Pico)
          -> UTCTime
mkUTCTime (y, m, d) (hh, mm, ss) = UTCTime date dayTime
  where
    date    = fromGregorian y m d
    dayTime = timeOfDayToTime (TimeOfDay hh mm ss)
|
||
-- | The VAX/VMS base time, midnight on November 17 1858, as a 'UTCTime'.
--
-- Intended as a more or less reasonable default when a @Maybe UTCTime@ has
-- to be collapsed into a plain 'UTCTime'.
-- See http://shanebow.com/page/show/julian-day
zeroUTCJulianDay :: UTCTime
zeroUTCJulianDay = mkUTCTime baseDate midnight
  where
    baseDate = (1858, 11, 17)
    midnight = (0, 0, 0)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
MIT License | ||
|
||
Copyright (c) 2021 Oleksandr Karaberov | ||
|
||
Permission is hereby granted, free of charge, to any person obtaining a copy | ||
of this software and associated documentation files (the "Software"), to deal | ||
in the Software without restriction, including without limitation the rights | ||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
copies of the Software, and to permit persons to whom the Software is | ||
furnished to do so, subject to the following conditions: | ||
|
||
The above copyright notice and this permission notice shall be included in all | ||
copies or substantial portions of the Software. | ||
|
||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
SOFTWARE. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
module Main where | ||
|
||
import System.FileSearch (searchLog, readLogLine) | ||
import Data.Time.UTCTimes (mkUTCTime) | ||
import Data.Maybe (fromMaybe) | ||
import Data.Time.Clock (UTCTime (UTCTime)) | ||
|
||
-- | Example entry point: look up a single timestamp in a log file and print
-- the entry found for it, or say so when nothing matches.
main :: IO ()
main = do
  -- Let's assume we need to examine a very narrow window in an old and very large log file (50GB in size).

  -- Set search parameters (N.B. naturally these should be passed as CLI args).
  -- Log date we are looking for, or something close to it if this exact timestamp does not exist.
  let searchTerm = mkUTCTime (2019, 12, 12) (10, 34, 28.909266) :: UTCTime
  -- Log file to use for search.
  let path = "/var/log/cassandra/gc.log"
  -- Now perform a search.
  -- (If you need a more precise than 1 millisecond resolution, then consider using searchLog'.)
  result <- searchLog path searchTerm
  case result of
    -- Report a miss explicitly: substituting (searchTerm, 0) would print an
    -- unrelated line from the start of the file as if it were the match.
    Nothing ->
      putStrLn $ "No entry with time: " ++ show searchTerm ++ " found in: " ++ path
    -- Print the found log entry.
    Just (found, offset) -> do
      entry <- readLogLine path offset
      putStrLn $ "Entry with time: " ++ show found ++ " at offset: " ++ show offset ++ " is: \n" ++ entry
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
# hlogsearch | ||
|
||
A minimalistic tool written in Haskell to search log entries in arbitrarily large and noisy log files by a very precise log datetime (up to 1 microsecond). It can be used either as a standalone tool, or as a helper library to build sophisticated log analysis tools, or even as a mere first step in your larger UNIX pipeline to help analyze logs such as: | ||
|
||
```sh | ||
hlogsearch "2021-03-14T14:10:03.000000" gc.log | grep -A 100 -B 100 | awk '{print $9}' | sort | uniq -c | ||
``` | ||
|
||
## Usage | ||
|
||
`cabal run` is all you need. | ||
|
||
Refer to `Main.hs` for an example on how to use this tool. `hlogsearch` expects unstructured log file formats with dates printed in [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) format. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
import Distribution.Simple | ||
-- Setup script entry point: hands control to 'defaultMain' from
-- Distribution.Simple.
main :: IO ()
main = defaultMain
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
{-# LANGUAGE MultiWayIf #-} | ||
|
||
module System.FileSearch ( | ||
searchLog, | ||
searchLog', | ||
searchLogUntil, | ||
readLogLine | ||
) where | ||
|
||
import System.IO
  ( hSeek,
    openFile,
    hGetLine,
    withFile,
    SeekMode(AbsoluteSeek),
    IOMode(ReadMode) )
import Data.Maybe (fromMaybe)
import Data.Time.Clock (UTCTime (UTCTime), diffUTCTime, NominalDiffTime)
import Data.Time.UTCTimes (compareLogTimes, mkUTCTime, zeroUTCJulianDay)
import System.PosixCompat (getFileStatus)
import System.PosixCompat.Files (fileSize)
import Data.Time.ISO8601 ( parseISO8601 )
|
||
-- | Search the whole log file, up to its end, for @key@, comparing
-- timestamps with 1 millisecond precision.
searchLog :: String -> UTCTime -> IO (Maybe (UTCTime, Integer))
searchLog file key = searchLog' file key millisecond
  where
    -- One millisecond is assumed to be a sensible default log precision.
    millisecond = 0.001
|
||
-- | Variant of 'searchLog' that takes the timestamp comparison precision
-- from the caller; the search still spans the whole file.
searchLog' :: String -> UTCTime -> NominalDiffTime -> IO (Maybe (UTCTime, Integer))
searchLog' file key precision =
  getFileSize file >>= \size -> searchLogUntil file key size precision
|
||
-- | Binary-search a log file for an entry whose timestamp matches @key@.
--
-- A more customizable variant of 'searchLog' with additional parameters:
--
-- * @high@ defines an arbitrary offset to use as the upper search boundary
--   (the lower boundary is always 0).
-- * @precision@ defines the resolution used to compare log entries'
--   timestamps.
--
-- Returns the timestamp read at the matching probe together with the probed
-- offset, or 'Nothing' once the search range is exhausted. Being a binary
-- search, it presumes that timestamps grow with the file offset.
searchLogUntil :: [Char] -> UTCTime -> Integer -> NominalDiffTime -> IO (Maybe (UTCTime, Integer))
searchLogUntil file key high precision = go 0 high
  where
    go lo hi
      -- Test for an exhausted range before touching the file: probing first
      -- would seek to a negative or out-of-range offset and throw instead of
      -- reporting "not found".
      | hi < lo = return Nothing
      | otherwise = do
          let mid = (hi + lo) `div` 2
          pivot <- getLogTime file mid
          case compareLogTimes pivot key precision of
            GT -> go lo (mid - 1)
            LT -> go (mid + 1) hi
            EQ -> return $ Just (pivot, mid)
|
||
-- | Read the first complete line that starts after @offset@ in the file at
-- path @p@.
--
-- An arbitrary offset usually lands in the middle of a line, so the
-- (possibly partial) line at @offset@ is read and discarded, and the line
-- following it is returned. As a consequence the very first line of the file
-- is never returned, not even for offset 0.
--
-- The underlying 'hGetLine' calls raise an 'IOError' when there is no further
-- line to read.
readLogLine :: FilePath -> Integer -> IO String
readLogLine p offset =
  -- withFile closes the handle on every exit path; the search calls this
  -- once per probe, so handles must not be left open.
  withFile p ReadMode $ \hdl -> do
    hSeek hdl AbsoluteSeek offset
    -- Skip the remainder of the line the offset landed in.
    _ <- hGetLine hdl
    -- The handle now sits right after that line's terminator, so the next
    -- read yields the following line. No second seek is needed, and deriving
    -- a byte offset from a character count would be wrong for multi-byte text.
    hGetLine hdl
|
||
-- Internal auxiliary functions | ||
|
||
-- | Timestamp of the log line that 'readLogLine' yields for the given file
-- and offset.
getLogTime :: FilePath -> Integer -> IO UTCTime
getLogTime path offset = utcTimeFromLog (readLogLine path offset)
|
||
-- | Run the given action to obtain a log line and extract its timestamp.
--
-- The timestamp is taken from the second whitespace-separated token of the
-- line and parsed with 'parseISO8601'. When the line has fewer than two
-- tokens, or the token does not parse, 'zeroUTCJulianDay' is returned.
utcTimeFromLog :: IO String -> IO UTCTime
utcTimeFromLog readLine = do
  logEntry <- readLine
  return $ case words logEntry of
    (_ : time : _) -> fromMaybe zeroUTCJulianDay (parseISO8601 time)
    -- Blank or single-token lines carry no timestamp; use the same default
    -- as an unparseable one rather than failing on a partial pattern.
    _              -> zeroUTCJulianDay
|
||
-- | Size of the file at @path@, taken from its file status and converted to
-- an 'Integer'.
getFileSize :: String -> IO Integer
getFileSize path = fromIntegral . fileSize <$> getFileStatus path
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
cabal-version: >=1.10 | ||
name: hlogsearch | ||
version: 0.1.0.0 | ||
synopsis: Minimalistic tool to search for log entries in large files. | ||
description: | ||
Finds log lines and their position (offset) in large log files using a very | ||
precise log entry datetime as a search key, with up to 1 microsecond precision in case of voluminous and noisy logs. | ||
license: MIT | ||
license-file: LICENSE | ||
author: Oleksandr Karaberov | ||
maintainer: [email protected] | ||
copyright: (c) 2021 Oleksandr Karaberov | ||
category: System Tools | ||
build-type: Simple | ||
extra-source-files: CHANGELOG.md | ||
|
||
executable hlogsearch | ||
main-is: Main.hs | ||
other-modules: System.FileSearch, Data.Time.UTCTimes | ||
build-depends: base >=4.14 && <4.15, time, unix-compat, iso8601-time | ||
default-language: Haskell2010 |