Skip to content

Commit

Permalink
Initial implementation
Browse files Browse the repository at this point in the history
  • Loading branch information
AlexanderKaraberov committed Apr 18, 2021
0 parents commit 67ad053
Show file tree
Hide file tree
Showing 9 changed files with 218 additions and 0 deletions.
23 changes: 23 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
dist
dist-*
cabal-dev
*.o
*.hi
*.hie
*.chi
*.chs.h
*.dyn_o
*.dyn_hi
.hpc
.hsenv
.cabal-sandbox/
cabal.sandbox.config
*.prof
*.aux
*.hp
*.eventlog
.stack-work/
cabal.project.local
cabal.project.local~
.HTF/
.ghc.environment.*
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Revision history for hlogsearch

## 0.1.0.0 -- 2021-04-18

* First version. Implemented basic search for a log entry's timestamp using an offset in the file as an index
34 changes: 34 additions & 0 deletions Data/Time/UTCTimes.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
module Data.Time.UTCTimes (
compareLogTimes,
mkUTCTime,
zeroUTCJulianDay
) where

import Data.Time.Clock (UTCTime (UTCTime), diffUTCTime, NominalDiffTime)
import Data.Time.ISO8601 ( parseISO8601 )
import Data.Fixed (Pico)
import Data.Time.Calendar (fromGregorian)
import Data.Time.LocalTime (LocalTime, ZonedTime, timeOfDayToTime, TimeOfDay(TimeOfDay))

-- | Compare two log timestamps, treating them as equal when they differ by
-- no more than @epsilon@.
--
-- The result describes the first timestamp relative to the second:
--
-- * 'GT' when the first is later than the second by more than @epsilon@;
-- * 'LT' when the first is earlier than the second by more than @epsilon@;
-- * 'EQ' otherwise, which includes a difference of exactly @epsilon@ in
--   either direction (and identical timestamps when @epsilon@ is zero).
compareLogTimes :: UTCTime -> UTCTime -> NominalDiffTime -> Ordering
compareLogTimes lhs rhs epsilon
  | diff > epsilon        = GT
  | diff < negate epsilon = LT
  -- Catch-all so the boundary cases (diff == +/- epsilon) are total instead
  -- of falling off the end of the guards.
  | otherwise             = EQ
  where
    -- Positive when lhs is later than rhs.
    diff = diffUTCTime lhs rhs

-- | Build a 'UTCTime' from a Gregorian calendar date and a time of day.
--
-- The first tuple is @(year, month, day)@ and the second is
-- @(hour, minute, second)@, where the seconds component may carry a
-- fractional part.
mkUTCTime :: (Integer, Int, Int)
          -> (Int, Int, Pico)
          -> UTCTime
mkUTCTime (y, m, d) (hh, mm, ss) = UTCTime calendarDay timeIntoDay
  where
    calendarDay = fromGregorian y m d
    timeIntoDay = timeOfDayToTime (TimeOfDay hh mm ss)

-- | Sentinel timestamp: midnight on 17 November 1858, the VAX/VMS base time,
-- used as a "zero" Julian day.
--
-- Intended as a more or less reasonable default when a @Maybe UTCTime@ has
-- to be collapsed to a plain 'UTCTime'.
-- Background: http://shanebow.com/page/show/julian-day
zeroUTCJulianDay :: UTCTime
zeroUTCJulianDay = mkUTCTime baseDate midnight
  where
    baseDate = (1858, 11, 17)
    midnight = (0, 0, 0)
21 changes: 21 additions & 0 deletions LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2021 Oleksandr Karaberov

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
24 changes: 24 additions & 0 deletions Main.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
module Main where

import System.FileSearch (searchLog, readLogLine)
import Data.Time.UTCTimes (mkUTCTime)
import Data.Maybe (fromMaybe)
import Data.Time.Clock (UTCTime (UTCTime))

-- | Example driver: look up one hard-coded timestamp in one hard-coded log
-- file and print the matching entry, or report that nothing matched.
main :: IO ()
main = do
  -- Let's assume we need to examine a very narrow window in an old and very
  -- large log file (50GB in size).

  -- Search parameters (N.B. naturally these should be passed as CLI args).
  -- Timestamp we are looking for, or something within the search precision
  -- of it if this exact timestamp does not exist.
  let searchTerm = mkUTCTime (2019, 12, 12) (10, 34, 28.909266) :: UTCTime
  -- Log file to search.
  let path = "/var/log/cassandra/gc.log"
  -- Perform the search (if you need a finer resolution than 1 millisecond,
  -- consider using searchLog').
  result <- searchLog path searchTerm
  case result of
    -- Report a miss explicitly instead of substituting offset 0, which would
    -- print an unrelated line from the top of the file as if it were a hit.
    Nothing ->
      putStrLn $ "No entry found with time: " ++ show searchTerm
    Just (found, offset) -> do
      -- Re-read the matched entry so it can be shown to the user.
      entry <- readLogLine path offset
      putStrLn $ "Entry with time: " ++ show found ++ " at offset: " ++ show offset ++ " is: \n" ++ entry
13 changes: 13 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# hlogsearch

A minimalistic tool written in Haskell to search log entries in arbitrarily large and noisy log files by a very precise log datetime (up to 1 microsecond). It can be used either as a standalone tool, or as a helper library to build sophisticated log analysis tools, or even as a mere first step in your larger UNIX pipeline to help analyze logs such as:

```sh
hlogsearch "2021-03-14T14:10:03.000000" gc.log | grep -A 100 -B 100 | awk '{print $9}' | sort | uniq -c
```

## Usage

`cabal run` is all you need.

Refer to `Main.hs` for an example on how to use this tool. `hlogsearch` expects unstructured log file formats with dates printed in [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) format.
2 changes: 2 additions & 0 deletions Setup.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
import Distribution.Simple
-- | Cabal build entry point; defers entirely to 'defaultMain'.
main :: IO ()
main = defaultMain
75 changes: 75 additions & 0 deletions System/FileSearch.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
{-# LANGUAGE MultiWayIf #-}

module System.FileSearch (
searchLog,
searchLog',
searchLogUntil,
readLogLine
) where

import Data.Maybe (fromMaybe)
import System.IO
  ( IOMode (ReadMode)
  , SeekMode (AbsoluteSeek)
  , hGetLine
  , hSeek
  , openFile
  , withFile
  )

import Data.Time.Clock (NominalDiffTime, UTCTime (UTCTime), diffUTCTime)
import Data.Time.ISO8601 (parseISO8601)
import System.PosixCompat (getFileStatus)
import System.PosixCompat.Files (fileSize)

import Data.Time.UTCTimes (compareLogTimes, mkUTCTime, zeroUTCJulianDay)

-- | Search an entire log file for an entry whose timestamp matches @key@ to
-- within one millisecond.
--
-- The upper search bound is the size of the file; the result is whatever
-- 'searchLogUntil' returns for that bound.
searchLog :: String -> UTCTime -> IO (Maybe (UTCTime, Integer))
searchLog file key =
  getFileSize file >>= \upperBound ->
    searchLogUntil file key upperBound millisecond
  where
    -- One millisecond is assumed to be a sensible default log precision.
    millisecond = 0.001

-- | Variant of 'searchLog' that lets the caller choose the timestamp
-- comparison precision instead of the one-millisecond default.
--
-- The whole file is searched: the upper bound is the file's size.
searchLog' :: String -> UTCTime -> NominalDiffTime -> IO (Maybe (UTCTime, Integer))
searchLog' file key precision = do
  upperBound <- getFileSize file
  searchLogUntil file key upperBound precision

-- | Binary-search a log file by byte offset for an entry whose timestamp
-- matches @key@.
--
-- Parameters:
--
-- * @file@      - path of the log file to search.
-- * @key@       - timestamp to look for.
-- * @high@      - upper byte offset bounding the search (the lower bound is 0).
-- * @precision@ - resolution used when comparing timestamps; see
--                 'compareLogTimes'.
--
-- Returns @Just (timestamp, offset)@ for the first probe whose entry matches,
-- where @offset@ is the probed offset (pass it to 'readLogLine' to fetch the
-- entry), or @Nothing@ once the search range is exhausted.
--
-- NOTE(review): the search presumes entries are ordered by timestamp; nothing
-- here verifies that.
-- NOTE(review): 'getLogTime' reads the line /after/ the one containing the
-- probed offset, so probing inside the final line of the file presumably
-- raises an end-of-file error - confirm and handle if that matters.
searchLogUntil :: [Char] -> UTCTime -> Integer -> NominalDiffTime -> IO (Maybe (UTCTime, Integer))
searchLogUntil file key high precision = go 0 high
  where
    go lo hi
      -- Check the bounds before touching the file: once the range is
      -- exhausted the midpoint can be negative, and seeking there would throw
      -- instead of reporting "not found".
      | hi < lo = return Nothing
      | otherwise = do
          let mid = (hi + lo) `div` 2
          pivot <- getLogTime file mid
          -- Compare once and branch on the result.
          case compareLogTimes pivot key precision of
            GT -> go lo (mid - 1)
            LT -> go (mid + 1) hi
            EQ -> return $ Just (pivot, mid)

-- | Return the first complete line that starts after byte @offset@ in the
-- file at @p@.
--
-- The offset may fall in the middle of a line, so the remainder of that
-- (possibly partial) line is read and discarded, and the following line is
-- returned. In particular, @offset@ 0 skips the first line of the file.
--
-- The file is opened for the duration of the call only; 'withFile' closes the
-- handle even if a read fails. An I/O error is raised if there is no further
-- line to read.
readLogLine :: FilePath -> Integer -> IO String
readLogLine p offset =
  withFile p ReadMode $ \hdl -> do
    hSeek hdl AbsoluteSeek offset
    -- Discard the rest of the line the offset landed in. The handle is then
    -- already positioned at the start of the next line, so no second seek is
    -- needed (computing one from the character count would be wrong for
    -- multi-byte text anyway).
    _ <- hGetLine hdl
    hGetLine hdl

-- Internal auxiliary functions

-- | Timestamp of the log entry that 'readLogLine' returns for the given file
-- and byte offset.
getLogTime :: FilePath -> Integer -> IO UTCTime
getLogTime path offset = utcTimeFromLog (readLogLine path offset)

-- | Run the given action to obtain a log entry and extract its timestamp.
--
-- The timestamp is taken to be the second whitespace-separated token of the
-- entry and is parsed as ISO 8601. If the entry has fewer than two tokens, or
-- the token does not parse, 'zeroUTCJulianDay' is returned as a sentinel.
utcTimeFromLog :: IO String -> IO UTCTime
utcTimeFromLog readEntry = do
  logEntry <- readEntry
  return $ case words logEntry of
    (_ : time : _) -> fromMaybe zeroUTCJulianDay (parseISO8601 time)
    -- Too few tokens to contain a timestamp: use the same fallback as an
    -- unparseable timestamp rather than failing on a partial pattern match.
    _              -> zeroUTCJulianDay

-- | Size of the file at @path@, taken from its file status and widened to
-- 'Integer'.
getFileSize :: String -> IO Integer
getFileSize path = fromIntegral . fileSize <$> getFileStatus path
21 changes: 21 additions & 0 deletions hlogsearch.cabal
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
cabal-version: >=1.10
name: hlogsearch
version: 0.1.0.0
synopsis: Minimalistic tool to search for log entries in large files.
description:
Finds log lines and their positions (offsets) in large log files using a very
precise log entry datetime as a search key, with up to 1 microsecond precision in case of voluminous and noisy logs.
license: MIT
license-file: LICENSE
author: Oleksandr Karaberov
maintainer: [email protected]
copyright: (c) 2021 Oleksandr Karaberov
category: System Tools
build-type: Simple
extra-source-files: CHANGELOG.md

executable hlogsearch
main-is: Main.hs
other-modules: System.FileSearch, Data.Time.UTCTimes
build-depends: base >=4.14 && <4.15, time, unix-compat, iso8601-time
default-language: Haskell2010

0 comments on commit 67ad053

Please sign in to comment.