Skip to content

Commit

Permalink
Added option to merge duplicate objects
Browse files: browse the repository at this point in the history
  • Loading branch information
johannesmols committed Sep 20, 2023
1 parent d67de97 commit 8b3723c
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 15 deletions.
28 changes: 15 additions & 13 deletions CLI/Program.fs
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ and Args =
| [<Mandatory; ExactlyOnce; AltCommandLine("--of")>] OutputFormat of OcelFormat
| [<AltCommandLine("--i")>] Indented
| [<AltCommandLine("--ruo")>] RemoveUnknownObjects
| [<AltCommandLine("--mdo")>] MergeDuplicateObjects
| [<AltCommandLine("--nv")>] NoValidation
| [<CliPrefix(CliPrefix.None); AltCommandLine("cd")>] ConvertDir of ParseResults<ConvertDirArgs>
| [<CliPrefix(CliPrefix.None); AltCommandLine("cmd")>] ConvertMergeDir of ParseResults<ConvertMergeDirArgs>
Expand All @@ -67,6 +68,7 @@ and Args =
| Indented -> "Specifies that output files should be formatted using indentation."
| RemoveUnknownObjects -> "Remove any object references from events that don't exist in the log."
| NoValidation -> "Specifies that the deserialized log(s) should not be validated before serializing again."
| MergeDuplicateObjects -> "Specifies that identical objects should be merged into one and all event references updated."
| ConvertDir _ -> "Convert a directory of OCEL files."
| ConvertMergeDir _ -> "Convert and merge a directory of OCEL files into a single file."
| ConvertFiles _ -> "Convert one or more OCEL files."
Expand Down Expand Up @@ -133,7 +135,7 @@ let private readOcelFile removeUnknownObjects (path: string) =
None

/// Read multiple OCEL files, merge them, and write them back to an output file in a specified format
let private mergeAndWriteToFile removeUnknownObjects outputFormat formatting validate files out =
let private mergeAndWriteToFile removeUnknownObjects mergeDuplicateObjects outputFormat formatting validate files out =
let mergedLog =
files
|> List.map (fun f -> readOcelFile removeUnknownObjects f)
Expand All @@ -143,6 +145,7 @@ let private mergeAndWriteToFile removeUnknownObjects outputFormat formatting val
files
|> List.fold (fun (state: OCEL.Types.OcelLog) log -> state.MergeWith log) OCEL.Types.OcelLog.Empty

let mergedLog = if mergeDuplicateObjects then mergedLog.MergeDuplicateObjects() else mergedLog
match outputFormat with
| OcelFormat.Json ->
printfn $"Writing log to {out}."
Expand All @@ -168,6 +171,7 @@ let main args =
let formatting = if results.Contains Indented then OCEL.Types.Formatting.Indented else OCEL.Types.Formatting.None
let validate = results.Contains NoValidation |> not
let removeUnknownObjects = results.Contains RemoveUnknownObjects
let mergeDuplicateObjects = results.Contains MergeDuplicateObjects

let cmds =
results.TryGetResult ConvertDir,
Expand All @@ -188,10 +192,10 @@ let main args =
|> fun files ->
printfn $"Found {files.Length} matching files in directory."
files
|> List.map (fun f -> f, readOcelFile removeUnknownObjects f)
|> List.iter (fun (name, log) ->
match log with
|> List.iter (fun name ->
match name |> readOcelFile removeUnknownObjects with
| Some log ->
let log = if mergeDuplicateObjects then log.MergeDuplicateObjects() else log
match outputFormat with
| OcelFormat.Json ->
let fileName = getNewFileName name outDir outputFormat
Expand All @@ -210,8 +214,7 @@ let main args =
OCEL.OcelLiteDB.serialize false outDb log
outDb.Dispose()
| _ -> raise (new ArgumentOutOfRangeException(nameof(outputFormat)))
| None -> ()
)
| None -> ())

| None, Some cmd, None, None ->
let dir = cmd.GetResult ConvertMergeDirArgs.Dir
Expand All @@ -225,14 +228,14 @@ let main args =
|> fun files ->
printfn $"Found {files.Length} matching files in directory."
files
|> fun files -> mergeAndWriteToFile removeUnknownObjects outputFormat formatting validate files out
|> fun files -> mergeAndWriteToFile removeUnknownObjects mergeDuplicateObjects outputFormat formatting validate files out

| None, None, Some cf, None ->
cf.GetResult ConvertFilesArgs.Files
|> List.map (fun f -> f, readOcelFile removeUnknownObjects f)
|> List.iter (fun (name, log) ->
match log with
|> List.iter (fun name ->
match name |> readOcelFile removeUnknownObjects with
| Some log ->
let log = if mergeDuplicateObjects then log.MergeDuplicateObjects() else log
let fileName = getNewFileName name (Path.GetDirectoryName name) outputFormat
match outputFormat with
| OcelFormat.Json ->
Expand All @@ -249,13 +252,12 @@ let main args =
OCEL.OcelLiteDB.serialize false outDb log
outDb.Dispose()
| _ -> raise (new ArgumentOutOfRangeException(nameof(outputFormat)))
| None -> ()
)
| None -> ())

| None, None, None, Some cmf ->
let files = cmf.GetResult ConvertMergeFilesArgs.Files
let out = cmf.GetResult ConvertMergeFilesArgs.Out
mergeAndWriteToFile removeUnknownObjects outputFormat formatting validate files out
mergeAndWriteToFile removeUnknownObjects mergeDuplicateObjects outputFormat formatting validate files out

| _ -> failwith "Only one sub-command allowed at a time."
with e ->
Expand Down
6 changes: 4 additions & 2 deletions CLI/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ There are 4 main usages with a corresponding command:
4. Convert one or more OCEL files, given their specific paths, to a specific format, and merge them into a single file

```
USAGE: ocel-cli [--help] --outputformat <json|xml|litedb> [--indented] [--removeunknownobjects] [--novalidation]
[<subcommand> [<options>]]
USAGE: ocel-cli [--help] --outputformat <json|xml|litedb> [--indented] [--removeunknownobjects]
[--mergeduplicateobjects] [--novalidation] [<subcommand> [<options>]]
SUBCOMMANDS:
Expand All @@ -39,6 +39,8 @@ OPTIONS:
--indented, --i Specifies that output files should be formatted using indentation.
--removeunknownobjects, --ruo
Remove any object references from events that don't exist in the log.
--mergeduplicateobjects, --mdo
Specifies that identical objects should be merged into one and all event references updated.
--novalidation, --nv Specifies that the deserialized log(s) should not be validated before serializing again.
--help display this list of options.
```
Expand Down

0 comments on commit 8b3723c

Please sign in to comment.