From ee9f764f37e152816fc1955ea008440be1075fe2 Mon Sep 17 00:00:00 2001 From: singularitti Date: Thu, 26 Oct 2023 19:16:09 -0400 Subject: [PATCH 1/7] Add stdlib `Dates` to deps --- Project.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/Project.toml b/Project.toml index c7c3adc..49b6ab3 100644 --- a/Project.toml +++ b/Project.toml @@ -7,6 +7,7 @@ version = "0.4.0" AbInitioSoftwareBase = "df5135bc-470e-46c6-b451-292e27ca5b84" CrystallographyBase = "93b1d1cd-a8ea-4aa5-adb1-b2407ea0ba8d" DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" +Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" PyFortran90Namelists = "e44308e6-bd5b-11e9-2850-49daf8f1ec40" QuantumESPRESSOBase = "51b62caa-b28f-11e9-38c2-1f67cb498e05" ReadableRegex = "cbbcb084-453d-4c4c-b292-e315607ba6a4" From 038e870b9a5ab334b998b74743b1338623e99b3b Mon Sep 17 00:00:00 2001 From: singularitti Date: Thu, 26 Oct 2023 19:16:35 -0400 Subject: [PATCH 2/7] Add type `TimedItem` --- src/PWscf/output.jl | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/PWscf/output.jl b/src/PWscf/output.jl index 124d6e7..c04afac 100644 --- a/src/PWscf/output.jl +++ b/src/PWscf/output.jl @@ -1,8 +1,10 @@ -# using Dates: DateTime, DateFormat +using Dates: Hour, Minute, Millisecond using DataFrames: AbstractDataFrame, DataFrame, groupby using QuantumESPRESSOBase.PWscf using VersionParsing: vparse +export TimedItem + struct SubroutineError name::String cerr::String @@ -425,6 +427,13 @@ function parse_clock(str::AbstractString)::Maybe{AbstractDataFrame} return info end # function parse_clock +struct TimedItem + name::String + cpu::Millisecond + wall::Millisecond + calls::Maybe{UInt64} +end + function parse_input_name(str::AbstractString) m = match(READING_INPUT_FROM, str) return m === nothing ? 
nothing : only(m) From 07403eac16f3866eeb1ee4e48915131505e56494 Mon Sep 17 00:00:00 2001 From: singularitti Date: Thu, 26 Oct 2023 19:16:52 -0400 Subject: [PATCH 3/7] Add `parse` for `TimedItem` --- src/PWscf/output.jl | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/src/PWscf/output.jl b/src/PWscf/output.jl index c04afac..a37a425 100644 --- a/src/PWscf/output.jl +++ b/src/PWscf/output.jl @@ -434,6 +434,35 @@ struct TimedItem calls::Maybe{UInt64} end +function Base.parse(::Type{TimedItem}, str::AbstractString) + matched = match(TIME_ROW, str) + if isnothing(matched) + return nothing + else + return TimedItem( + matched[1], + parsetime(matched[2]), + parsetime(matched[3]), + parse(UInt64, matched[4]), + ) + end +end + +function parsetime(str::AbstractString) + compound = match(r"(\d+)h\s*(\d+)m", str) + seconds = match(r"(\d+\.\d{2})s", str) + if isnothing(seconds) && !isnothing(compound) + hours = parse(UInt64, compound[1]) + minutes = parse(UInt64, compound[2]) + return convert(Millisecond, Hour(hours) + Minute(minutes)) + elseif isnothing(compound) && !isnothing(seconds) + seconds = parse(Float64, seconds[1]) + return Millisecond(1000seconds) + else + throw(ArgumentError("invalid time format!")) + end +end + function parse_input_name(str::AbstractString) m = match(READING_INPUT_FROM, str) return m === nothing ? 
nothing : only(m) From bb1853d6264c7065314a373aac3d7475045250cb Mon Sep 17 00:00:00 2001 From: singularitti Date: Thu, 26 Oct 2023 19:18:13 -0400 Subject: [PATCH 4/7] Fix regex `TIMED_ITEM` --- src/PWscf/output.jl | 2 +- src/PWscf/regexes.jl | 33 ++++++++------------------------- 2 files changed, 9 insertions(+), 26 deletions(-) diff --git a/src/PWscf/output.jl b/src/PWscf/output.jl index a37a425..6acfb9b 100644 --- a/src/PWscf/output.jl +++ b/src/PWscf/output.jl @@ -435,7 +435,7 @@ struct TimedItem end function Base.parse(::Type{TimedItem}, str::AbstractString) - matched = match(TIME_ROW, str) + matched = match(TIMED_ITEM, str) if isnothing(matched) return nothing else diff --git a/src/PWscf/regexes.jl b/src/PWscf/regexes.jl index 0621165..82ff5c4 100644 --- a/src/PWscf/regexes.jl +++ b/src/PWscf/regexes.jl @@ -200,31 +200,14 @@ const CONVERGED_ELECTRONS_ENERGY = Regex( "m", ) const TIME_BLOCK = r"(init_run\X+?This run was terminated on:.*)" -# This format is from https://github.com/QEF/q-e/blob/4132a64/PW/src/print_clock_pw.f90#L29-L33. -const SUMMARY_TIME_BLOCK = r""" -(?<head>) -(?<body> -init_run\s+:.* -\s*electrons\s+:.* -\s*(?:update_pot\s+.*)? # This does not always exist. -\s*(?:forces\s+:.*)? # This does not always exist. -\s*(?:stress\s+:.*)? # This does not always exist. -) -"""mx -const TIME_ITEM = Regex( - "\\s*([\\w0-9:]+)\\s+:\\s*$(FIXED_POINT_REAL)s\\sCPU\\s*$(FIXED_POINT_REAL)s\\sWALL\\s\\(\\s*$INTEGER\\scalls\\)", -) -# This format is from https://github.com/QEF/q-e/blob/4132a64/PW/src/print_clock_pw.f90#L35-L36. -const INIT_RUN_TIME_BLOCK = r"Called by (?<head>init_run):(?<body>\X+?)^\s*$"m -# This format is from https://github.com/QEF/q-e/blob/4132a64/PW/src/print_clock_pw.f90#L53-L54. -const ELECTRONS_TIME_BLOCK = r"Called by (?<head>electrons):(?<body>\X+?)^\s*$"m -# This format is from https://github.com/QEF/q-e/blob/4132a64/PW/src/print_clock_pw.f90#L78-L79. 
-const C_BANDS_TIME_BLOCK = r"Called by (?<head>c_bands):(?<body>\X+?)^\s*$"m -const SUM_BAND_TIME_BLOCK = r"Called by (?<head>sum_band):(?<body>\X+?)^\s*$"m -const EGTERG_TIME_BLOCK = r"Called by (?<head>\*egterg):(?<body>\X+?)^\s*$"m -const H_PSI_TIME_BLOCK = r"Called by (?<head>h_psi):(?<body>\X+?)^\s*$"m -const GENERAL_ROUTINES_TIME_BLOCK = r"(?<head>General routines)(?<body>\X+?)^\s*$"m -const PARALLEL_ROUTINES_TIME_BLOCK = r"(?<head>Parallel routines)(?<body>\X+?)^\s*$"m +const TIME_FORMAT = r"(\d+h\s*\d+m|\d+\.\d{2}s)" +const TIMED_ITEM = + r"([\w:]+)\s*:\s*" * + TIME_FORMAT * + r"\s*CPU\s*" * + TIME_FORMAT * + r"\s*WALL" * + r"\s*\(\s*(\d+)\s*calls\)?$"m # `?$` matches the last row const TERMINATED_DATE = r"This run was terminated on:(.+)" # TODO: Date const JOB_DONE = r"JOB DONE\." # These formats are from https://github.com/QEF/q-e/blob/4132a64/UtilXlib/error_handler.f90#L48-L68. From 2b8cd0aff3245fb5aeff654843de3c42001cef44 Mon Sep 17 00:00:00 2001 From: singularitti Date: Thu, 26 Oct 2023 19:41:19 -0400 Subject: [PATCH 5/7] Fix `TIMED_ITEM` & `parse` for `TimedItem` --- src/PWscf/output.jl | 8 +++----- src/PWscf/regexes.jl | 3 ++- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/src/PWscf/output.jl b/src/PWscf/output.jl index 6acfb9b..cda70d1 100644 --- a/src/PWscf/output.jl +++ b/src/PWscf/output.jl @@ -437,13 +437,11 @@ end function Base.parse(::Type{TimedItem}, str::AbstractString) matched = match(TIMED_ITEM, str) if isnothing(matched) - return nothing + nothing else + name, cpu, wall = matched[1], parsetime(matched[2]), parsetime(matched[3]) return TimedItem( - matched[1], - parsetime(matched[2]), - parsetime(matched[3]), - parse(UInt64, matched[4]), + name, cpu, wall, isnothing(matched[4]) ? 
nothing : parse(UInt64, matched[5]) ) end end diff --git a/src/PWscf/regexes.jl b/src/PWscf/regexes.jl index 82ff5c4..63c30fa 100644 --- a/src/PWscf/regexes.jl +++ b/src/PWscf/regexes.jl @@ -207,7 +207,8 @@ const TIMED_ITEM = r"\s*CPU\s*" * TIME_FORMAT * r"\s*WALL" * - r"\s*\(\s*(\d+)\s*calls\)?$"m # `?$` matches the last row + r"(\s*\(\s*(\d+)\s*calls\))?" * + r"$" # Match the last row const TERMINATED_DATE = r"This run was terminated on:(.+)" # TODO: Date const JOB_DONE = r"JOB DONE\." # These formats are from https://github.com/QEF/q-e/blob/4132a64/UtilXlib/error_handler.f90#L48-L68. From e180049c6461075778d166b33c9717d3b7edf4f2 Mon Sep 17 00:00:00 2001 From: singularitti Date: Thu, 26 Oct 2023 19:41:37 -0400 Subject: [PATCH 6/7] Define `ParseError` & use it in `parsetime` --- src/PWscf/output.jl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/PWscf/output.jl b/src/PWscf/output.jl index cda70d1..03b9263 100644 --- a/src/PWscf/output.jl +++ b/src/PWscf/output.jl @@ -11,6 +11,10 @@ struct SubroutineError msg::String end +struct ParseError <: Exception + msg::String +end + export Diagonalization, Preamble, Davidson, @@ -457,7 +461,7 @@ function parsetime(str::AbstractString) seconds = parse(Float64, seconds[1]) return Millisecond(1000seconds) else - throw(ArgumentError("invalid time format!")) + throw(ParseError("unrecognized time format!")) end end From 4e91ca86d69676ee728995a729198c605e561074 Mon Sep 17 00:00:00 2001 From: singularitti Date: Thu, 26 Oct 2023 19:42:14 -0400 Subject: [PATCH 7/7] Deprecate `parse_clock` --- src/PWscf/output.jl | 35 ----------------------------------- 1 file changed, 35 deletions(-) diff --git a/src/PWscf/output.jl b/src/PWscf/output.jl index 03b9263..5c6a9a9 100644 --- a/src/PWscf/output.jl +++ b/src/PWscf/output.jl @@ -35,7 +35,6 @@ export Diagonalization, parse_fft_dimensions, parse_iteration_head, parse_electrons_energies, - parse_clock, parse_input_name, isoptimized, isjobdone, @@ -397,40 +396,6 @@ 
function parse_fft_dimensions(str::AbstractString)::Maybe{NamedTuple} return (; zip((:ng, :nr1, :nr2, :nr3), parsed)...) end # function parse_fft_dimensions -function parse_clock(str::AbstractString)::Maybe{AbstractDataFrame} - m = match(TIME_BLOCK, str) - m === nothing && return nothing - content = only(m.captures) - - info = DataFrame(; - subroutine=String[], item=String[], CPU=Float64[], wall=Float64[], calls=Int[] - ) - for regex in [ - SUMMARY_TIME_BLOCK - INIT_RUN_TIME_BLOCK - ELECTRONS_TIME_BLOCK - C_BANDS_TIME_BLOCK - SUM_BAND_TIME_BLOCK - EGTERG_TIME_BLOCK - H_PSI_TIME_BLOCK - GENERAL_ROUTINES_TIME_BLOCK - PARALLEL_ROUTINES_TIME_BLOCK - ] - block = match(regex, content) - if block !== nothing - for m in eachmatch(TIME_ITEM, block[:body]) - push!( - info, - [block[:head] m[1] map(x -> parse(Float64, x), m.captures[2:4])...], - ) - end - end - end - # m = match(TERMINATED_DATE, content) - # info["terminated date"] = parse(DateTime, m.captures[1], DateFormat("H:M:S")) - return info -end # function parse_clock - struct TimedItem name::String cpu::Millisecond