Skip to content

Commit

Permalink
lark transformer working
Browse files Browse the repository at this point in the history
  • Loading branch information
wpbonelli committed Sep 6, 2024
1 parent dded4cb commit b205400
Show file tree
Hide file tree
Showing 3 changed files with 149 additions and 48 deletions.
79 changes: 47 additions & 32 deletions flopy4/lark/__init__.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,39 @@
from lark import Lark

MF6_GRAMMAR = r"""
?start: _NL* _item*
_item: (block | COMMENT) _NL+
// component
component: _NL* (block _NL+)* _NL*
// block
block: _begin _NL params _end
_begin: _BEGIN name [index]
_end: _END name
name: WORD
index: INT
// blocks
block: _paramsblock | _listblock
_paramsblock: _BEGIN paramsblockname _NL params _END paramsblockname
_listblock: _BEGIN listblockname _NL list _END listblockname
paramsblockname: PARAMSBLOCKNAME
listblockname: LISTBLOCKNAME [_blockindex]
_blockindex: INT
_BEGIN: "begin"i
_END: "end"i
// parameter
params: (param _NL)*
param: _key [_value]
_key: KEYS
_value: NUMBER | path | string | array | list
// parameters (higher priority than lists
// since list of records will match also)
params.1: (param _NL)*
param: key | _pair
_pair: key value
key: PARAMNAME
?value: array
| list
| path
| string
| int
| float
// string
string: WORD+
word: WORD
?string: word+
// numbers
int: INT
float: FLOAT
// file path
path: INOUT PATH
Expand All @@ -30,39 +43,41 @@
// array
array: constantarray | internalarray | externalarray
constantarray: "CONSTANT" NUMBER
internalarray: "INTERNAL" [factor] [iprn] (NUMBER* [_NL])*
externalarray: "OPEN/CLOSE" WORD [factor] ["binary"] [iprn]
constantarray: "CONSTANT" float
internalarray: "INTERNAL" [factor] [iprn] (float* [_NL])*
externalarray: "OPEN/CLOSE" PATH [factor] ["binary"] [iprn]
factor: "FACTOR" NUMBER
iprn: "IPRN" INT
// list (adapted from https://github.com/lark-parser/lark/blob/master/examples/composition/csv.lark)
list: header _NL row*
header: "#" " "? (WORD _SEPARATOR?)+
row: (_anything _SEPARATOR?)+ _NL
_anything: INT | WORD | NON_SEPARATOR_STRING | FLOAT | SIGNED_FLOAT
// list adapted from https://github.com/lark-parser/lark/blob/master/examples/composition/csv.lark
list.-1: record*
// negative priority for records bc
// the pattern is so indiscriminate.
record.-1: _anything+ _NL
_anything: int | float | word
NON_SEPARATOR_STRING: /[a-zA-z.;\\\/]+/
_SEPARATOR: /[ ]+/
| "\t"
| ","
// newline
_NL: /(\r?\n[\t ]*)+/
// parameter keys file can be generated
// TODO:
// a parameter key file can be generated
// with the rest of the plugin interface
// and maybe placed in a separate file
KEYS: "K"|"I"|"D"|"S"|"F"|"A"
// rather than known keys hardcoded here
// (and likewise for block names)
PARAMNAME: ("K"|"I"|"D"|"S"|"F"|"A")
PARAMSBLOCKNAME: ("OPTIONS"|"PACKAGEDATA")
LISTBLOCKNAME: "PERIOD"
%import common.SH_COMMENT -> COMMENT
%import common.SIGNED_NUMBER -> NUMBER
%import common.SIGNED_FLOAT
%import common.INT
%import common.FLOAT
%import common.SIGNED_INT -> INT
%import common.SIGNED_FLOAT -> FLOAT
%import common.WORD
%import common.WS_INLINE
%ignore COMMENT
%ignore WS_INLINE
"""

MF6_PARSER = Lark(MF6_GRAMMAR, start="start")
MF6_PARSER = Lark(MF6_GRAMMAR, start="component")
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -105,5 +105,6 @@ select = [
]
ignore = [
"F821", # undefined name TODO FIXME
"E722" # do not use bare `except`
"E722", # do not use bare `except`
"E741", # ambiguous variable name
]
115 changes: 100 additions & 15 deletions test/test_lark.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
from pprint import pprint
from os import linesep
from pathlib import Path

import pytest
import numpy as np
from lark import Transformer

from flopy4.lark import MF6_PARSER

TEST_PKG = """
TEST_COMPONENT = """
BEGIN OPTIONS
K
I 1
Expand All @@ -14,29 +15,113 @@
F FILEIN some/path
END OPTIONS
BEGIN PACKAGEDATA 1
BEGIN PACKAGEDATA
A INTERNAL 1.0 2.0 3.0
END PACKAGEDATA
BEGIN PERIOD 1
FIRST
FREQUENCY 2
END PERIOD 1
BEGIN PERIOD 2
STEPS 1 2 3
END PERIOD 2
"""


def test_parse_mf6():
tree = MF6_PARSER.parse(TEST_PKG)
# this is working, check it with:
# pytest test/test_lark.py::test_parse_mf6 -s
print(tree.pretty())
def test_parse():
    """Parse the sample component and print the resulting tree.

    This is a smoke test: it passes as long as the parser accepts the
    sample input. To inspect the printed tree, run e.g.
    ``pytest test/test_lark.py::test_parse -s``.
    """
    parsed = MF6_PARSER.parse(TEST_COMPONENT)
    rendered = parsed.pretty()
    # lead with a line separator so the tree starts on its own line
    print(linesep + rendered)


class MF6Transformer(Transformer):
    """Turn an MF6 parse tree into plain Python data structures.

    Every method or class attribute below is named after a grammar rule
    and is invoked with the list of already-transformed children of a
    node produced by that rule. Several names therefore intentionally
    match builtins (``int``, ``float``, ``list``); inside method bodies
    those names still resolve to the builtins, because class attributes
    are not part of a method's enclosing scope.
    """

    def key(self, k):
        """Return a parameter name as a lowercase string."""
        (k,) = k
        return str(k).lower()

    def word(self, w):
        """Return a single word token as a plain string."""
        (w,) = w
        return str(w)

    def path(self, p):
        """Return the file path, discarding the leading in/out keyword."""
        _, p = p
        return Path(p)

    def string(self, s):
        """Join the already-converted words with single spaces."""
        return " ".join(s)

    def int(self, i):
        """Convert an integer token to ``int``."""
        (i,) = i
        return int(i)

    def float(self, f):
        """Convert a float token to ``float``."""
        (f,) = f
        return float(f)

    def factor(self, f):
        """Convert the number following ``FACTOR`` to ``float``.

        Without this callback the array rules would receive an
        untransformed tree node for the factor, which cannot be
        multiplied into a numeric array.
        """
        (f,) = f
        return float(f)

    def iprn(self, i):
        """Convert the number following ``IPRN`` to ``int``."""
        (i,) = i
        return int(i)

    def array(self, a):
        """Unwrap the single concrete array alternative."""
        (a,) = a
        return a

    def constantarray(self, a):
        """Placeholder for constant arrays; currently returns ``None``."""
        # TODO factor out `ConstantArray`
        # array-like class from `MFArray`
        # with deferred shape and use it
        return None

    def internalarray(self, a):
        """Build a 1D float array from inline values, scaled by the factor.

        The children are expected to be ``[factor, iprn, *values]`` where
        ``factor`` and ``iprn`` are ``None`` when absent. This relies on
        the parser emitting placeholders for unmatched ``[...]`` items
        (NOTE(review): lark's ``maybe_placeholders`` behavior; confirm
        it stays enabled for the parser in use).
        """
        factor = a[0]
        values = a[2:]
        array = np.array(values, dtype=float)
        if factor is not None:
            # out-of-place multiply: never fails on a dtype mismatch
            array = array * factor
        return array

    def externalarray(self, a):
        """Placeholder for external arrays; currently returns ``None``."""
        # TODO
        return None

    # a record is a tuple of its values; a list is a list of records
    record = tuple
    list = list

    def param(self, p):
        """Return a ``(key, value)`` pair; a bare key is a ``True`` flag."""
        k = p[0]
        v = True if len(p) == 1 else p[1]
        return k, v

    # (key, value) pairs collapse into a dictionary
    params = dict

    def block(self, b):
        """Return ``(name, contents)``, dropping the trailing end-name."""
        return tuple(b[:2])

    def paramsblockname(self, bn):
        """Return the block name as a lowercase string."""
        return str(bn[0]).lower()

    def listblockname(self, bn):
        """Return the lowercase block name, suffixed by its index if any."""
        name = str(bn[0])
        # tolerate both a missing index and a `None` placeholder for it
        if len(bn) > 1 and bn[1] is not None:
            index = int(bn[1])
            name = f"{name} {index}"
        return name.lower()

    # (name, contents) block pairs collapse into a dictionary
    component = dict


# Module-level transformer instance used by the tests below.
MF6_TRANSFORMER = MF6Transformer()


@pytest.mark.xfail
def test_transform_mf6():
tree = MF6_PARSER.parse(TEST_PKG)
def test_transform():
    """Transform the sample component and check each block's contents."""
    data = MF6_TRANSFORMER.transform(MF6_PARSER.parse(TEST_COMPONENT))
    pprint(data)

    # parameter block: flags become True, values take their parsed type
    expected_options = {
        "d": 1.0,
        "f": Path("some/path"),
        "i": 1,
        "k": True,
        "s": "hello world",
    }
    assert data["options"] == expected_options

    # inline array values are collected into a numpy array
    expected_array = np.array([1.0, 2.0, 3.0])
    assert np.array_equal(data["packagedata"]["a"], expected_array)

    # list blocks are keyed by name and index; records are tuples
    assert data["period 1"][0] == ("FIRST",)
    assert data["period 1"][1] == ("FREQUENCY", 2)
    assert data["period 2"][0] == ("STEPS", 1, 2, 3)

0 comments on commit b205400

Please sign in to comment.