Skip to content

Commit

Permalink
Merge pull request #173 from kbase/develop
Browse files Browse the repository at this point in the history
Release v1.3.4
  • Loading branch information
MrCreosote authored May 2, 2022
2 parents 7c1194d + acc6048 commit 795ea92
Show file tree
Hide file tree
Showing 6 changed files with 37 additions and 41 deletions.
7 changes: 7 additions & 0 deletions RELEASE_NOTES.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
### Version 1.3.4
- Alter the behavior of the bulk specification file writers to return an error if the
input `types` parameter is empty.
- Fixed a bug in the csv/tsv bulk specification parser that would case a failure if the
first header of a file had trailing separators. This occurs if a csv/tsv file is opened and
saved by Excel.

### Version 1.3.3
- Fixed a bug in the csv/tsv bulk specification parser that would include an empty entry for
each empty line in the file.
Expand Down
6 changes: 3 additions & 3 deletions staging_service/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@

logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
routes = web.RouteTableDef()
VERSION = "1.3.3"
VERSION = "1.3.4"

_DATATYPE_MAPPINGS = None

Expand All @@ -59,7 +59,7 @@ async def importer_filetypes(request: web.Request) -> web.json_response:
Returns the file types for the configured datatypes. The returned JSON contains two keys:
* datatype_to_filetype, which maps import datatypes (like gff_genome) to their accepted
filetypes (like [FASTA, GFF])
* filetype_to_extensions, which maps file types (e.g. FASTA) to their extensions (e.g.
* filetype_to_extensions, which maps file types (e.g. FASTA) to their extensions (e.g.
*.fa, *.fasta, *.fa.gz, etc.)
This information is currently static over the life of the server.
Expand Down Expand Up @@ -197,7 +197,7 @@ async def write_bulk_specification(request: web.Request) -> web.json_response:
def _createJSONErrorResponse(error_text: str, error_class=web.HTTPBadRequest):
err = json.dumps({"error": error_text})
return error_class(text=err, content_type=_APP_JSON)


@routes.get("/add-acl-concierge")
async def add_acl_concierge(request: web.Request):
Expand Down
6 changes: 3 additions & 3 deletions staging_service/import_specifications/file_writers.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
entry in the list corresponds to a row in the resulting import specification,
and the order of the list defines the order of the rows.
Leave the `data` list empty to write an empty template.
:returns: A mapping of the data types to the files to which they were written.
:returns: A mapping of the data types to the files to which they were written.
"""
# note that we can't use an f string here to interpolate the variables below, e.g.
# order_and_display, etc.
Expand Down Expand Up @@ -82,7 +82,7 @@ def _check_import_specification(types: dict[str, dict[str, list[Any]]]):
Leave the {_DATA} list empty to write an empty template.
"""
if not types:
return
raise ImportSpecWriteException("At least one data type must be specified")
for datatype in types:
# replace this with jsonschema? don't worry about it for now
_check_string(datatype, "A data type")
Expand Down Expand Up @@ -235,4 +235,4 @@ class ImportSpecWriteException(Exception):
"""
An exception thrown when writing an import specification fails.
"""
pass
pass
43 changes: 14 additions & 29 deletions staging_service/import_specifications/individual_parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
# TODO update to C impl when fixed: https://github.com/Marco-Sulla/python-frozendict/issues/26
from frozendict.core import frozendict
from pathlib import Path
from typing import TextIO, Optional as O, Union, Any
from typing import Optional as O, Union, Any

from staging_service.import_specifications.file_parser import (
PRIMITIVE_TYPE,
Expand All @@ -35,7 +35,7 @@
_HEADER_REGEX = re.compile(f"{_DATA_TYPE} (\\w+){_HEADER_SEP} "
+ f"{_COLUMN_STR} (\\d+){_HEADER_SEP} {_VERSION_STR} (\\d+)")

_MAGIC_TEXT_FILES = {"text/plain", "inode/x-empty"}
_MAGIC_TEXT_FILES = {"text/plain", "inode/x-empty", "application/csv", "text/csv"}


class _ParseException(Exception):
Expand Down Expand Up @@ -63,26 +63,18 @@ def _parse_header(header: str, spec_source: SpecificationSource, maximum_version
return match[1], int(match[2])


def _required_next(
input_: Union[TextIO, Any], # Any really means a csv reader object
spec_source: SpecificationSource,
error: str
) -> Union[str, list[str]]:
# returns a string for a TextIO input or a list for a Reader input
try:
return next(input_)
except StopIteration:
raise _ParseException(Error(ErrorType.PARSE_FAIL, error, spec_source))

def _csv_next(
input_: Union[TextIO, Any], # Any really means a csv reader object
input_: Any, # Any really means a csv reader object
line_number: int,
expected_line_count: int,
expected_line_count: Union[None, int], # None = skip columns check
spec_source: SpecificationSource,
error: str
) -> list[str]:
line = _required_next(input_, spec_source, error)
if len(line) != expected_line_count:
try:
line = next(input_)
except StopIteration:
raise _ParseException(Error(ErrorType.PARSE_FAIL, error, spec_source))
if expected_line_count and len(line) != expected_line_count:
raise _ParseException(Error(
ErrorType.INCORRECT_COLUMN_COUNT,
f"Incorrect number of items in line {line_number}, "
Expand All @@ -91,15 +83,6 @@ def _csv_next(
return line


def _get_datatype(input_: TextIO, spec_source: SpecificationSource, maximum_version: int
) -> tuple[str, int]:
# return is (data type, column count)
return _parse_header(
_required_next(input_, spec_source, "Missing data type / version header").strip(),
spec_source,
maximum_version)


def _error(error: Error) -> ParseResults:
return ParseResults(errors = tuple([error]))

Expand Down Expand Up @@ -155,11 +138,13 @@ def _normalize_headers(
def _parse_xsv(path: Path, sep: str) -> ParseResults:
spcsrc = SpecificationSource(path)
try:
if magic.from_file(str(path), mime=True) not in _MAGIC_TEXT_FILES:
return _error(Error(ErrorType.PARSE_FAIL, "Not a text file", spcsrc))
filetype = magic.from_file(str(path), mime=True)
if filetype not in _MAGIC_TEXT_FILES:
return _error(Error(ErrorType.PARSE_FAIL, "Not a text file: " + filetype, spcsrc))
with open(path, newline='') as input_:
datatype, columns = _get_datatype(input_, spcsrc, _VERSION)
rdr = csv.reader(input_, delimiter=sep) # let parser handle quoting
dthd = _csv_next(rdr, 1, None, spcsrc, "Missing data type / version header")
datatype, columns = _parse_header(dthd[0], spcsrc, _VERSION)
hd1 = _csv_next(rdr, 2, columns, spcsrc, "Missing 2nd header line")
param_ids = _normalize_headers(hd1, 2, spcsrc)
_csv_next(rdr, 3, columns, spcsrc, "Missing 3rd header line")
Expand Down
6 changes: 2 additions & 4 deletions tests/import_specifications/test_file_writers.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,6 @@ def temp_dir() -> Generator[Path, None, None]:
}
}

def test_noop():
assert write_csv(Path("."), {}) == {}
assert write_tsv(Path("."), {}) == {}

def test_write_csv(temp_dir: Path):
res = write_csv(temp_dir, _TEST_DATA)
Expand Down Expand Up @@ -194,6 +191,7 @@ def test_file_writers_fail():
E = ImportSpecWriteException
file_writers_fail(None, {}, ValueError("The folder cannot be null"))
file_writers_fail(p, None, E("The types value must be a mapping"))
file_writers_fail(p, {}, E("At least one data type must be specified"))
file_writers_fail(
p, {None: 1}, E("A data type cannot be a non-string or a whitespace only string"))
file_writers_fail(
Expand Down Expand Up @@ -281,4 +279,4 @@ def file_writers_fail(path: Path, types: dict, expected: Exception):
assert_exception_correct(got.value, expected)
with raises(Exception) as got:
write_excel(path, types)
assert_exception_correct(got.value, expected)
assert_exception_correct(got.value, expected)
10 changes: 8 additions & 2 deletions tests/import_specifications/test_individual_parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@ def temp_dir() -> Generator[Path, None, None]:


def test_xsv_parse_success(temp_dir: Path):
# Tests a special case where if an xSV file is opened by Excel and then resaved,
# Excel will add separators to the end of the 1st header line. This previously caused
# the parse to fail.
_xsv_parse_success(temp_dir, ',', parse_csv)
_xsv_parse_success(temp_dir, '\t', parse_tsv)

Expand All @@ -48,7 +51,7 @@ def _xsv_parse_success(temp_dir: Path, sep: str, parser: Callable[[Path], ParseR
input_ = temp_dir / str(uuid.uuid4())
with open(input_, "w") as test_file:
test_file.writelines([
"Data type: some_type; Columns: 4; Version: 1\n",
f"Data type: some_type; Columns: 4; Version: 1{s}{s}{s}\n",
f"spec1{s} spec2{s} spec3 {s} spec4\n", # test trimming
f"Spec 1{s} Spec 2{s} Spec 3{s} Spec 4\n",
f"val1 {s} val2 {s} 7 {s} 3.2\n", # test trimming
Expand Down Expand Up @@ -166,7 +169,10 @@ def test_xsv_parse_fail_binary_file(temp_dir: Path):
res = parse_csv(test_file)

assert res == ParseResults(errors=tuple([
Error(ErrorType.PARSE_FAIL, "Not a text file", source_1=SpecificationSource(test_file))
Error(
ErrorType.PARSE_FAIL,
"Not a text file: application/vnd.ms-excel",
source_1=SpecificationSource(test_file))
]))


Expand Down

0 comments on commit 795ea92

Please sign in to comment.