diff --git a/dbfread/dbf.py b/dbfread/dbf.py index 65d45ac..90f670d 100644 --- a/dbfread/dbf.py +++ b/dbfread/dbf.py @@ -212,13 +212,47 @@ def deleted(self): def _read_header(self, infile): # Todo: more checks? - self.header = DBFHeader.read(infile) - - if self.encoding is None: - try: - self.encoding = guess_encoding(self.header.language_driver) - except LookupError: - self.encoding = 'ascii' + ''' + Reads the dbf header into memory. + + What could go wrong with reading the DBF header? The structure of the header is found + here: https://www.dbase.com/Knowledgebase/INT/db7_file_fmt.htm. + + 1. The read operation could fail because the DBF Header does not exist. In this case, + the program should either generate a user message or generate a blank header file. + :param infile: + :return: + ''' + + try: + self.header = DBFHeader.read(infile) + + if self.encoding is None: + try: + self.encoding = guess_encoding ( self.header.language_driver ) + except LookupError: + self.encoding = 'ascii' + except FileNotFoundError: + self.header = dict('DBFHeader', + '= 2: + return float ( data.replace ( b',', b'' ) ) + + # The default ultimate failure should be a NaN value + return float ( 'NaN' ) def parseO(self, field, data): """Parse long field (O) and return float.""" @@ -195,7 +209,7 @@ def parseT(self, field, data): # Offset from julian days (used in the file) to proleptic Gregorian # ordinals (used by the datetime module) - offset = 1721425 # Todo: will this work? + offset = 1721425 if data.strip(): # Note: if the day number is 0, we return None diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_codepages.py b/tests/test_codepages.py new file mode 100644 index 0000000..1ab76f7 --- /dev/null +++ b/tests/test_codepages.py @@ -0,0 +1,12 @@ +from dbfread.codepages import * +from pytest import * + + +def test_guess_encoding(): + + assert guess_encoding(0x00) == 'ascii' + with raises(LookupError): + guess_encoding(0x200) + +if __name__ == '__main__': + pytest.main () diff --git a/tests/test_field_parser.py b/tests/test_field_parser.py index c16f6fb..71e43d2 100644 --- a/tests/test_field_parser.py +++ b/tests/test_field_parser.py @@ -1,7 +1,7 @@ import datetime from decimal import Decimal from pytest import raises -from dbfread.field_parser import FieldParser +from dbfread.field_parser import * class MockHeader(object): dbversion = 0x02 @@ -12,6 +12,15 @@ def __init__(self): self.encoding = 'ascii' self.char_decode_errors = 'strict' + def set_encoding(self, new_encoding): + self.encoding = new_encoding + + def set_year(self, year): + ''' + Access self.header.year and change the year + ''' + self.header.year = year + class MockField(object): def __init__(self, type='', **kwargs): self.type = type @@ -24,12 +33,17 @@ def __getitem__(self, index): else: return dict.__getitem__(self, index) -def make_field_parser(field_type, dbversion=0x02, memofile=None): +def make_field_parser(field_type, dbversion=0x02, memofile=None, encoding=None, year=None): dbf = MockDBF() dbf.header.dbversion = dbversion parser = FieldParser(dbf, memofile) field = MockField(field_type) + if encoding is not None: + dbf.set_encoding(encoding) + if year is not None: + dbf.set_year(year) + def parse(data): return parser.parse(field, data) @@ -47,7 +61,7 @@ def test_C(): assert type(parse(b'test')) == type(u'') def test_D(): - parse = make_field_parser('D') + parse = make_field_parser('D', year=21) assert parse(b'00000000') is None assert parse(b' ') is None @@ -55,7 +69,11 @@ def test_D(): epoch = datetime.date(1970, 1, 1) assert parse(b'19700101') == epoch + new_century = datetime.date(2021,1,1) + assert parse(b'00210101') == new_century + with raises(ValueError): + assert parse(b' 0\0') is None parse(b'NotIntgr') def test_F(): @@ -85,6 +103,8 @@ def test_I(): assert parse(b'\x01\x00\x00\x00') == 1 assert parse(b'\xff\xff\xff\xff') == -1 + + def test_L(): parse = make_field_parser('L') @@ -100,7 +120,7 @@ def test_L(): # Some invalid values. for char in b'!0': with raises(ValueError): - parse(char) + assert parse(char) is None # This also tests B, G and P. def test_M(): @@ -142,6 +162,8 @@ def test_N(): with raises(ValueError): parse(b'okasd') + assert parse(b',') == 'NaN' + parse(b'3,123.4') == 3123.4 def test_O(): """Test double field.""" diff --git a/tests/test_ifiles.py b/tests/test_ifiles.py index 303ff77..de58daf 100644 --- a/tests/test_ifiles.py +++ b/tests/test_ifiles.py @@ -1,4 +1,4 @@ -from dbfread.ifiles import ipat, ifnmatch +from dbfread.ifiles import * assert ipat('mixed') == '[Mm][Ii][Xx][Ee][Dd]' assert ifnmatch('test', 'test') == True diff --git a/tests/test_invalid_value.py b/tests/test_invalid_value.py index a5d57a5..81359c0 100644 --- a/tests/test_invalid_value.py +++ b/tests/test_invalid_value.py @@ -1,5 +1,6 @@ from dbfread.field_parser import InvalidValue + def test_repr(): assert repr(InvalidValue(b'')) == "InvalidValue(b'')" diff --git a/tests/test_memo.py b/tests/test_memo.py index 62421e5..72023d3 100644 --- a/tests/test_memo.py +++ b/tests/test_memo.py @@ -1,6 +1,9 @@ from pytest import raises -from dbfread import DBF -from dbfread import MissingMemoFile + +from dbfread.memo import * +from dbfread.exceptions import MissingMemoFile +from dbfread.dbf import DBF + def test_missing_memofile(): with raises(MissingMemoFile): diff --git a/tests/test_read_and_length.py b/tests/test_read_and_length.py index bd3b963..edd9d3c 100644 --- a/tests/test_read_and_length.py +++ b/tests/test_read_and_length.py @@ -2,8 +2,11 @@ Tests reading from database. """ import datetime + +from dbfread import dbf from pytest import fixture -from dbfread import DBF + +from dbfread.dbf import DBF @fixture def table():