Skip to content

Commit

Permalink
Handle unspecified and qualified ("16XX~")
Browse files Browse the repository at this point in the history
Previously, unspecified dates could not carry a qualifier (e.g. "16XX~"), and years with three unspecified digits ("1XXX") were not supported. This commit adds both features, along with tests for those use cases.
  • Loading branch information
ColeDCrawford committed May 28, 2024
1 parent d550274 commit ef24bc7
Show file tree
Hide file tree
Showing 4 changed files with 102 additions and 4 deletions.
7 changes: 7 additions & 0 deletions edtf/appsettings.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,13 @@
# Each setting below is looked up on EDTF by name; the second argument to
# .get() is the default used when no override is present.
# NOTE(review): EDTF is defined above this excerpt -- presumably the
# project-level settings mapping; confirm.

# Padding amounts, one per precision level, expressed as relativedelta values.
PADDING_MONTH_PRECISION = EDTF.get("PADDING_MONTH_PRECISION", relativedelta(months=1))
PADDING_YEAR_PRECISION = EDTF.get("PADDING_YEAR_PRECISION", relativedelta(years=1))
PADDING_SEASON_PRECISION = EDTF.get("PADDING_SEASON_PRECISION", relativedelta(weeks=12))
# Coarser precision levels: decade, century and millennium.
PADDING_DECADE_PRECISION = EDTF.get("PADDING_DECADE_PRECISION", relativedelta(years=10))
PADDING_CENTURY_PRECISION = EDTF.get(
"PADDING_CENTURY_PRECISION", relativedelta(years=100)
)
PADDING_MILLENNIUM_PRECISION = EDTF.get(
"PADDING_MILLENNIUM_PRECISION", relativedelta(years=1000)
)
# Scaling factors by qualifier kind (uncertain, approximate, or both).
# NOTE(review): presumably these scale the padding amounts above -- confirm
# against the qualifier's multiplier lookup.
MULTIPLIER_IF_UNCERTAIN = EDTF.get("MULTIPLIER_IF_UNCERTAIN", 1.0)
MULTIPLIER_IF_APPROXIMATE = EDTF.get("MULTIPLIER_IF_APPROXIMATE", 1.0)
MULTIPLIER_IF_BOTH = EDTF.get("MULTIPLIER_IF_BOTH", 2.0)
Expand Down
8 changes: 5 additions & 3 deletions edtf/parser/grammar.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,17 +161,19 @@ def f(toks):
Level1Interval.set_parser(level1Interval)

# (* *** unspecified *** *)
# NOTE(review): this excerpt is a diff rendering without +/- markers, so the
# old and new versions of some lines appear back to back; confirm which lines
# survive in the committed file before relying on it.

# Four-character year: two digits, then a digit or "X", then a literal "X"
# (e.g. "18XX", "185X"). Appears to be the pre-change rule.
yearWithOneOrTwoUnspecifedDigits = Combine(digit + digit + (digit ^ "X") + "X")("year")
# Four-character year: one digit, then two positions that are each a digit or
# "X", then a literal "X" (e.g. "1XXX", "16XX", "156X").
# NOTE(review): as written this also accepts a non-trailing "X" such as
# "1X5X" -- confirm whether that is intended.
yearWithOneOrTwoOrThreeUnspecifedDigits = Combine(
digit + (digit ^ "X") + (digit ^ "X") + "X"
)("year")
# A year followed by "-XX" in the month position.
monthUnspecified = year + "-" + L("XX")("month")
# A year-month followed by "-XX" in the day position.
dayUnspecified = yearMonth + "-" + L("XX")("day")
# A year followed by "-XX-XX": both month and day unspecified.
dayAndMonthUnspecified = year + "-" + L("XX")("month") + "-" + L("XX")("day")

# Any of the unspecified forms above, optionally followed by a UASymbol that
# is captured under the name "ua".
unspecified = (
yearWithOneOrTwoUnspecifedDigits
yearWithOneOrTwoOrThreeUnspecifedDigits
^ monthUnspecified
^ dayUnspecified
^ dayAndMonthUnspecified
)
) + Optional(UASymbol)("ua")
# Register the combined expression as the parser for the Unspecified class.
Unspecified.set_parser(unspecified)

# (* *** uncertainOrApproxDate *** *)
Expand Down
84 changes: 83 additions & 1 deletion edtf/parser/parser_classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -541,7 +541,89 @@ def precision(self):


class Unspecified(Date):
    """Date whose year, month or day is written with unspecified "X" digits.

    An optional qualifier (``ua``) may be attached, as in ``"16XX~"``.
    """

    def __init__(
        self,
        year=None,
        month=None,
        day=None,
        significant_digits=None,
        ua=None,
        **kwargs,
    ):
        """Store the date components and the optional qualifier.

        If ``kwargs`` contains a ``date``, ``lower`` or ``upper`` entry, the
        first one present (checked in that order) is unpacked and used to
        re-run the initialiser; all other arguments are then ignored.
        """
        nested_key = next(
            (name for name in ("date", "lower", "upper") if name in kwargs),
            None,
        )
        if nested_key is not None:
            self.__init__(**kwargs[nested_key])
            return

        # Year is required, but sometimes passed in as a 'date' dict.
        self.year = year
        self.month = month
        self.day = day
        if significant_digits:
            self.significant_digits = int(significant_digits)
        else:
            self.significant_digits = None
        # Any falsy qualifier is normalised to None.
        self.ua = ua or None

    def __str__(self):
        """Return year[-month][-day] followed by the qualifier, if any."""
        text = self.year
        for component in (self.month, self.day):
            if component:
                text += f"-{component}"
        if self.ua:
            text += str(self.ua)
        return text

    def _get_fuzzy_padding(self, lean):
        """Return the padding implied by the qualifier as a relativedelta.

        Without a qualifier the padding is empty. Otherwise each component
        that is present (year, month, day) contributes its configured padding
        scaled by the qualifier's multiplier and truncated to an int. The
        year contribution depends on the precision of the date. ``lean`` is
        not used.
        """
        total = relativedelta()
        if not self.ua:
            return total
        factor = self.ua._get_multiplier()

        if self.year:
            level = self.precision
            # Pick the year-based padding setting that matches the precision.
            if level == PRECISION_MILLENIUM:
                year_setting = appsettings.PADDING_MILLENNIUM_PRECISION
            elif level == PRECISION_CENTURY:
                year_setting = appsettings.PADDING_CENTURY_PRECISION
            elif level == PRECISION_DECADE:
                year_setting = appsettings.PADDING_DECADE_PRECISION
            else:
                year_setting = appsettings.PADDING_YEAR_PRECISION
            total += relativedelta(years=int(factor * year_setting.years))
        if self.month:
            month_amount = int(factor * appsettings.PADDING_MONTH_PRECISION.months)
            total += relativedelta(months=month_amount)
        if self.day:
            day_amount = int(factor * appsettings.PADDING_DAY_PRECISION.days)
            total += relativedelta(days=day_amount)

        return total

    @property
    def precision(self):
        """Return the PRECISION_* constant for the finest component present.

        Day wins over month, which wins over year. For a year-only value, an
        all-digit year has year precision; a four-character year ending in
        "XXX", "XX" or "X" has millennium, century or decade precision.

        Raises:
            ValueError: if no precision can be determined.
        """
        if self.day:
            return PRECISION_DAY
        if self.month:
            return PRECISION_MONTH

        year = self.year
        if year:
            if year.isdigit():
                return PRECISION_YEAR
            if len(year) == 4:
                # Longest run of trailing "X" is checked first.
                if year.endswith("XXX"):
                    return PRECISION_MILLENIUM
                elif year.endswith("XX"):
                    return PRECISION_CENTURY
                elif year.endswith("X"):
                    return PRECISION_DECADE
        raise ValueError(f"Unspecified date {self} has no precision")


class Level1Interval(Interval):
Expand Down
7 changes: 7 additions & 0 deletions edtf/parser/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,13 @@
("-0275~", ("-0275-01-01", "-0275-12-31", "-0276-01-01", "-0274-12-31")),
("-0001~", ("-0001-01-01", "-0001-12-31", "-0002-01-01", "0000-12-31")),
("0000~", ("0000-01-01", "0000-12-31", "-0001-01-01", "0001-12-31")),
# Unspecified and qualified
# "circa 17th century"
("16XX~", ("1600-01-01", "1699-12-31", "1500-01-01", "1799-12-31")),
("16XX%", ("1600-01-01", "1699-12-31", "1400-01-01", "1899-12-31")),
("1XXX", ("1000-01-01", "1999-12-31")),
("1XXX~", ("1000-01-01", "1999-12-31", "0000-01-01", "2999-12-31")),
("156X~", ("1560-01-01", "1569-12-31", "1550-01-01", "1579-12-31")),
# L1 Extended Interval
# beginning unknown, end 2006
# for intervals with an unknown beginning or end, the unknown bound is calculated with the constant DELTA_IF_UNKNOWN (10 years)
Expand Down

0 comments on commit ef24bc7

Please sign in to comment.