From aefc8b8a231ee2b375d53753f743bdee343a6252 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 6 Feb 2025 12:27:35 -0500 Subject: [PATCH 1/6] Preliminary combined date parser --- .../converters/calendars/hebrew/hebrew.lark | 24 ++++++------- .../calendars/hebrew/transformer.py | 12 +++---- .../converters/calendars/hijri/transformer.py | 13 +++++-- src/undate/converters/combined.lark | 32 +++++++++++++++++ src/undate/converters/combined.py | 25 +++++++++++++ src/undate/converters/edtf/transformer.py | 5 ++- tests/test_converters/test_combined_parser.py | 35 +++++++++++++++++++ 7 files changed, 125 insertions(+), 21 deletions(-) create mode 100644 src/undate/converters/combined.lark create mode 100644 src/undate/converters/combined.py create mode 100644 tests/test_converters/test_combined_parser.py diff --git a/src/undate/converters/calendars/hebrew/hebrew.lark b/src/undate/converters/calendars/hebrew/hebrew.lark index b55ec3f..7e7b107 100644 --- a/src/undate/converters/calendars/hebrew/hebrew.lark +++ b/src/undate/converters/calendars/hebrew/hebrew.lark @@ -11,23 +11,23 @@ hebrew_date: day month year | month year | year // PGP dates use qualifiers like "first decade of" (for beginning of month) // "first third of", seasons (can look for more examples) -// Hebrew calendar starts with year 1 in 3761 BCE +// Hebrew calendar starts with year 1 in 3761 BCE year: /\d+/ // months month: month_1 | month_2 - | month_3 - | month_4 - | month_5 - | month_6 - | month_7 - | month_8 - | month_9 - | month_10 - | month_11 - | month_12 - | month_13 + | month_3 + | month_4 + | month_5 + | month_6 + | month_7 + | month_8 + | month_9 + | month_10 + | month_11 + | month_12 + | month_13 // months have 29 or 30 days; we do not expect leading zeroes day: /[1-9]/ | /[12][0-9]/ | /30/ diff --git a/src/undate/converters/calendars/hebrew/transformer.py b/src/undate/converters/calendars/hebrew/transformer.py index a6d2888..8e39aeb 100644 --- 
a/src/undate/converters/calendars/hebrew/transformer.py +++ b/src/undate/converters/calendars/hebrew/transformer.py @@ -22,14 +22,14 @@ def hebrew_date(self, items): value = int(child.children[0]) parts[str(child.data)] = value - # initialize and return an undate with islamic year, month, day and - # islamic calendar + # initialize and return an undate with year, month, day in + # hebrew calendar return HebrewUndate(**parts) - # year translation is not needed since we want a tree with name year - # this is equivalent to a no-op - # def year(self, items): - # return Tree(data="year", children=[items[0]]) + def year(self, items): + # combine multiple parts into a single string + value = "".join([str(i) for i in items]) + return Tree(data="year", children=[value]) def month(self, items): # month has a nested tree for the rule and the value diff --git a/src/undate/converters/calendars/hijri/transformer.py b/src/undate/converters/calendars/hijri/transformer.py index b575df9..45b4558 100644 --- a/src/undate/converters/calendars/hijri/transformer.py +++ b/src/undate/converters/calendars/hijri/transformer.py @@ -28,8 +28,17 @@ def hijri_date(self, items): # year translation is not needed since we want a tree with name year # this is equivalent to a no-op - # def year(self, items): - # return Tree(data="year", children=[items[0]]) + def year(self, items): + # combine multiple parts into a single string + # (for some reason we're getting an anonymous token in combined parser) + value = "".join([str(i) for i in items]) + return Tree(data="year", children=[value]) + + def day(self, items): + # combine multiple parts into a single string + # (for some reason we're getting an anonymous token in combined parser) + value = "".join([str(i) for i in items]) + return Tree(data="day", children=[value]) def month(self, items): # month has a nested tree for the rule and the value diff --git a/src/undate/converters/combined.lark b/src/undate/converters/combined.lark new file mode 100644 
index 0000000..eb559d4 --- /dev/null +++ b/src/undate/converters/combined.lark @@ -0,0 +1,32 @@ +%import common.WS +%ignore WS + +start: (edtf__start | hebrew__hebrew_date | hijri__hijri_date ) + +// Renaming of the import variables is required, as they receive the namespace of this file. +// See: https://github.com/lark-parser/lark/pull/973#issuecomment-907287565 + +// relative import from edtf/edtf.lark +// NOTE: this results in a prefix of edtf__edtf__ +%import .edtf.edtf.edtf -> edtf__start + +// relative import from calendars/hebrew/hebrew.lark +%import .calendars.hebrew.hebrew.hebrew_date -> hebrew__hebrew_date +%import .calendars.hebrew.hebrew.day -> hebrew__day +%import .calendars.hebrew.hebrew.month -> hebrew__month +%import .calendars.hebrew.hebrew.year -> hebrew__year + +// relative import from calendars/hijri/hijri.lark +%import .calendars.hijri.hijri.hijri_date -> hijri__hijri_date +%import .calendars.hijri.hijri.day -> hijri__day +%import .calendars.hijri.hijri.month -> hijri__month +%import .calendars.hijri.hijri.year -> hijri__year + + + +// override hebrew date to omit year-only, since year without calendar is ambiguous +// NOTE: potentially support year with calendar label +%override hebrew__hebrew_date: hebrew__day hebrew__month hebrew__year | hebrew__month hebrew__year + +// same for hijri date, year alone is ambiguous +%override hijri__hijri_date: hijri__day hijri__month hijri__year | hijri__month hijri__year diff --git a/src/undate/converters/combined.py b/src/undate/converters/combined.py new file mode 100644 index 0000000..7f41afc --- /dev/null +++ b/src/undate/converters/combined.py @@ -0,0 +1,25 @@ +from lark import Lark +from lark.visitors import Transformer, merge_transformers + +from undate.converters.edtf.transformer import EDTFTransformer +from undate.converters.calendars.hebrew.transformer import HebrewDateTransformer +from undate.converters.calendars.hijri.transformer import HijriDateTransformer + + +class 
CombinedDateTransformer(Transformer): + def start(self, children): + return children + + +# NOTE: we can't support year-only dates in combined parser because calendar +# is ambiguous, unless we want to add a calendar indicator + +combined_transformer = merge_transformers( + CombinedDateTransformer(), + edtf__edtf=EDTFTransformer(), # nested prefix due to nested import path + hebrew=HebrewDateTransformer(), + hijri=HijriDateTransformer(), +) + + +parser = Lark.open("combined.lark", rel_to=__file__, strict=True) diff --git a/src/undate/converters/edtf/transformer.py b/src/undate/converters/edtf/transformer.py index d5bcfcb..5268700 100644 --- a/src/undate/converters/edtf/transformer.py +++ b/src/undate/converters/edtf/transformer.py @@ -66,7 +66,10 @@ def day_unspecified(self, items): def date_level1(self, items): return self.date(items) - # year (including negative years) use default transformation + def year(self, items): + # combine parts (numeric & unknown) into a single string + value = "".join(self.get_values(items)) + return Tree(data="year", children=[value]) def year_fivedigitsplus(self, items): # strip off the leading Y and convert to integer diff --git a/tests/test_converters/test_combined_parser.py b/tests/test_converters/test_combined_parser.py new file mode 100644 index 0000000..b90ca73 --- /dev/null +++ b/tests/test_converters/test_combined_parser.py @@ -0,0 +1,35 @@ +import pytest + +from undate.converters.combined import parser, combined_transformer + +from undate.undate import Undate, UndateInterval + +# for now, just test that valid dates can be parsed + +testcases = [ + # EDTF + ("1984", Undate(1984)), + ("201X", Undate("201X")), + ("20XX", Undate("20XX")), + ("2004-XX", Undate(2004, "XX")), + ("1000/2000", UndateInterval(Undate(1000), Undate(2000))), + # Hebrew / Anno Mundi calendar + ("Tammuz 4816", Undate(4816, 4, calendar="Hebrew")), + # Islamic / Hijri calendar + ("Jumādā I 1243", Undate(1243, 5, calendar="Hijri")), + ("7 Jumādā I 1243", 
Undate(1243, 5, 7, calendar="Hijri")), + ("14 Rabīʿ I 901", Undate(901, 3, 14, calendar="Hijri")), +] + + +@pytest.mark.parametrize("date_string,expected", testcases) +def test_transform(date_string, expected): + transformer = combined_transformer + # parse the input string, then transform to undate object + parsetree = parser.parse(date_string) + print(parsetree) + # since the same unknown date is not considered strictly equal, + # compare object representations + transformed_date = transformer.transform(parsetree) + print(transformed_date) + assert repr(transformed_date[0]) == repr(expected) From 2fc55f7d7cc0acbb185fd5e85a172b387ec516ed Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 18 Dec 2025 16:03:04 -0500 Subject: [PATCH 2/6] Move parser grammar files to common location; simplify combined parser --- src/undate/converters/__init__.py | 4 ++- src/undate/converters/base.py | 5 +++ .../converters/calendars/hebrew/parser.py | 6 ++-- .../converters/calendars/hijri/parser.py | 6 ++-- src/undate/converters/combined.lark | 32 ------------------- src/undate/converters/combined.py | 15 ++++++--- src/undate/converters/edtf/parser.py | 6 ++-- src/undate/converters/grammars/combined.lark | 32 +++++++++++++++++++ .../converters/{edtf => grammars}/edtf.lark | 0 .../hebrew => grammars}/hebrew.lark | 0 .../{calendars/hijri => grammars}/hijri.lark | 0 11 files changed, 60 insertions(+), 46 deletions(-) delete mode 100644 src/undate/converters/combined.lark create mode 100644 src/undate/converters/grammars/combined.lark rename src/undate/converters/{edtf => grammars}/edtf.lark (100%) rename src/undate/converters/{calendars/hebrew => grammars}/hebrew.lark (100%) rename src/undate/converters/{calendars/hijri => grammars}/hijri.lark (100%) diff --git a/src/undate/converters/__init__.py b/src/undate/converters/__init__.py index e13532d..cfbe6fa 100644 --- a/src/undate/converters/__init__.py +++ b/src/undate/converters/__init__.py @@ -1 +1,3 @@ -from undate.converters.base 
import BaseDateConverter as BaseDateConverter +from undate.converters.base import BaseDateConverter, GRAMMAR_FILE_PATH + +__all__ = ["BaseDateConverter", "GRAMMAR_FILE_PATH"] diff --git a/src/undate/converters/base.py b/src/undate/converters/base.py index 5fefe49..9082a5b 100644 --- a/src/undate/converters/base.py +++ b/src/undate/converters/base.py @@ -44,6 +44,7 @@ import importlib import logging +import pathlib import pkgutil from functools import cache from typing import Dict, Type @@ -51,6 +52,10 @@ logger = logging.getLogger(__name__) +#: path to parser grammar files +GRAMMAR_FILE_PATH = pathlib.Path(__file__).parent / "grammars" + + class BaseDateConverter: """Base class for parsing, formatting, and converting dates to handle specific formats and different calendars.""" diff --git a/src/undate/converters/calendars/hebrew/parser.py b/src/undate/converters/calendars/hebrew/parser.py index 5654f60..3056f85 100644 --- a/src/undate/converters/calendars/hebrew/parser.py +++ b/src/undate/converters/calendars/hebrew/parser.py @@ -1,8 +1,8 @@ -import pathlib - from lark import Lark -grammar_path = pathlib.Path(__file__).parent / "hebrew.lark" +from undate.converters import GRAMMAR_FILE_PATH + +grammar_path = GRAMMAR_FILE_PATH / "hebrew.lark" with open(grammar_path) as grammar: # NOTE: LALR parser is faster but can't be used to ambiguity between years and dates diff --git a/src/undate/converters/calendars/hijri/parser.py b/src/undate/converters/calendars/hijri/parser.py index 273cdf9..a119905 100644 --- a/src/undate/converters/calendars/hijri/parser.py +++ b/src/undate/converters/calendars/hijri/parser.py @@ -1,8 +1,8 @@ -import pathlib - from lark import Lark -grammar_path = pathlib.Path(__file__).parent / "hijri.lark" +from undate.converters import GRAMMAR_FILE_PATH + +grammar_path = GRAMMAR_FILE_PATH / "hijri.lark" with open(grammar_path) as grammar: # NOTE: LALR parser is faster but can't be used to ambiguity between years and dates diff --git 
a/src/undate/converters/combined.lark b/src/undate/converters/combined.lark deleted file mode 100644 index eb559d4..0000000 --- a/src/undate/converters/combined.lark +++ /dev/null @@ -1,32 +0,0 @@ -%import common.WS -%ignore WS - -start: (edtf__start | hebrew__hebrew_date | hijri__hijri_date ) - -// Renaming of the import variables is required, as they receive the namespace of this file. -// See: https://github.com/lark-parser/lark/pull/973#issuecomment-907287565 - -// relative import from edtf/edtf.lark -// NOTE: this results in a prefix of edtf__edtf__ -%import .edtf.edtf.edtf -> edtf__start - -// relative import from calendars/hebrew/hebrew.lark -%import .calendars.hebrew.hebrew.hebrew_date -> hebrew__hebrew_date -%import .calendars.hebrew.hebrew.day -> hebrew__day -%import .calendars.hebrew.hebrew.month -> hebrew__month -%import .calendars.hebrew.hebrew.year -> hebrew__year - -// relative import from calendars/hijri/hijri.lark -%import .calendars.hijri.hijri.hijri_date -> hijri__hijri_date -%import .calendars.hijri.hijri.day -> hijri__day -%import .calendars.hijri.hijri.month -> hijri__month -%import .calendars.hijri.hijri.year -> hijri__year - - - -// override hebrew date to omit year-only, since year without calendar is ambiguous -// NOTE: potentially support year with calendar label -%override hebrew__hebrew_date: hebrew__day hebrew__month hebrew__year | hebrew__month hebrew__year - -// same for hijri date, year alone is ambiguous -%override hijri__hijri_date: hijri__day hijri__month hijri__year | hijri__month hijri__year diff --git a/src/undate/converters/combined.py b/src/undate/converters/combined.py index 7f41afc..6c681f2 100644 --- a/src/undate/converters/combined.py +++ b/src/undate/converters/combined.py @@ -1,6 +1,7 @@ from lark import Lark from lark.visitors import Transformer, merge_transformers +from undate.converters import GRAMMAR_FILE_PATH from undate.converters.edtf.transformer import EDTFTransformer from 
undate.converters.calendars.hebrew.transformer import HebrewDateTransformer from undate.converters.calendars.hijri.transformer import HijriDateTransformer @@ -8,18 +9,24 @@ class CombinedDateTransformer(Transformer): def start(self, children): + # trigger the transformer for the appropriate part of the grammar return children -# NOTE: we can't support year-only dates in combined parser because calendar -# is ambiguous, unless we want to add a calendar indicator +# NOTE: currently year-only dates in combined parser are interpreted as +# EDTF and use Gregorian calendar. +# In future, we could refine by adding calendar names & abbreviations +# to the parser in order to recognize years from other calendars. combined_transformer = merge_transformers( CombinedDateTransformer(), - edtf__edtf=EDTFTransformer(), # nested prefix due to nested import path + edtf=EDTFTransformer(), hebrew=HebrewDateTransformer(), hijri=HijriDateTransformer(), ) -parser = Lark.open("combined.lark", rel_to=__file__, strict=True) +# open based on filename so we can specify relative import path based on grammar file +parser = Lark.open( + str(GRAMMAR_FILE_PATH / "combined.lark"), rel_to=__file__, strict=True +) diff --git a/src/undate/converters/edtf/parser.py b/src/undate/converters/edtf/parser.py index 27c2bd6..bc8f0ef 100644 --- a/src/undate/converters/edtf/parser.py +++ b/src/undate/converters/edtf/parser.py @@ -1,8 +1,8 @@ -import pathlib - from lark import Lark -grammar_path = pathlib.Path(__file__).parent / "edtf.lark" +from undate.converters import GRAMMAR_FILE_PATH + +grammar_path = GRAMMAR_FILE_PATH / "edtf.lark" with open(grammar_path) as grammar: edtf_parser = Lark(grammar.read(), start="edtf") diff --git a/src/undate/converters/grammars/combined.lark b/src/undate/converters/grammars/combined.lark new file mode 100644 index 0000000..7a6142a --- /dev/null +++ b/src/undate/converters/grammars/combined.lark @@ -0,0 +1,32 @@ +%import common.WS +%ignore WS + +start: (edtf__start | 
hebrew__hebrew_date | hijri__hijri_date ) + +// Renaming of the import variables is required, as they receive the namespace of this file. +// See: https://github.com/lark-parser/lark/pull/973#issuecomment-907287565 + +// All grammars are in the same directory, so we can use relative imports + +// relative import from edtf.lark +%import .edtf.edtf -> edtf__start + +// relative import from hebrew.lark +%import .hebrew.hebrew_date -> hebrew__hebrew_date +%import .hebrew.day -> hebrew__day +%import .hebrew.month -> hebrew__month +%import .hebrew.year -> hebrew__year + +// relative import from hijri.lark +%import .hijri.hijri_date -> hijri__hijri_date +%import .hijri.day -> hijri__day +%import .hijri.month -> hijri__month +%import .hijri.year -> hijri__year + + +// override hebrew date to omit year-only, since year without calendar is ambiguous +// NOTE: potentially support year with calendar label +%override hebrew__hebrew_date: hebrew__day hebrew__month hebrew__year | hebrew__month hebrew__year + +// same for hijri date, year alone is ambiguous +%override hijri__hijri_date: hijri__day hijri__month hijri__year | hijri__month hijri__year diff --git a/src/undate/converters/edtf/edtf.lark b/src/undate/converters/grammars/edtf.lark similarity index 100% rename from src/undate/converters/edtf/edtf.lark rename to src/undate/converters/grammars/edtf.lark diff --git a/src/undate/converters/calendars/hebrew/hebrew.lark b/src/undate/converters/grammars/hebrew.lark similarity index 100% rename from src/undate/converters/calendars/hebrew/hebrew.lark rename to src/undate/converters/grammars/hebrew.lark diff --git a/src/undate/converters/calendars/hijri/hijri.lark b/src/undate/converters/grammars/hijri.lark similarity index 100% rename from src/undate/converters/calendars/hijri/hijri.lark rename to src/undate/converters/grammars/hijri.lark From 25137cba85b28682c1ff5430578567234aa56c63 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 18 Dec 2025 16:27:10 -0500 Subject: [PATCH 3/6] Add, 
document, & test omnibus converter --- docs/undate/converters.rst | 9 +++ src/undate/converters/base.py | 2 +- src/undate/converters/combined.py | 55 ++++++++++++++++++- tests/test_converters/test_combined_parser.py | 11 +++- 4 files changed, 72 insertions(+), 5 deletions(-) diff --git a/docs/undate/converters.rst b/docs/undate/converters.rst index 57e90a1..7ff24bf 100644 --- a/docs/undate/converters.rst +++ b/docs/undate/converters.rst @@ -4,10 +4,19 @@ Converters Overview -------- + +..note: base converter is pretty developer-facing documentation; +.. calendar/converter usage would be helpful to add + .. automodule:: undate.converters.base :members: :undoc-members: + +.. autoclass:: undate.converters.combined.OmnibusDateConverter + :members: + + Formats -------- diff --git a/src/undate/converters/base.py b/src/undate/converters/base.py index 9082a5b..92c0be5 100644 --- a/src/undate/converters/base.py +++ b/src/undate/converters/base.py @@ -52,7 +52,7 @@ logger = logging.getLogger(__name__) -#: path to parser grammar files +#: Path to parser grammar files GRAMMAR_FILE_PATH = pathlib.Path(__file__).parent / "grammars" diff --git a/src/undate/converters/combined.py b/src/undate/converters/combined.py index 6c681f2..f674595 100644 --- a/src/undate/converters/combined.py +++ b/src/undate/converters/combined.py @@ -1,7 +1,17 @@ +""" +**Experimental** combined parser. Supports EDTF, Hebrew, and Hijri +where dates are unambiguous. (Year-only dates are parsed as EDTF in +Gregorian calendar.) 
+""" + +from typing import Union + from lark import Lark +from lark.exceptions import UnexpectedCharacters from lark.visitors import Transformer, merge_transformers -from undate.converters import GRAMMAR_FILE_PATH +from undate.undate import Undate, UndateInterval +from undate.converters import BaseDateConverter, GRAMMAR_FILE_PATH from undate.converters.edtf.transformer import EDTFTransformer from undate.converters.calendars.hebrew.transformer import HebrewDateTransformer from undate.converters.calendars.hijri.transformer import HijriDateTransformer @@ -30,3 +40,46 @@ def start(self, children): parser = Lark.open( str(GRAMMAR_FILE_PATH / "combined.lark"), rel_to=__file__, strict=True ) + + +class OmnibusDateConverter(BaseDateConverter): + """ + Combination parser that aggregates existing parser grammars. + Currently supports EDTF, Hebrew, and Hijri where dates are unambiguous. + (Year-only dates are parsed as EDTF in Gregorian calendar.) + + Does not support serialization. + + Example usage:: + + Undate.parse("Tammuz 4816", "omnibus") + + """ + + #: converter name: omnibus + name: str = "omnibus" + + def __init__(self): + self.transformer = combined_transformer + + def parse(self, value: str) -> Union[Undate, UndateInterval]: + """ + Parse a string in a supported format and return an :class:`~undate.undate.Undate` + or :class:`~undate.undate.UndateInterval`. + """ + if not value: + raise ValueError("Parsing empty/unset string is not supported") + + # parse the input string, then transform to undate object + try: + parsetree = parser.parse(value) + # transform returns a list; we want the first item in the list + return self.transformer.transform(parsetree)[0] + except UnexpectedCharacters: + raise ValueError( + "Parsing failed: '%s' is not in a recognized date format" % value + ) + + def to_string(self, undate: Union[Undate, UndateInterval]) -> str: + "Not supported by this converter. 
Will raise :class:`ValueError`" + raise ValueError("Omnibus supporter does not support serialization") diff --git a/tests/test_converters/test_combined_parser.py b/tests/test_converters/test_combined_parser.py index b90ca73..cb65a0c 100644 --- a/tests/test_converters/test_combined_parser.py +++ b/tests/test_converters/test_combined_parser.py @@ -4,7 +4,7 @@ from undate.undate import Undate, UndateInterval -# for now, just test that valid dates can be parsed +# test that valid dates can be parsed testcases = [ # EDTF @@ -24,12 +24,17 @@ @pytest.mark.parametrize("date_string,expected", testcases) def test_transform(date_string, expected): + # test the transformer directly transformer = combined_transformer # parse the input string, then transform to undate object parsetree = parser.parse(date_string) - print(parsetree) # since the same unknown date is not considered strictly equal, # compare object representations transformed_date = transformer.transform(parsetree) - print(transformed_date) assert repr(transformed_date[0]) == repr(expected) + + +@pytest.mark.parametrize("date_string,expected", testcases) +def test_converter(date_string, expected): + # should work the same way when called through the converter class + assert repr(Undate.parse(date_string, "omnibus")) == repr(expected) From 7a99c5cff55ea1f0eecac5e4b9bf6d350a9463c4 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 18 Dec 2025 16:30:53 -0500 Subject: [PATCH 4/6] Add test case for unsupported serialization --- src/undate/converters/combined.py | 2 +- tests/test_converters/test_combined_parser.py | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/undate/converters/combined.py b/src/undate/converters/combined.py index f674595..57a0450 100644 --- a/src/undate/converters/combined.py +++ b/src/undate/converters/combined.py @@ -82,4 +82,4 @@ def parse(self, value: str) -> Union[Undate, UndateInterval]: def to_string(self, undate: Union[Undate, UndateInterval]) -> str: "Not supported by 
this converter. Will raise :class:`ValueError`" - raise ValueError("Omnibus supporter does not support serialization") + raise ValueError("Omnibus converter does not support serialization") diff --git a/tests/test_converters/test_combined_parser.py b/tests/test_converters/test_combined_parser.py index cb65a0c..1ff2322 100644 --- a/tests/test_converters/test_combined_parser.py +++ b/tests/test_converters/test_combined_parser.py @@ -38,3 +38,8 @@ def test_transform(date_string, expected): def test_converter(date_string, expected): # should work the same way when called through the converter class assert repr(Undate.parse(date_string, "omnibus")) == repr(expected) + + +def test_no_serialize(): + with pytest.raises(ValueError, match="does not support"): + Undate("2022").format("omnibus") From 0691b12b6d7e81d5225dcbc8ef332de98f79c516 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 18 Dec 2025 17:10:41 -0500 Subject: [PATCH 5/6] Add tests for error cases --- tests/test_converters/test_combined_parser.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tests/test_converters/test_combined_parser.py b/tests/test_converters/test_combined_parser.py index 6dfe632..717a16e 100644 --- a/tests/test_converters/test_combined_parser.py +++ b/tests/test_converters/test_combined_parser.py @@ -40,6 +40,15 @@ def test_converter(date_string, expected): assert repr(Undate.parse(date_string, "omnibus")) == repr(expected) +def test_parse_errors(): + # empty string not supported + with pytest.raises(ValueError, match="not supported"): + Undate.parse("", "omnibus") + + with pytest.raises(ValueError, match="not in a recognized date format"): + Undate.parse("Monday 2023", "omnibus") + + def test_no_serialize(): with pytest.raises(ValueError, match="does not support"): Undate("2022").format("omnibus") From ce5baaaca907750302fc2bacd138bebd63fd9913 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Fri, 19 Dec 2025 08:56:41 -0500 Subject: [PATCH 6/6] Add brief overview docstring for 
converter module [skip ci] --- docs/undate/converters.rst | 7 +++++-- src/undate/converters/__init__.py | 26 ++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/docs/undate/converters.rst b/docs/undate/converters.rst index b9debe2..719d065 100644 --- a/docs/undate/converters.rst +++ b/docs/undate/converters.rst @@ -5,8 +5,9 @@ Overview -------- -..note: base converter is pretty developer-facing documentation; -.. calendar/converter usage would be helpful to add +.. automodule:: undate.converters + +----- .. automodule:: undate.converters.base :members: @@ -42,6 +43,8 @@ Extended Date-Time Format (EDTF) Calendars --------- +.. automodule:: undate.converters.calendars + Gregorian ^^^^^^^^^ diff --git a/src/undate/converters/__init__.py b/src/undate/converters/__init__.py index cfbe6fa..c13f2f1 100644 --- a/src/undate/converters/__init__.py +++ b/src/undate/converters/__init__.py @@ -1,3 +1,29 @@ +""" +Converter classes add support for parsing and serializing dates +in a variety of formats. A subset of these are calendar converters +(:mod:`undate.converters.calendars`), which means they support both parsing +and conversion from an alternate calendar to a common Gregorian calendar +for comparison across dates. + +To parse a date with a supported converter, use the ``Undate`` class method +:meth:`~undate.undate.Undate.parse` and specify the date as a string +with the desired format or calendar, e.g. + +.. code-block:: + + Undate.parse("2001-05", "EDTF") + Undate.parse("7 Heshvan 5425", "Hebrew") + +For converters that support it, you can also serialize a date in a specified +format with ``Undate`` class method :meth:`~undate.undate.Undate.format`: + +.. code-block:: + + Undate.parse("Rabīʿ ath-Thānī 343", "Islamic").format("EDTF") + + +""" + from undate.converters.base import BaseDateConverter, GRAMMAR_FILE_PATH __all__ = ["BaseDateConverter", "GRAMMAR_FILE_PATH"]