diff --git a/.claude/CLAUDE.md b/.claude/CLAUDE.md index 84206fa2c..272e328c0 100644 --- a/.claude/CLAUDE.md +++ b/.claude/CLAUDE.md @@ -191,6 +191,7 @@ For larger initiatives, use PRDs stored in `PRDs/-/`: | PRD | Description | Status | |-----|-------------|--------| | `PRDs/20251206-build-cleanup/` | Build warnings and deprecation removal | In Progress | +| `PRDs/20260207-format-datetime/` | Date/time formatting functions (issue #283) | In Progress | ### Completed PRDs diff --git a/PRDs/20260207-format-datetime/PRD.md b/PRDs/20260207-format-datetime/PRD.md new file mode 100644 index 000000000..3cd497d85 --- /dev/null +++ b/PRDs/20260207-format-datetime/PRD.md @@ -0,0 +1,527 @@ +# PRD: Date/Time Formatting Functions + +## Document Information + +| Field | Value | +|-------|-------| +| **PRD ID** | FORMAT-DT-001 | +| **Issue** | [#283](https://github.com/metaschema-framework/metaschema-java/issues/283) | +| **Status** | In Progress | +| **Author** | David Waltermire | +| **Created** | 2026-02-07 | +| **Last Updated** | 2026-02-07 | + +--- + +## 1. Overview + +### 1.1 Problem Statement + +Metapath authors currently have no way to format date, dateTime, or time values for display. While the Metapath function library provides functions to extract individual components (year, month, day, etc.) and adjust timezones, there is no mechanism to produce human-readable formatted strings from temporal values. This limits the utility of Metapath in report generation and user-facing output scenarios. + +### 1.2 Goals + +1. Implement `fn:format-integer` per [XPath 3.1 section 4.6.1](https://www.w3.org/TR/xpath-functions-31/#func-format-integer) (prerequisite for date/time formatting) +2. Implement `fn:format-dateTime` per [XPath 3.1 section 9.8.1](https://www.w3.org/TR/xpath-functions-31/#func-format-dateTime) +3. Implement `fn:format-date` per [XPath 3.1 section 9.8.2](https://www.w3.org/TR/xpath-functions-31/#func-format-date) +4. 
Implement `fn:format-time` per [XPath 3.1 section 9.8.3](https://www.w3.org/TR/xpath-functions-31/#func-format-time) +5. Support both 2-argument and 5-argument signatures for all date/time formatting functions +6. Implement all 17 picture string variable markers defined in the spec + +### 1.3 Non-Goals + +- `fn:format-number` (separate issue scope) +- Non-Gregorian calendar systems beyond basic infrastructure (implementation-defined fallback to Gregorian) +- Locale-aware month/day names beyond English (implementation-defined fallback to English) + +### 1.4 Success Metrics + +| Metric | Current | Target | +|--------|---------|--------| +| Formatting functions available | 0 of 4 | 4 of 4 (format-integer + 3 date/time) | +| Picture string variable markers supported | 0 of 17 | 17 of 17 | +| Function signatures registered | 0 of 8 | 8 of 8 (2 for format-integer + 6 for date/time) | +| Unit test coverage for new code | N/A | 80%+ | +| CI build passes | N/A | All checks green | + +--- + +## 2. Background + +### 2.1 Current State + +The Metapath function library currently supports: + +- **Component extraction**: `fn:year-from-dateTime`, `fn:month-from-date`, `fn:day-from-date`, `fn:hours-from-time`, etc. +- **Timezone adjustment**: `fn:adjust-dateTime-to-timezone`, `fn:adjust-date-to-timezone`, `fn:adjust-time-to-timezone` +- **Current values**: `fn:current-dateTime`, `fn:current-date`, `fn:current-time` +- **Construction**: `fn:dateTime` (combines date and time) + +The formatting functions are marked as P2 (priority 2) in `DefaultFunctionLibrary.java` at lines 100-104, with placeholder comments referencing the XPath spec URLs. + +### 2.2 Technical Context + +**Item type hierarchy**: All temporal items implement `ITemporalItem`, which provides `getYear()`, `getMonth()`, `getDay()`, `getHour()`, `getMinute()`, `getSecond()`, `getNano()`, `getZoneOffset()`, and `hasTimezone()`. 
Calendar-based items (`IDateItem`, `IDateTimeItem`) extend `ICalendarTemporalItem` which adds `asZonedDateTime()`. + +**Function registration**: Functions are final utility classes with static `SIGNATURE` fields built via `IFunction.builder()`. They are registered in `DefaultFunctionLibrary` via `registerFunction()`. + +**Namespace**: All standard functions use `MetapathConstants.NS_METAPATH_FUNCTIONS` (`http://csrc.nist.gov/ns/metaschema/metapath-functions`). + +**Existing patterns for multi-arity functions**: `FnSubstring` (2-arg and 3-arg), `FnAdjustDateTimeToTimezone` (1-arg and 2-arg) demonstrate separate `SIGNATURE_*` fields and `execute*` handler methods per arity. + +--- + +## 3. Requirements + +### 3.1 Functional Requirements + +#### FR-0: fn:format-integer (Prerequisite) + +Implement `fn:format-integer` per [XPath 3.1 section 4.6.1](https://www.w3.org/TR/xpath-functions-31/#func-format-integer). This function is used by the date/time formatting functions for rendering integer-valued components. + +Two signatures: + +```text +fn:format-integer($value as xs:integer?, $picture as xs:string) as xs:string +fn:format-integer($value as xs:integer?, $picture as xs:string, + $lang as xs:string?) as xs:string +``` + +Key behaviors: + +- If `$value` is empty sequence, returns zero-length string +- Negative values: format absolute value and prepend minus sign +- `$picture` consists of a primary format token, optionally followed by `;` and a format modifier + +Primary format tokens: + +| Token | Description | +|-------|-------------| +| Decimal digit pattern (`1`, `01`, `001`, `#,##0`, etc.) | Decimal number with optional zero-padding and grouping separators | +| `a` | Lowercase alphabetic: a, b, c, ..., z, aa, ab, ... | +| `A` | Uppercase alphabetic: A, B, C, ..., Z, AA, AB, ... | +| `i` | Lowercase roman: i, ii, iii, iv, v, ... | +| `I` | Uppercase roman: I, II, III, IV, V, ... | +| `w` | Words, lowercase: one, two, three, ... 
| +| `W` | Words, uppercase: ONE, TWO, THREE, ... | +| `Ww` | Words, title case: One, Two, Three, ... | + +Format modifier (after `;`): + +- `c` or `o`: Cardinal or ordinal (e.g., `1;o` → `1st`) +- `a` or `t`: Alphabetic or traditional numbering +- Ordinal parenthesized suffix: `o(-th)` for language-specific endings + +Decimal digit pattern details: + +- `#` = optional-digit-sign, Unicode Nd = mandatory-digit-sign +- All mandatory digits must be from the same digit family +- Grouping separators: non-alphanumeric characters (e.g., `,` in `#,##0`) +- Regular grouping: extrapolated to the left (e.g., `0'000` → `1'000'000`) + +Properties: + +- 2-arg: deterministic, context-dependent (default language), focus-independent +- 3-arg: deterministic, context-independent, focus-independent + +Error: `FODF1310` for invalid format token syntax. + +#### FR-1: fn:format-dateTime + +Implement the function with two signatures: + +```text +fn:format-dateTime($value as xs:dateTime?, $picture as xs:string) as xs:string? +fn:format-dateTime($value as xs:dateTime?, $picture as xs:string, + $language as xs:string?, $calendar as xs:string?, + $place as xs:string?) as xs:string? +``` + +Formats an `IDateTimeItem` using the picture string. Returns empty sequence if `$value` is empty. All 17 variable markers are valid for dateTime values. + +#### FR-2: fn:format-date + +Implement the function with two signatures: + +```text +fn:format-date($value as xs:date?, $picture as xs:string) as xs:string? +fn:format-date($value as xs:date?, $picture as xs:string, + $language as xs:string?, $calendar as xs:string?, + $place as xs:string?) as xs:string? +``` + +Formats an `IDateItem` using the picture string. Time-related markers (`H`, `h`, `P`, `m`, `s`, `f`) raise `FOFD1350` if present in the picture string. + +#### FR-3: fn:format-time + +Implement the function with two signatures: + +```text +fn:format-time($value as xs:time?, $picture as xs:string) as xs:string? 
+fn:format-time($value as xs:time?, $picture as xs:string, + $language as xs:string?, $calendar as xs:string?, + $place as xs:string?) as xs:string? +``` + +Formats an `ITimeItem` using the picture string. Date-related markers (`Y`, `M`, `D`, `d`, `F`, `W`, `w`, `E`) raise `FOFD1350` if present in the picture string. + +#### FR-4: Picture String Parsing + +Parse XPath 3.1 picture strings containing: + +- **Literal text**: Characters outside `[...]` brackets +- **Escaped brackets**: `[[` → `[`, `]]` → `]` +- **Variable markers**: `[component presentation? width?]` + +All 17 variable markers: + +| Marker | Description | Valid in dateTime | Valid in date | Valid in time | +|--------|-------------|:-:|:-:|:-:| +| `Y` | Year | yes | yes | no | +| `M` | Month in year | yes | yes | no | +| `D` | Day in month | yes | yes | no | +| `d` | Day in year | yes | yes | no | +| `F` | Day of week | yes | yes | no | +| `W` | Week of year | yes | yes | no | +| `w` | Week of month | yes | yes | no | +| `H` | Hour (24-hour, 0-23) | yes | no | yes | +| `h` | Hour (12-hour, 1-12) | yes | no | yes | +| `P` | AM/PM marker | yes | no | yes | +| `m` | Minute | yes | no | yes | +| `s` | Second | yes | no | yes | +| `f` | Fractional seconds | yes | no | yes | +| `Z` | Timezone (offset format, see FR-10) | yes | yes | yes | +| `z` | Timezone (same as Z with `GMT` prefix, see FR-10) | yes | yes | yes | +| `C` | Calendar name | yes | yes | yes | +| `E` | Era name | yes | yes | no | + +#### FR-5: Presentation Modifiers + +A variable marker consists of a component specifier followed optionally by a first presentation modifier, an optional second presentation modifier, and an optional width modifier. Whitespace within a variable marker is ignored. 
+ +The **first presentation modifier** controls how the value is rendered: + +| Format | Description | Example | +|--------|-------------|---------| +| `1` | Decimal number (default for most numeric components) | `[M]` or `[M1]` → `3` | +| `01` | Zero-padded decimal (default for `m`, `s`) | `[m01]` → `05` | +| `001`, `0001`, etc. | Zero-padded to digit count | `[Y0001]` → `2026` | +| `N` | Name, uppercase | `[MN]` → `MARCH` | +| `n` | Name, lowercase (default for `F`, `P`, `C`, `E`) | `[Fn]` → `monday` | +| `Nn` | Name, title case | `[MNn]` → `March` | +| `i` | Roman numeral, lowercase | `[Mi]` → `iii` | +| `I` | Roman numeral, uppercase | `[MI]` → `III` | +| `w` | Words, lowercase | `[Yw]` → `two thousand twenty-six` | +| `W` | Words, uppercase | `[YW]` → `TWO THOUSAND TWENTY-SIX` | +| `Ww` | Words, title case | `[YWw]` → `Two Thousand Twenty-Six` | + +Default presentation modifiers per spec: + +| Component | Default | +|-----------|---------| +| Y, M, D, d, W, w, H, h, f | `1` (decimal) | +| F, P, C, E | `n` (name, lowercase) | +| m, s | `01` (zero-padded two digits) | +| Z, z | `01:01` (zero-padded hours and minutes with separator) | + +The **second presentation modifier** (optional, single character after first modifier) controls numbering style: + +| Modifier | Meaning | +|----------|---------| +| `a` or `t` | Alphabetic or traditional numbering (implementation-defined default) | +| `c` or `o` | Cardinal or ordinal numbering: `[D1o]` → `1st`, `[D1c]` → `1` | + +If a comma appears in the format token, the last comma introduces the width modifier; all other commas are grouping separators (e.g., `[Y9,999,*]` → `2,026`). 
+ +#### FR-6: Width Modifiers + +Width modifier syntax: `,min-width` or `,min-width-max-width` + +- `min-width`: unsigned integer or `*` (no minimum) +- `max-width`: unsigned integer or `*` (no maximum, the default if omitted) +- Error `FOFD1340` if min < 1, max < 1, or max < min + +Examples: + +- `[M,2]` — minimum width 2, no maximum +- `[M,2-2]` — exactly 2 characters +- `[MNn,*-3]` — name, maximum 3 characters (abbreviation) +- `[MNn,3-3]` — name, exactly 3 characters + +A format token with multiple digits (e.g., `001`, `9999`) implicitly sets min and max width to the digit count; an explicit width modifier overrides this. + +For name-based modifiers (`N`, `n`, `Nn`): if shorter than min, pad with spaces; if longer than max, abbreviate using conventional abbreviation or truncation. + +#### FR-7: Language, Calendar, and Place Parameters + +- `$language`: Language tag (e.g., `"en"`) per `xml:lang`. Default to English. Unsupported languages fall back to English with no error (implementation-defined). +- `$calendar`: Calendar designator as an `EQName`. Default to `"AD"` (Gregorian). Must be a valid EQName or raises `FOFD1340`. Unsupported calendars fall back to Gregorian with a `[Calendar: AD]` prefix in output. +- `$place`: Country code ([ISO 3166-1]) or IANA timezone name. Implementation-defined; in this implementation, IANA timezone names are used to adjust the value's timezone offset before formatting. Unrecognized values are ignored. + +The 2-arg form is equivalent to calling the 5-arg form with `$language`, `$calendar`, and `$place` all set to empty sequence. + +#### FR-8: Function Properties + +Per the W3C spec, these functions are **context-dependent**: + +- **2-arg form**: deterministic, context-dependent, focus-independent. Depends on default calendar, default language, default place, and implicit timezone. +- **5-arg form**: deterministic, context-dependent, focus-independent. Depends on implicit timezone and namespaces. 
+ +#### FR-9: Error Handling + +Per the W3C spec ([section 9.8.4](https://www.w3.org/TR/xpath-functions-31/#date-time-formatting)): + +- `FOFD1340`: Invalid picture string syntax (unmatched brackets, unknown marker, invalid width modifier), or invalid `$calendar` value (not a valid EQName or unrecognized no-namespace designator) +- `FOFD1350`: Component specifier refers to components not available in the given value type (e.g., `[H]` in `format-date`, `[Y]` in `format-time`) + +Use of valid but unsupported options in `$language`, `$calendar`, or `$place` is **not an error** — the implementation must output a fallback representation. + +#### FR-10: Timezone Formatting (Z and z) + +Per spec section 9.8.4.6, timezone formatting has special rules: + +- If the value has **no timezone**, timezone components produce empty output (except military format `ZZ` → `"J"`) +- `[Z]` default (`01:01`): signed offset with separator, e.g., `+05:00`, `-08:00` +- `[Z0]` or `[Z01]`: hours only, minutes appended with colon only if non-zero, e.g., `-5`, `+03`, `+5:30` +- `[Z0:00]` or `[Z01:01]`: hours and minutes with separator, always, e.g., `-5:00`, `+05:00` +- `[Z0000]` or `[Z0001]`: hours and minutes with no separator, e.g., `-0500`, `+0530` +- `[Z01:01t]`: second modifier `t` causes UTC to render as `Z` instead of `+00:00` +- `[ZZ]`: military timezone letter (A-M for +01 to +12, N-Y for -01 to -12, Z for UTC, J for local/no TZ) +- `[ZN]`: timezone name (e.g., `EST`, `CET`); falls back to `+01:01` format if name unavailable +- `[z...]`: same as `[Z...]` but prefixed with `GMT` (or localized equivalent); prefix omitted when timezone is identified by name + +| Variable marker | -05:00 | +00:00 | +05:30 | +|-----------------|--------|--------|--------| +| `[Z]` or `[Z01:01]` | `-05:00` | `+00:00` | `+05:30` | +| `[Z0]` | `-5` | `+0` | `+5:30` | +| `[Z0:00]` | `-5:00` | `+0:00` | `+5:30` | +| `[Z00:00]` | `-05:00` | `+00:00` | `+05:30` | +| `[Z0000]` | `-0500` | `+0000` | `+0530` | +| 
`[Z00:00t]` | `-05:00` | `Z` | `+05:30` | +| `[ZZ]` | `R` | `Z` | `+05:30` | +| `[z]` | `GMT-05:00` | `GMT+00:00` | `GMT+05:30` | + +#### FR-11: Year Formatting + +Per spec section 9.8.4.4, the year value output is the absolute value modulo 10^N, where N is: + +1. If width modifier specifies a finite max width → that max width +2. Else if first presentation modifier is a decimal-digit-pattern with W digits (W >= 2) → W +3. Otherwise → infinity (full year) + +Example: `[Y01]` outputs 2-digit year (`26` for year 2026), `[Y0001]` outputs 4-digit year. + +#### FR-12: Fractional Seconds Formatting + +Per spec section 9.8.4.5, fractional seconds use a reverse-digit algorithm: + +1. A single-digit format token (`[f1]`) retains all fractional digits +2. Multi-digit tokens set precision: `[f001]` = 3 decimal places, `[f01]` = 2 +3. Width modifier overrides: `[f1,1-1]` = exactly 1 digit +4. The algorithm reverses the decimal digit pattern, reverses the fractional value (removing trailing zeros), formats using `fn:format-integer` rules, then reverses the result +5. Excess digits are truncated (not rounded) + +### 3.2 Non-Functional Requirements + +#### NFR-1: Spec Conformance + +Implementation must conform to XPath 3.1 specification sections 9.8.1-9.8.5. The spec is the authoritative reference for any ambiguous behavior. + +#### NFR-2: Consistency with Existing Functions + +Follow the same patterns used by existing Metapath function implementations: + +- Final utility class with private constructor +- Static `SIGNATURE` fields using `IFunction.builder()` +- Private `execute*` handler methods with `@SuppressWarnings("unused")` +- Public static implementation methods for direct programmatic access +- SpotBugs annotations (`@NonNull`, `@Nullable`) + +#### NFR-3: Performance + +Picture string parsing should be efficient for repeated use. Consider caching parsed picture strings if profiling indicates a bottleneck (not required in initial implementation). 
+ +#### NFR-4: Testability + +The picture string parser and formatting logic must be independently testable, separate from the Metapath function invocation machinery. + +--- + +## 4. Architecture + +### 4.1 Component Design + +```text +FnFormatDateTime.java ──┐ +FnFormatDate.java ──┼──▶ DateTimeFormatUtil.java ──▶ PictureStringParser +FnFormatTime.java ──┘ (formatting logic) (parses picture strings) + │ │ + ▼ ▼ + ITemporalItem methods FormatComponent[] + (extract values) (intermediate repr) +``` + +#### PictureStringParser + +Parses a picture string into a list of `FormatComponent` objects: + +- `LiteralComponent` — plain text to include verbatim +- `VariableMarkerComponent` — a marker letter with optional presentation modifier and width constraint + +#### DateTimeFormatUtil + +Contains the shared formatting algorithm: + +1. Parse the picture string via `PictureStringParser` +2. For each component: + - Literal: append text to output + - Variable marker: extract the value from `ITemporalItem`, apply presentation modifier and width, append to output +3. Return the assembled string + +Validates that markers are valid for the temporal type (date-only, time-only, or dateTime). + +#### Function Classes + +Each function class (`FnFormatDateTime`, `FnFormatDate`, `FnFormatTime`): + +- Defines 2-arg and 5-arg `SIGNATURE` constants +- Has `executeTwoArg` and `executeFiveArg` handler methods +- Delegates to `DateTimeFormatUtil` with the appropriate `ITemporalItem` and allowed marker set + +### 4.2 Package Location + +All new classes go in: + +```text +core/src/main/java/dev/metaschema/core/metapath/function/library/ +``` + +The parser and utility classes are package-private (used only by the function classes in the same package). The exception class goes in: + +```text +core/src/main/java/dev/metaschema/core/metapath/function/ +``` + +--- + +## 5. Testing Strategy + +### 5.1 Test Approach + +All development follows TDD. 
Tests are written first, verified to fail, then implementation makes them pass. + +### 5.2 Test Classes + +#### PictureStringParserTest + +Unit tests for parsing picture strings into component lists: + +- Simple literal text (`"hello"`) +- Single variable marker (`"[Y0001]"`) +- Mixed literal and markers (`"[Y0001]-[M01]-[D01]"`) +- Escaped brackets (`"[["` → `[`, `"]]"` → `]`) +- All 17 variable marker letters +- Presentation modifiers: numeric (`1`, `01`, `0001`), name (`N`, `n`, `Nn`), roman (`i`, `I`), word (`w`, `W`, `Ww`), ordinal (`o`) +- Width modifiers: min only, min-max, `*-max`, exact +- Nested/complex picture strings +- Error cases: unmatched `[`, unknown marker letter, malformed modifiers + +#### DateTimeFormatUtilTest + +Unit tests for the formatting algorithm: + +- **Year formatting**: 4-digit, 2-digit, zero-padded, word form +- **Month formatting**: numeric, zero-padded, full name, abbreviated name (3-char), title case, uppercase, lowercase +- **Day formatting**: numeric, zero-padded, ordinal (`1st`, `2nd`, `3rd`, `11th`, `21st`) +- **Day of year**: 1-366 range, zero-padded +- **Day of week**: numeric, full name (`Monday`), abbreviated +- **Week of year**: ISO week number +- **Week of month**: 1-5 +- **Hour formatting**: 24-hour, 12-hour, zero-padded, midnight/noon edge cases +- **AM/PM**: uppercase, lowercase, title case +- **Minute/Second**: zero-padded +- **Fractional seconds**: variable precision, trailing zero handling +- **Timezone**: offset format (`+05:00`, `-08:00`, `Z`), name format +- **Calendar**: Gregorian identifier +- **Era**: AD/BC +- **Roman numerals**: months I-XII +- **Width constraints**: truncation, padding, exact width +- **Empty sequence handling**: null/empty input returns empty sequence +- **Error cases**: time markers on date values, date markers on time values + +#### FnFormatDateTimeTest + +Integration tests via Metapath expression evaluation: + +- ISO 8601 format: `fn:format-dateTime($dt, 
"[Y0001]-[M01]-[D01]T[H01]:[m01]:[s01]")` +- Human-readable: `fn:format-dateTime($dt, "[D] [MNn] [Y]")` +- US format: `fn:format-dateTime($dt, "[M]/[D]/[Y0001]")` +- 12-hour time: `fn:format-dateTime($dt, "[h]:[m01] [P]")` +- With timezone: `fn:format-dateTime($dt, "[Y]-[M01]-[D01] [H01]:[m01] [Z]")` +- 5-arg form with language parameter + +#### FnFormatDateTest + +Integration tests: + +- Standard date formats +- Ordinal dates: `fn:format-date($d, "[D1o] [MNn] [Y]")` → `"3rd March 2026"` +- Day-of-week: `fn:format-date($d, "[FNn], [D] [MNn] [Y]")` → `"Tuesday, 3 March 2026"` (March 3, 2026 is a Tuesday) +- Error: time markers in date picture string + +#### FnFormatTimeTest + +Integration tests: + +- 24-hour time: `fn:format-time($t, "[H01]:[m01]:[s01]")` +- 12-hour time: `fn:format-time($t, "[h]:[m01]:[s01] [P]")` +- With fractional seconds: `fn:format-time($t, "[H01]:[m01]:[s01].[f001]")` +- Error: date markers in time picture string + +### 5.3 Edge Cases + +- Boundary dates: Jan 1, Dec 31, leap year Feb 29 +- Midnight and noon: hour 0 vs 12 in 12-hour format +- Negative years (BCE) +- Values with and without timezone +- Empty timezone offset +- Fractional seconds with 0 nanos vs maximum nanos +- Year values > 9999 and < 0 +- Width constraints producing truncation vs padding +- Picture string with only literals (no markers) +- Empty picture string + +### 5.4 Verification Checklist + +- [ ] All new tests pass +- [ ] All existing tests still pass +- [ ] CI build succeeds: `mvn clean install -PCI -Prelease` +- [ ] No new SpotBugs/PMD/Checkstyle violations +- [ ] Javadoc on all public/protected members +- [ ] Code coverage ≥80% for new code + +--- + +## 6. 
Risks and Mitigations + +| Risk | Impact | Likelihood | Mitigation | +|------|--------|------------|------------| +| Picture string spec ambiguity | Medium | Medium | Use XPath 3.1 spec as authoritative reference; test against Saxon's behavior for ambiguous cases | +| Ordinal formatting complexity | Low | Medium | English ordinals are well-defined; note limitation for other languages | +| Roman numeral edge cases | Low | Low | Only months (1-12) are commonly used; handle values 1-3999 | +| Number-to-words conversion | Medium | Medium | Use a simple English-only implementation; note limitation | +| Timezone name formatting | Low | Medium | Use Java's timezone display names; `[ZN]` falls back to offset format | + +--- + +## 7. Open Questions + +1. **Timezone names (`[ZN]`)**: The spec allows timezone name output when `$place` provides context. Since `ITemporalItem` stores offsets (not zone IDs), `[ZN]` will fall back to offset format (`+01:01`) unless a recognized IANA timezone name is provided in `$place`. Is this acceptable? + +--- + +## 8. Related Documents + +- [Implementation Plan](./implementation-plan.md) +- [XPath 3.1 Formatting Functions Spec](https://www.w3.org/TR/xpath-functions-31/#formatting-dates-and-times) +- [GitHub Issue #283](https://github.com/metaschema-framework/metaschema-java/issues/283) diff --git a/PRDs/20260207-format-datetime/implementation-plan.md b/PRDs/20260207-format-datetime/implementation-plan.md new file mode 100644 index 000000000..4dc3fb122 --- /dev/null +++ b/PRDs/20260207-format-datetime/implementation-plan.md @@ -0,0 +1,497 @@ +# Implementation Plan: Date/Time Formatting Functions + +This document details the implementation of `fn:format-integer`, `fn:format-dateTime`, `fn:format-date`, and `fn:format-time` for [issue #283](https://github.com/metaschema-framework/metaschema-java/issues/283). 
+ +--- + +## Prerequisites + +- Working build from `develop` branch: `mvn install -DskipTests` +- Familiarity with existing function patterns (e.g., `FnAdjustDateTimeToTimezone`, `FnSubstring`) +- XPath Functions 3.1 spec sections [4.6.1](https://www.w3.org/TR/xpath-functions-31/#func-format-integer) and [9.8](https://www.w3.org/TR/xpath-functions-31/#formatting-dates-and-times) + +--- + +## Test-Driven Development Requirement + +All functional code changes must follow TDD: + +1. Write or update tests first to capture expected behavior +2. Verify tests fail with existing implementation +3. Make the code changes +4. Verify tests pass after changes + +--- + +## PR 1: Formatting Functions (fn:format-integer, fn:format-dateTime, fn:format-date, fn:format-time) + +| Attribute | Value | +|-----------|-------| +| **Files Changed** | ~15 | +| **Risk Level** | Medium | +| **Dependencies** | None | +| **Target Branch** | develop | +| **Status** | In Progress | +| **Pull Request** | [#659](https://github.com/metaschema-framework/metaschema-java/pull/659) | + +### Files to Create + +| File | Description | +|------|-------------| +| `core/src/main/java/.../function/FormatFunctionException.java` | Exception class for format function errors (FODF prefix, FODF1310) | +| `core/src/main/java/.../function/FormatDateTimeFunctionException.java` | Exception class for FOFD error codes (FOFD1340, FOFD1350) | +| `core/src/main/java/.../function/library/FnFormatInteger.java` | `fn:format-integer` function (2-arg and 3-arg) | +| `core/src/main/java/.../function/library/DateTimeFormatUtil.java` | Picture string parser and formatting algorithm | +| `core/src/main/java/.../function/library/FnFormatDateTime.java` | `fn:format-dateTime` function (2-arg and 5-arg) | +| `core/src/main/java/.../function/library/FnFormatDate.java` | `fn:format-date` function (2-arg and 5-arg) | +| `core/src/main/java/.../function/library/FnFormatTime.java` | `fn:format-time` function (2-arg and 5-arg) | +| 
`core/src/test/java/.../function/FormatFunctionExceptionTest.java` | Unit tests for FODF exception | +| `core/src/test/java/.../function/FormatDateTimeFunctionExceptionTest.java` | Unit tests for FOFD exception | +| `core/src/test/java/.../function/library/FnFormatIntegerTest.java` | Unit tests for `fn:format-integer` | +| `core/src/test/java/.../function/library/DateTimeFormatUtilTest.java` | Unit tests for picture string parser and formatting | +| `core/src/test/java/.../function/library/FnFormatDateTimeTest.java` | Integration tests for `fn:format-dateTime` | +| `core/src/test/java/.../function/library/FnFormatDateTest.java` | Integration tests for `fn:format-date` | +| `core/src/test/java/.../function/library/FnFormatTimeTest.java` | Integration tests for `fn:format-time` | + +### Files to Modify + +| File | Changes | +|------|---------| +| `core/src/main/java/.../function/library/DefaultFunctionLibrary.java` | Register 8 function signatures; remove P2 comments for format-integer, format-date, format-dateTime, format-time | + +### Implementation Approach + +#### Phase 1: Exception Classes + +1. Create `FormatFunctionExceptionTest.java`: + - Test construction with `FODF1310` (invalid format token) error code + - Test error message formatting + - Test `getCode()` returns correct value + +2. Create `FormatFunctionException.java`: + - Extend `FunctionMetapathError` + - Prefix: `"FODF"` + - Constants: + - `INVALID_FORMAT_TOKEN = 1310` (FODF1310) + +3. Create `FormatDateTimeFunctionExceptionTest.java`: + - Test construction with `FOFD1340` (invalid picture string) error code + - Test construction with `FOFD1350` (component not available) error code + - Test error message formatting + +4. Create `FormatDateTimeFunctionException.java`: + - Extend `FunctionMetapathError` + - Prefix: `"FOFD"` + - Constants: + - `INVALID_PICTURE_STRING = 1340` (FOFD1340) + - `COMPONENT_NOT_AVAILABLE = 1350` (FOFD1350) + +5. Verify tests pass. + +#### Phase 2: fn:format-integer + +1. 
Create `FnFormatIntegerTest.java` (extends `ExpressionTestBase`) with comprehensive tests: + + **Decimal digit patterns:** + - `format-integer(123, '1')` → `"123"` + - `format-integer(123, '0000')` → `"0123"` + - `format-integer(123, '01')` → `"123"` (never truncated) + - `format-integer(0, '1')` → `"0"` + - `format-integer(-123, '1')` → `"-123"` (negative prepends minus) + + **Alphabetic sequences:** + - `format-integer(1, 'a')` → `"a"` + - `format-integer(26, 'a')` → `"z"` + - `format-integer(27, 'a')` → `"aa"` + - `format-integer(1, 'A')` → `"A"` + - `format-integer(7, 'a')` → `"g"` + + **Roman numerals:** + - `format-integer(1, 'i')` → `"i"` + - `format-integer(4, 'i')` → `"iv"` + - `format-integer(57, 'I')` → `"LVII"` + - `format-integer(1999, 'I')` → `"MCMXCIX"` + + **Words:** + - `format-integer(123, 'w')` → `"one hundred and twenty-three"` (or similar English) + - `format-integer(1, 'W')` → `"ONE"` + - `format-integer(21, 'Ww')` → `"Twenty-One"` (or similar) + + **Ordinal modifier:** + - `format-integer(1, '1;o', 'en')` → `"1st"` + - `format-integer(2, '1;o', 'en')` → `"2nd"` + - `format-integer(3, '1;o', 'en')` → `"3rd"` + - `format-integer(4, '1;o', 'en')` → `"4th"` + - `format-integer(11, '1;o', 'en')` → `"11th"` + - `format-integer(21, '1;o', 'en')` → `"21st"` + + **Grouping separators:** + - `format-integer(1000000, '#,##0')` → `"1,000,000"` + - `format-integer(15, '#,##0')` → `"15"` + - `format-integer(1000000, "0'000")` → `"1'000'000"` + + **Empty sequence:** + - `format-integer((), '1')` → `""` (zero-length string) + + **Error cases:** + - Invalid format token → FODF1310 + + **Context dependency:** + - 2-arg: context-dependent (default language) + - 3-arg: context-independent + +2. 
Implement `FnFormatInteger.java`: + - `SIGNATURE_TWO_ARG` and `SIGNATURE_THREE_ARG` + - 2-arg: `.contextDependent()`, 3-arg: `.contextIndependent()` + - Handle primary format token parsing (decimal-digit-pattern, `a`/`A`, `i`/`I`, `w`/`W`/`Ww`) + - Handle format modifier parsing (`;` separator, `c`/`o`, `a`/`t`) + - Implement decimal digit pattern formatting with grouping separators + - Implement alphabetic, roman numeral, and word formatting + - Implement ordinal suffix for English + +3. Verify all format-integer tests pass. + +#### Phase 3: Picture String Parser + +1. Add parser tests to `DateTimeFormatUtilTest.java`: + + **Literal text parsing:** + - `"hello"` → single literal component `"hello"` + - `""` (empty string) → empty component list + - `" "` (whitespace) → single literal component `" "` + + **Escaped brackets:** + - `"[["` → literal `[` + - `"]]"` → literal `]` + - `"[[value]]"` → literal `[value]` + - `"[[[Y0001]-[M01]-[D01]]]"` → `[`, date, `]` (from spec example) + + **Simple variable markers:** + - Test all 17 markers: Y, M, D, d, F, W, w, H, h, P, m, s, f, Z, z, C, E + - Whitespace inside markers is ignored: `"[ Y 0001 ]"` = `"[Y0001]"` + + **Markers with first presentation modifier:** + - `"[Y0001]"` → year, decimal digit pattern `0001` + - `"[M01]"` → month, decimal digit pattern `01` + - `"[MNn]"` → month, name title-case + - `"[MN]"` → month, name uppercase + - `"[Mn]"` → month, name lowercase + - `"[Mi]"` → month, roman lowercase + - `"[MI]"` → month, roman uppercase + - `"[Mw]"` → month, words lowercase + - `"[Y9,999,*]"` → year with grouping separator (the final comma introduces the width modifier) + + **Markers with second presentation modifier:** + - `"[D1o]"` → day, decimal, ordinal + - `"[D1c]"` → day, decimal, cardinal (same as no modifier) + - `"[Dwo]"` → day, words, ordinal (e.g., "first") + + **Markers with width modifiers:** + - `"[M,2]"` → minimum width 2 + - `"[M,2-2]"` → exact width 2 + - `"[MNn,*-3]"` → name max 3 chars + - `"[MNn,3-3]"` → name exactly 3 
chars + - `"[Y,4-4]"` → year padded/truncated to 4 + + **Complex picture strings:** + - `"[Y0001]-[M01]-[D01]"` → ISO date format + - `"[D] [MNn] [Y]"` → "5 March 2026" + - `"[FNn], [D1o] [MNn] [Y]"` → "Thursday, 5th March 2026" + - `"[h]:[m01]:[s01] [P]"` → 12-hour time with AM/PM + + **Error cases:** + - `"[X]"` → FOFD1340 (unknown marker) + - `"[Y"` → FOFD1340 (unmatched bracket) + - `"]"` alone → FOFD1340 (unmatched bracket) + - `"[M,0]"` → FOFD1340 (min width < 1) + - `"[M,3-1]"` → FOFD1340 (max < min) + +2. Implement `DateTimeFormatUtil.parsePictureString()`: + - Parse picture string character by character + - Build list of `FormatComponent` objects (inner classes or records) + - `LiteralComponent(String text)` + - `VariableMarkerComponent(char specifier, String firstModifier, Character secondModifier, Integer minWidth, Integer maxWidth)` + - Handle escape sequences `[[` and `]]` + - Ignore whitespace within variable markers + - Parse comma-separated width modifier + - Throw `FormatDateTimeFunctionException(INVALID_PICTURE_STRING, ...)` on syntax errors + +3. Verify parser tests pass. + +#### Phase 4: Formatting Engine + +1. 
Add formatting tests to `DateTimeFormatUtilTest.java`: + + **Year formatting (spec section 9.8.4.4 — modulo rule):** + - `[Y]` with 2026 → `"2026"` (full year, default) + - `[Y0001]` with 2026 → `"2026"` (4-digit) + - `[Y01]` with 2026 → `"26"` (modulo 10^2 because 2 digits in pattern) + - `[Y,4-4]` with year 5 → `"0005"` (padded to min width) + - `[Y1]` with 2026 → `"2026"` (single-digit = no truncation, N=infinity) + - `[Y]` with negative year → minus sign prepended + + **Month formatting:** + - `[M]` with month 3 → `"3"` (default: decimal `1`) + - `[M01]` with month 3 → `"03"` + - `[MNn]` with month 3 → `"March"` + - `[MN]` with month 3 → `"MARCH"` + - `[Mn]` with month 3 → `"march"` + - `[MNn,*-3]` with month 3 → `"Mar"` (abbreviated) + - `[MNn,3-3]` with month 3 → `"Mar"` (exact 3) + - `[MI]` with month 3 → `"III"` + - `[Mi]` with month 3 → `"iii"` + - All 12 months for name formatting + + **Day formatting:** + - `[D]` with day 5 → `"5"` (default: decimal `1`) + - `[D01]` with day 5 → `"05"` + - `[D1o]` with day 1 → `"1st"` (ordinal via second modifier) + - `[D1o]` with day 2 → `"2nd"` + - `[D1o]` with day 3 → `"3rd"` + - `[D1o]` with day 4 → `"4th"` + - `[D1o]` with day 11 → `"11th"` (special case) + - `[D1o]` with day 12 → `"12th"` + - `[D1o]` with day 13 → `"13th"` + - `[D1o]` with day 21 → `"21st"` + - `[D1o]` with day 31 → `"31st"` + + **Day of year (`d`):** + - `[d]` with Jan 1 → `"1"` + - `[d]` with Dec 31 (non-leap) → `"365"` + - `[d]` with Dec 31 (leap) → `"366"` + - `[d001]` with day 5 → `"005"` + + **Day of week (`F`):** + - `[F]` with Monday → `"monday"` (default: name `n`) + - `[F1]` with Monday → `"1"` (ISO: Mon=1) + - `[F1]` with Sunday → `"7"` + - `[FNn]` with Monday → `"Monday"` + - `[FNn,*-3]` with Wednesday → `"Wed"` + - All 7 days for name formatting + + **Week of year (`W`) and week of month (`w`):** + - `[W]` with first ISO week → `"1"` + - `[W01]` with week 1 → `"01"` + - `[w]` for various dates + + **Hour formatting (24-hour `H`, 
default: `1`):** + - `[H]` with hour 0 → `"0"` (midnight) + - `[H01]` with hour 0 → `"00"` + - `[H]` with hour 13 → `"13"` + - `[H]` with hour 23 → `"23"` + + **Hour formatting (12-hour `h`, default: `1`):** + - `[h]` with hour 0 → `"12"` (midnight) + - `[h]` with hour 12 → `"12"` (noon) + - `[h]` with hour 13 → `"1"` + - `[h]` with hour 23 → `"11"` + + **AM/PM (`P`, default: name `n`):** + - `[P]` with hour 0 → `"am"` (default: lowercase name) + - `[PN]` with hour 0 → `"AM"` + - `[Pn]` with hour 12 → `"pm"` + - `[PN]` with hour 12 → `"PM"` + + **Minute (default: `01`) and Second (default: `01`):** + - `[m]` with minute 5 → `"05"` (default is `01`, zero-padded) + - `[m1]` with minute 5 → `"5"` (explicit single-digit) + - `[s]` with second 0 → `"00"` (default is `01`) + - `[s1]` with second 9 → `"9"` + + **Fractional seconds (`f`, spec section 9.8.4.5):** + - `[f1]` with 123456789 nanos → all digits (single-digit = no constraint) + - `[f01]` with 100000000 nanos → `"10"` (hundredths) + - `[f001]` with 123456789 nanos → `"123"` (milliseconds) + - `[f001]` with 0 nanos → `"000"` + - `[f1,1-1]` with 123456789 nanos → `"1"` (exactly 1 digit via width) + - Truncation, not rounding + + **Timezone (`Z`, spec section 9.8.4.6):** + - `[Z]` with +05:00 → `"+05:00"` (default `01:01`) + - `[Z]` with -08:00 → `"-08:00"` + - `[Z]` with UTC → `"+00:00"` + - `[Z]` with no timezone → `""` (empty output) + - `[Z0]` with +05:00 → `"+5"` + - `[Z0]` with +05:30 → `"+5:30"` (minutes appended if non-zero) + - `[Z0:00]` with +05:00 → `"+5:00"` (always show minutes) + - `[Z00:00]` with +05:00 → `"+05:00"` (zero-padded) + - `[Z0000]` with +05:30 → `"+0530"` (no separator) + - `[Z01:01t]` with UTC → `"Z"` (t modifier) + - `[Z01:01t]` with +05:00 → `"+05:00"` (non-UTC still signed) + - `[ZZ]` with UTC → `"Z"` (military) + - `[ZZ]` with +01:00 → `"A"` (military) + - `[ZZ]` with -01:00 → `"N"` (military) + - `[ZZ]` with +12:00 → `"M"` (military) + - `[ZZ]` with -12:00 → `"Y"` (military) + - 
`[ZZ]` with +05:30 → `"+05:30"` (no military letter, fallback) + - `[ZZ]` with no timezone → `"J"` (local time) + - `[ZN]` fallback to offset format (no place argument) + + **Timezone `z` (GMT prefix):** + - `[z]` with +05:00 → `"GMT+05:00"` + - `[z]` with UTC → `"GMT+00:00"` + - `[z]` with no timezone → `""` (empty) + + **Calendar (`C`) and Era (`E`):** + - `[C]` → `"ad"` (default: name `n`, lowercase) + - `[CN]` → `"AD"` + - `[E]` with positive year → `"ad"` (default: name `n`) + - `[EN]` with negative year → `"BC"` + + **Width modifier enforcement:** + - `[MNn,3-3]` truncates `"January"` to `"Jan"` + - `[MNn,3-3]` with `"May"` → `"May"` (already 3) + - `[MNn,*-3]` abbreviates conventionally or truncates + - Padding: shorter than min → pad with spaces + + **Spec examples (section 9.8.5) as integration tests:** + - `format-date($d, "[Y0001]-[M01]-[D01]")` → `"2002-12-31"` (with $d = 2002-12-31) + - `format-date($d, "[D1] [MI] [Y]")` → `"31 XII 2002"` + - `format-date($d, "[D1o] [MNn], [Y]", "en", (), ())` → `"31st December, 2002"` + - `format-date($d, "[D01] [MN,*-3] [Y0001]", "en", (), ())` → `"31 DEC 2002"` + - `format-date($d, "[MNn] [D], [Y]", "en", (), ())` → `"December 31, 2002"` + - `format-date($d, "[[[Y0001]-[M01]-[D01]]]")` → `"[2002-12-31]"` (escaped brackets) + - `format-time($t, "[h]:[m01] [PN]", "en", (), ())` → `"3:58 PM"` (with $t = 15:58:45) + - `format-time($t, "[h]:[m01]:[s01] [Pn]", "en", (), ())` → `"3:58:45 pm"` + - `format-time($t, "[H01]:[m01]")` → `"15:58"` + - `format-time($t, "[H01]:[m01]:[s01].[f001]")` → `"15:58:45.762"` (with fractional seconds) + - `format-dateTime($dt, "[h].[m01][Pn] on [FNn], [D1o] [MNn]")` → `"3.58pm on Tuesday, 31st December"` + - `format-dateTime($dt, "[M01]/[D01]/[Y0001] at [H01]:[m01]:[s01]")` → `"12/31/2002 at 15:58:45"` + +2. 
Implement `DateTimeFormatUtil`: + - `parsePictureString(String picture)` → list of components + - `formatDateTime(ITemporalItem value, String picture, String language, String calendar, String place, Set allowedMarkers)` → formatted string + - Delegate integer formatting to `FnFormatInteger` for numeric components + - Handle timezone formatting per spec section 9.8.4.6 + - Handle year modulo per spec section 9.8.4.4 + - Handle fractional seconds per spec section 9.8.4.5 + +3. Verify formatting tests pass. + +#### Phase 5: Function Classes and Registration + +1. Create `FnFormatDateTimeTest.java`, `FnFormatDateTest.java`, `FnFormatTimeTest.java` (extend `ExpressionTestBase`): + - Use `@ParameterizedTest` with `@MethodSource` pattern + - Include spec examples from section 9.8.5 + - Test empty sequence → empty sequence + - Test 2-arg and 5-arg forms + - Test FOFD1350 for invalid marker/value-type combinations + +2. Implement function classes: + + All three follow the same pattern: + - **Both arities are context-dependent** (depend on implicit timezone) + - 2-arg: `.contextDependent()` (depends on default calendar, language, place, implicit timezone) + - 5-arg: `.contextDependent()` (depends on implicit timezone, namespaces) + + ```java + // Both signatures must use .contextDependent() + .deterministic() + .contextDependent() + .focusIndependent() + ``` + +3. 
Register all 8 signatures in `DefaultFunctionLibrary.java`: + - Remove P2 comments for `format-integer`, `format-date`, `format-dateTime`, `format-time` + - Keep P2 comment for `format-number` + + ```java + // https://www.w3.org/TR/xpath-functions-31/#func-format-integer + registerFunction(FnFormatInteger.SIGNATURE_TWO_ARG); + registerFunction(FnFormatInteger.SIGNATURE_THREE_ARG); + // https://www.w3.org/TR/xpath-functions-31/#func-format-dateTime + registerFunction(FnFormatDateTime.SIGNATURE_TWO_ARG); + registerFunction(FnFormatDateTime.SIGNATURE_FIVE_ARG); + // https://www.w3.org/TR/xpath-functions-31/#func-format-date + registerFunction(FnFormatDate.SIGNATURE_TWO_ARG); + registerFunction(FnFormatDate.SIGNATURE_FIVE_ARG); + // https://www.w3.org/TR/xpath-functions-31/#func-format-time + registerFunction(FnFormatTime.SIGNATURE_TWO_ARG); + registerFunction(FnFormatTime.SIGNATURE_FIVE_ARG); + ``` + +4. Verify all tests pass: `mvn -pl core test` + +5. Run CI build: `mvn clean install -PCI -Prelease` + +### Acceptance Criteria + +#### fn:format-integer + +- [x] `FormatFunctionException` created with FODF1310 error code +- [x] Decimal digit patterns: `1`, `01`, `001`, `0000`, etc. +- [x] Grouping separators: `#,##0`, `#'##0`, etc. +- [x] Alphabetic sequences: `a`, `A` +- [x] Roman numerals: `i`, `I` +- [x] Words: `w`, `W`, `Ww` +- [x] Format modifier: ordinal (`o`) with English suffixes (1st, 2nd, 3rd, 4th, ...) 
+- [x] Empty sequence returns zero-length string +- [x] Negative values prepend minus sign +- [x] Invalid format token raises FODF1310 +- [x] 2-arg and 3-arg signatures registered + +#### Date/Time Formatting + +- [x] `FormatDateTimeFunctionException` with FOFD1340 and FOFD1350 error codes +- [x] Picture string parser handles all 17 variable markers +- [x] Picture string parser handles first and second presentation modifiers +- [x] Picture string parser handles width modifiers with validation +- [x] Picture string parser handles escaped brackets and literal text +- [x] Picture string parser ignores whitespace within variable markers +- [x] Picture string parser rejects invalid syntax with FOFD1340 +- [x] Year formatting uses modulo rule per spec 9.8.4.4 +- [x] Fractional seconds use reverse-digit algorithm per spec 9.8.4.5 +- [x] Timezone formatting handles all spec 9.8.4.6 variants (numeric, military, name, GMT prefix) +- [x] Timezone produces empty output when value has no timezone (except military `ZZ` → `J`) +- [x] Integer-valued components use `fn:format-integer` rules per spec 9.8.4.3 +- [x] `fn:format-dateTime` works with 2-arg and 5-arg signatures +- [x] `fn:format-date` works with 2-arg and 5-arg signatures +- [x] `fn:format-date` rejects time-only markers with FOFD1350 +- [x] `fn:format-time` works with 2-arg and 5-arg signatures +- [x] `fn:format-time` rejects date-only and era markers with FOFD1350 +- [x] All 8 function signatures registered in `DefaultFunctionLibrary` +- [x] Both arities of each date/time function marked as context-dependent per spec +- [x] Empty sequence input returns empty sequence for all functions +- [x] `$language` defaults to English; unsupported languages fall back +- [x] `$calendar` defaults to Gregorian (`"AD"`); unsupported calendars fall back +- [x] `$place` supports IANA timezone names for timezone adjustment + +#### Quality + +- [x] Spec examples from section 9.8.5 pass as integration tests +- [x] Javadoc on all public/protected members +- [x] All 
existing tests still pass +- [x] `mvn clean install -PCI -Prelease` succeeds with no new violations + +--- + +## PR Summary Table + +| PR | Description | Files | Risk | Dependencies | Status | +|----|-------------|-------|------|--------------|--------| +| 1 | Formatting functions (format-integer + date/time) | ~15 | Medium | None | In Progress | + +**Total PRs**: 1 +**Total Files**: ~15 + +--- + +## Files Changed Summary + +| File | Change Type | +|------|-------------| +| `core/src/main/java/.../function/FormatFunctionException.java` | New | +| `core/src/main/java/.../function/FormatDateTimeFunctionException.java` | New | +| `core/src/main/java/.../function/library/FnFormatInteger.java` | New | +| `core/src/main/java/.../function/library/DateTimeFormatUtil.java` | New | +| `core/src/main/java/.../function/library/FnFormatDateTime.java` | New | +| `core/src/main/java/.../function/library/FnFormatDate.java` | New | +| `core/src/main/java/.../function/library/FnFormatTime.java` | New | +| `core/src/test/java/.../function/FormatFunctionExceptionTest.java` | New | +| `core/src/test/java/.../function/FormatDateTimeFunctionExceptionTest.java` | New | +| `core/src/test/java/.../function/library/FnFormatIntegerTest.java` | New | +| `core/src/test/java/.../function/library/DateTimeFormatUtilTest.java` | New | +| `core/src/test/java/.../function/library/FnFormatDateTimeTest.java` | New | +| `core/src/test/java/.../function/library/FnFormatDateTest.java` | New | +| `core/src/test/java/.../function/library/FnFormatTimeTest.java` | New | +| `core/src/main/java/.../function/library/DefaultFunctionLibrary.java` | Modified | diff --git a/core/src/main/java/dev/metaschema/core/metapath/function/FormatDateTimeFunctionException.java b/core/src/main/java/dev/metaschema/core/metapath/function/FormatDateTimeFunctionException.java new file mode 100644 index 000000000..27c7bf1d5 --- /dev/null +++ 
b/core/src/main/java/dev/metaschema/core/metapath/function/FormatDateTimeFunctionException.java @@ -0,0 +1,81 @@ +/* + * SPDX-FileCopyrightText: none + * SPDX-License-Identifier: CC0-1.0 + */ + +package dev.metaschema.core.metapath.function; + +import dev.metaschema.core.metapath.IErrorCode; +import edu.umd.cs.findbugs.annotations.NonNull; + +/** + * FOFD: Exceptions related to errors in formatting date/time values. + * + * @see <a + * href="https://www.w3.org/TR/xpath-functions-31/#formatting-dates-and-times">XPath + * Functions 3.1 - Formatting Dates and Times</a> + */ +public class FormatDateTimeFunctionException + extends FunctionMetapathError { + @NonNull + private static final String PREFIX = "FOFD"; + /** + * err:FOFD1340: + * Raised when the picture string supplied to a date/time formatting function + * does not conform to the required syntax. + */ + public static final int INVALID_PICTURE_STRING = 1340; + /** + * err:FOFD1350: + * Raised when a component specifier within a picture string refers to a + * component that is not available in the value being formatted. + */ + public static final int COMPONENT_NOT_AVAILABLE = 1350; + + /** + * The serial version UID. + */ + private static final long serialVersionUID = 1L; + + /** + * Constructs a new exception with the provided {@code code}, {@code message}, + * and no cause. + * + * @param code + * the error code value + * @param message + * the exception message + */ + public FormatDateTimeFunctionException(int code, String message) { + super(IErrorCode.of(PREFIX, code), message); + } + + /** + * Constructs a new exception with the provided {@code code}, {@code message}, + * and {@code cause}. + * + * @param code + * the error code value + * @param message + * the exception message + * @param cause + * the original exception cause + */ + public FormatDateTimeFunctionException(int code, String message, Throwable cause) { + super(IErrorCode.of(PREFIX, code), message, cause); + } + + /** + * Constructs a new exception with the provided {@code code}, no message, and + * the {@code cause}. 
+ * + * @param code + * the error code value + * @param cause + * the original exception cause + */ + public FormatDateTimeFunctionException(int code, Throwable cause) { + super(IErrorCode.of(PREFIX, code), cause); + } +} diff --git a/core/src/main/java/dev/metaschema/core/metapath/function/FormatFunctionException.java b/core/src/main/java/dev/metaschema/core/metapath/function/FormatFunctionException.java new file mode 100644 index 000000000..ed3cb482a --- /dev/null +++ b/core/src/main/java/dev/metaschema/core/metapath/function/FormatFunctionException.java @@ -0,0 +1,72 @@ +/* + * SPDX-FileCopyrightText: none + * SPDX-License-Identifier: CC0-1.0 + */ + +package dev.metaschema.core.metapath.function; + +import dev.metaschema.core.metapath.IErrorCode; +import edu.umd.cs.findbugs.annotations.NonNull; + +/** + * FODF: Exceptions related to formatting errors in Metapath functions such as + * {@code format-integer} and {@code format-number}. Picture-string errors + * specific to date/time formatting use the FOFD codes of + * {@link FormatDateTimeFunctionException} instead. + */ +public class FormatFunctionException + extends FunctionMetapathError { + @NonNull + private static final String PREFIX = "FODF"; + /** + * err:FODF1310: + * Raised when a format token in a picture string is invalid. + */ + public static final int INVALID_FORMAT_TOKEN = 1310; + + /** + * The serial version UID. + */ + private static final long serialVersionUID = 1L; + + /** + * Constructs a new exception with the provided {@code code}, {@code message}, + * and no cause. + * + * @param code + * the error code value + * @param message + * the exception message + */ + public FormatFunctionException(int code, String message) { + super(IErrorCode.of(PREFIX, code), message); + } + + /** + * Constructs a new exception with the provided {@code code}, {@code message}, + * and {@code cause}. 
+ * + * @param code + * the error code value + * @param message + * the exception message + * @param cause + * the original exception cause + */ + public FormatFunctionException(int code, String message, Throwable cause) { + super(IErrorCode.of(PREFIX, code), message, cause); + } + + /** + * Constructs a new exception with the provided {@code code}, no message, and + * the {@code cause}. + * + * @param code + * the error code value + * @param cause + * the original exception cause + */ + public FormatFunctionException(int code, Throwable cause) { + super(IErrorCode.of(PREFIX, code), cause); + } +} diff --git a/core/src/main/java/dev/metaschema/core/metapath/function/library/DateTimeFormatUtil.java b/core/src/main/java/dev/metaschema/core/metapath/function/library/DateTimeFormatUtil.java new file mode 100644 index 000000000..bfdb65cb7 --- /dev/null +++ b/core/src/main/java/dev/metaschema/core/metapath/function/library/DateTimeFormatUtil.java @@ -0,0 +1,1359 @@ +/* + * SPDX-FileCopyrightText: none + * SPDX-License-Identifier: CC0-1.0 + */ + +package dev.metaschema.core.metapath.function.library; + +import java.time.DayOfWeek; +import java.time.LocalDate; +import java.time.ZoneOffset; +import java.time.temporal.IsoFields; +import java.time.temporal.WeekFields; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Locale; +import java.util.Set; + +import dev.metaschema.core.metapath.function.FormatDateTimeFunctionException; +import dev.metaschema.core.metapath.item.atomic.IIntegerItem; +import dev.metaschema.core.metapath.item.atomic.ITemporalItem; +import edu.umd.cs.findbugs.annotations.NonNull; +import edu.umd.cs.findbugs.annotations.Nullable; + +/** + * Utility class for parsing and formatting date/time picture strings as defined + * in XPath + * Functions 3.1 Section 9.8. 
+ */ +public final class DateTimeFormatUtil { + /** + * The set of valid component specifier characters recognized in picture string + * variable markers (17 specifiers in total). + */ + private static final Set<Character> VALID_SPECIFIERS = Set.of( + 'Y', 'M', 'D', 'd', 'F', 'W', 'w', 'H', 'h', 'P', 'm', 's', 'f', 'Z', + 'z', 'C', 'E'); + + /** + * The set of valid second presentation modifier characters that may appear as + * the last character of a multi-character presentation modifier string. + */ + private static final Set<Character> SECOND_MODIFIERS = Set.of('a', 't', 'c', 'o'); + + private DateTimeFormatUtil() { + // utility class + } + + /** + * Base class for components of a parsed picture string. + */ + public static class FormatComponent { + /** + * Protected constructor; instances are created through the subclasses. + */ + protected FormatComponent() { + // marker class + } + } + + /** + * A literal text component in a picture string. + */ + public static class LiteralComponent + extends FormatComponent { + @NonNull + private final String text; + + /** + * Construct a new literal component. + * + * @param text + * the literal text + */ + public LiteralComponent(@NonNull String text) { + this.text = text; + } + + /** + * Get the literal text. + * + * @return the text + */ + @NonNull + public String getText() { + return text; + } + } + + /** + * A variable marker component in a picture string, representing a date/time + * component to be formatted. + */ + public static class VariableMarkerComponent + extends FormatComponent { + private final char specifier; + @Nullable + private final String primaryModifier; + @Nullable + private final Character secondModifier; + @Nullable + private final Integer minWidth; + @Nullable + private final Integer maxWidth; + + /** + * Construct a new variable marker component. 
+ * + * @param specifier + * the component specifier character + * @param primaryModifier + * the first presentation modifier, or {@code null} + * @param secondModifier + * the second presentation modifier, or {@code null} + * @param minWidth + * the minimum width, or {@code null} + * @param maxWidth + * the maximum width, or {@code null} + */ + public VariableMarkerComponent( + char specifier, + @Nullable String primaryModifier, + @Nullable Character secondModifier, + @Nullable Integer minWidth, + @Nullable Integer maxWidth) { + this.specifier = specifier; + this.primaryModifier = primaryModifier; + this.secondModifier = secondModifier; + this.minWidth = minWidth; + this.maxWidth = maxWidth; + } + + /** + * Get the component specifier character. + * + * @return the specifier + */ + public char getSpecifier() { + return specifier; + } + + /** + * Get the primary presentation modifier. + * + * @return the primary modifier, or {@code null} if not specified + */ + @Nullable + public String getPrimaryModifier() { + return primaryModifier; + } + + /** + * Get the second presentation modifier. + * + * @return the second modifier character, or {@code null} if not specified + */ + @Nullable + public Character getSecondModifier() { + return secondModifier; + } + + /** + * Get the minimum width. + * + * @return the minimum width, or {@code null} if not specified + */ + @Nullable + public Integer getMinWidth() { + return minWidth; + } + + /** + * Get the maximum width. + * + * @return the maximum width, or {@code null} if not specified + */ + @Nullable + public Integer getMaxWidth() { + return maxWidth; + } + } + + /** + * Parse a picture string into a list of format components. + *

+ * The picture string consists of literal substrings and variable markers + * enclosed in square brackets. Doubled brackets {@code [[} and {@code ]]} are + * treated as escaped literal brackets. + * + * @param picture + * the picture string to parse + * @return an unmodifiable list of format components + * @throws FormatDateTimeFunctionException + * with {@link FormatDateTimeFunctionException#INVALID_PICTURE_STRING} + * if the picture string syntax is invalid + * @see <a + * href="https://www.w3.org/TR/xpath-functions-31/#date-picture-string">XPath + * Functions 3.1 - Date Picture String</a> + */ + @NonNull + public static List<FormatComponent> parsePictureString(@NonNull String picture) { + List<FormatComponent> components = new ArrayList<>(); + StringBuilder literal = new StringBuilder(); + int length = picture.length(); + int index = 0; + + while (index < length) { + char ch = picture.charAt(index); + + if (ch == '[') { + // Check for escaped open bracket + if (index + 1 < length && picture.charAt(index + 1) == '[') { + literal.append('['); + index += 2; + } else { + // Flush any accumulated literal text + if (literal.length() > 0) { + components.add(new LiteralComponent(literal.toString())); + literal.setLength(0); + } + + // Find the closing bracket + int closeIndex = picture.indexOf(']', index + 1); + if (closeIndex < 0) { + throw new FormatDateTimeFunctionException( + FormatDateTimeFunctionException.INVALID_PICTURE_STRING, + "Unmatched '[' in picture string: " + picture); + } + + String markerContent = picture.substring(index + 1, closeIndex); + components.add(parseVariableMarker(markerContent, picture)); + index = closeIndex + 1; + } + } else if (ch == ']') { + // Check for escaped close bracket + if (index + 1 < length && picture.charAt(index + 1) == ']') { + literal.append(']'); + index += 2; + } else { + throw new FormatDateTimeFunctionException( + FormatDateTimeFunctionException.INVALID_PICTURE_STRING, + "Unmatched ']' in picture string: " + picture); + } + } else { + literal.append(ch); + index++; + } + } + + // Flush any remaining literal text + if 
(literal.length() > 0) { + components.add(new LiteralComponent(literal.toString())); + } + + return Collections.unmodifiableList(components); + } + + /** + * Parse the content of a variable marker (the text between {@code [} and + * {@code ]}) into a {@link VariableMarkerComponent}. + * + * @param content + * the raw content between the brackets + * @param picture + * the full picture string, used for error messages + * @return a new variable marker component + * @throws FormatDateTimeFunctionException + * with {@link FormatDateTimeFunctionException#INVALID_PICTURE_STRING} + * if the marker syntax is invalid + */ + @NonNull + private static VariableMarkerComponent parseVariableMarker( + @NonNull String content, + @NonNull String picture) { + // Strip all whitespace + String stripped = content.replaceAll("\\s", ""); + + if (stripped.isEmpty()) { + throw new FormatDateTimeFunctionException( + FormatDateTimeFunctionException.INVALID_PICTURE_STRING, + "Empty variable marker in picture string: " + picture); + } + + // First character is the component specifier + char specifier = stripped.charAt(0); + if (!VALID_SPECIFIERS.contains(specifier)) { + throw new FormatDateTimeFunctionException( + FormatDateTimeFunctionException.INVALID_PICTURE_STRING, + "Invalid component specifier '" + specifier + + "' in picture string: " + picture); + } + + // Remaining string contains presentation + width + String remaining = stripped.substring(1); + + // Find the LAST comma to split presentation from width + String presentationPart; + String widthPart; + int lastComma = remaining.lastIndexOf(','); + if (lastComma >= 0) { + presentationPart = remaining.substring(0, lastComma); + widthPart = remaining.substring(lastComma + 1); + } else { + presentationPart = remaining; + widthPart = null; + } + + // Parse presentation part + String primaryModifier = null; + Character secondModifier = null; + + if (!presentationPart.isEmpty()) { + if (presentationPart.length() == 1) { + // Single character 
is always the primary modifier + primaryModifier = presentationPart; + } else { + // More than one character: check if last char is a valid second modifier + char lastChar = presentationPart.charAt(presentationPart.length() - 1); + if (SECOND_MODIFIERS.contains(lastChar)) { + secondModifier = lastChar; + String primary = presentationPart.substring(0, presentationPart.length() - 1); + primaryModifier = primary.isEmpty() ? null : primary; + } else { + primaryModifier = presentationPart; + } + } + } + + // Parse width part + Integer minWidth = null; + Integer maxWidth = null; + + if (widthPart != null) { + Integer[] widths = new Integer[2]; + parseWidth(widthPart, picture, widths); + minWidth = widths[0]; + maxWidth = widths[1]; + } + + return new VariableMarkerComponent(specifier, primaryModifier, secondModifier, + minWidth, maxWidth); + } + + /** + * Parse a width specification string of the form {@code min-max} or + * {@code min}, where either value may be {@code *} to indicate unbounded. + * + * @param widthPart + * the width specification string + * @param picture + * the full picture string, used for error messages + * @param result + * a two-element array to receive the parsed minimum (index 0) and + * maximum (index 1) width values; {@code null} indicates unbounded + * @throws FormatDateTimeFunctionException + * with {@link FormatDateTimeFunctionException#INVALID_PICTURE_STRING} + * if the width specification is invalid + */ + private static void parseWidth( + @NonNull String widthPart, + @NonNull String picture, + @NonNull Integer[] result) { + int dashIndex = widthPart.indexOf('-'); + String minStr; + String maxStr; + + if (dashIndex >= 0) { + minStr = widthPart.substring(0, dashIndex); + maxStr = widthPart.substring(dashIndex + 1); + } else { + minStr = widthPart; + maxStr = null; + } + + Integer minWidth = parseWidthValue(minStr, picture); + Integer maxWidth = maxStr != null ? 
parseWidthValue(maxStr, picture) : null; + + // Validate min-width >= 1 + if (minWidth != null && minWidth < 1) { + throw new FormatDateTimeFunctionException( + FormatDateTimeFunctionException.INVALID_PICTURE_STRING, + "Minimum width must be at least 1 in picture string: " + picture); + } + + // Validate max-width >= min-width + if (minWidth != null && maxWidth != null && maxWidth < minWidth) { + throw new FormatDateTimeFunctionException( + FormatDateTimeFunctionException.INVALID_PICTURE_STRING, + "Maximum width must not be less than minimum width in picture string: " + + picture); + } + + result[0] = minWidth; + result[1] = maxWidth; + } + + /** + * Parse a single width value, which may be a positive integer or {@code *} for + * unbounded. + * + * @param value + * the width value string + * @param picture + * the full picture string, used for error messages + * @return the parsed integer value, or {@code null} if the value is {@code *} + * @throws FormatDateTimeFunctionException + * with {@link FormatDateTimeFunctionException#INVALID_PICTURE_STRING} + * if the value cannot be parsed + */ + @Nullable + private static Integer parseWidthValue(@NonNull String value, @NonNull String picture) { + if ("*".equals(value)) { + return null; + } + try { + return Integer.parseInt(value); + } catch (NumberFormatException ex) { + throw new FormatDateTimeFunctionException( + FormatDateTimeFunctionException.INVALID_PICTURE_STRING, + "Invalid width value '" + value + "' in picture string: " + picture, + ex); + } + } + + // ==================================================================== + // Formatting Engine + // ==================================================================== + + /** + * English month names indexed from 0 (January) to 11 (December). 
+ */ + private static final String[] MONTH_NAMES = { + "January", "February", "March", "April", "May", "June", + "July", "August", "September", "October", "November", "December" + }; + + /** + * English day-of-week names indexed from 0 (Monday) to 6 (Sunday), matching ISO + * 8601 numbering where Monday is day 1. + */ + private static final String[] DAY_NAMES = { + "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday" + }; + + /** + * Format a temporal value according to a picture string. + *

+ * This method implements the formatting algorithm defined in + * <a href="https://www.w3.org/TR/xpath-functions-31/#formatting-dates-and-times"> + * XPath Functions 3.1 Section 9.8</a>. The picture string is parsed into + * literal and variable marker components, and each variable marker is formatted + * according to its component specifier, presentation modifier, and width + * modifier. + * + * @param value + * the temporal value to format + * @param picture + * the picture string + * @param language + * the language for names, or {@code null} for English + * @param calendar + * the calendar system, or {@code null} for Gregorian; accepted for + * signature compatibility and not consulted by this method + * @param place + * the place for timezone, or {@code null}; accepted for signature + * compatibility and not consulted by this method + * @param allowedMarkers + * the set of allowed component specifiers + * @return the formatted string + * @throws FormatDateTimeFunctionException + * with {@link FormatDateTimeFunctionException#INVALID_PICTURE_STRING} + * if the picture string cannot be parsed, or with + * {@link FormatDateTimeFunctionException#COMPONENT_NOT_AVAILABLE} if + * a marker's specifier is not in {@code allowedMarkers} + */ + @NonNull + public static String formatDateTime( + @NonNull ITemporalItem value, + @NonNull String picture, + @Nullable String language, + @Nullable String calendar, + @Nullable String place, + @NonNull Set<Character> allowedMarkers) { + List<FormatComponent> components = parsePictureString(picture); + StringBuilder result = new StringBuilder(); + + for (FormatComponent component : components) { + if (component instanceof LiteralComponent) { + result.append(((LiteralComponent) component).getText()); + } else { + VariableMarkerComponent marker = (VariableMarkerComponent) component; + char specifier = marker.getSpecifier(); + + // reject specifiers the caller did not allow + if (!allowedMarkers.contains(specifier)) { + throw new FormatDateTimeFunctionException( + FormatDateTimeFunctionException.COMPONENT_NOT_AVAILABLE, + "Component specifier '" + specifier + + "' is not available for this type in picture string: " + picture); + } + + result.append(formatComponent(value, marker, language)); + } + } + + return result.toString(); + } + + /** + * Format a single variable marker component. 
+ * + * @param value + * the temporal value + * @param marker + * the variable marker component + * @param language + * the language for locale-dependent formatting, or {@code null} + * @return the formatted string for this component + */ + @NonNull + private static String formatComponent( + @NonNull ITemporalItem value, + @NonNull VariableMarkerComponent marker, + @Nullable String language) { + char specifier = marker.getSpecifier(); + String primaryMod = marker.getPrimaryModifier(); + Character secondMod = marker.getSecondModifier(); + Integer minWidth = marker.getMinWidth(); + Integer maxWidth = marker.getMaxWidth(); + + switch (specifier) { + case 'Y': + return formatYear(value, primaryMod, secondMod, minWidth, maxWidth, language); + case 'M': + return formatNameableComponent(value.getMonth(), MONTH_NAMES, 1, + primaryMod, secondMod, minWidth, maxWidth, language, "1"); + case 'D': + return formatIntegerComponent(value.getDay(), + primaryMod, secondMod, minWidth, maxWidth, language, "1"); + case 'd': + return formatDayOfYear(value, primaryMod, secondMod, minWidth, maxWidth, language); + case 'F': + return formatDayOfWeek(value, primaryMod, secondMod, minWidth, maxWidth, language); + case 'W': + return formatWeekOfYear(value, primaryMod, secondMod, minWidth, maxWidth, language); + case 'w': + return formatWeekOfMonth(value, primaryMod, secondMod, minWidth, maxWidth, language); + case 'H': + return formatIntegerComponent(value.getHour(), + primaryMod, secondMod, minWidth, maxWidth, language, "1"); + case 'h': + return formatIntegerComponent(hourIn12(value.getHour()), + primaryMod, secondMod, minWidth, maxWidth, language, "1"); + case 'P': + return formatAmPm(value.getHour(), primaryMod, secondMod, minWidth, maxWidth); + case 'm': + return formatIntegerComponent(value.getMinute(), + primaryMod, secondMod, minWidth, maxWidth, language, "01"); + case 's': + return formatIntegerComponent(value.getSecond(), + primaryMod, secondMod, minWidth, maxWidth, language, "01"); 
+ case 'f': + return formatFractionalSeconds(value.getNano(), primaryMod, minWidth, maxWidth); + case 'Z': + return formatTimezone(value, primaryMod, secondMod); + case 'z': + return formatGmtTimezone(value, primaryMod, secondMod); + case 'C': + return formatCalendar(primaryMod, minWidth, maxWidth); + case 'E': + return formatEra(value.getYear(), primaryMod, minWidth, maxWidth); + default: + // Should not happen since VALID_SPECIFIERS already checked + return ""; + } + } + + /** + * Convert a 24-hour hour value to 12-hour format. + * + * @param hour24 + * the hour in 24-hour format (0-23) + * @return the hour in 12-hour format (1-12) + */ + private static int hourIn12(int hour24) { + int h = hour24 % 12; + return h == 0 ? 12 : h; + } + + /** + * Format a year value with special modulo handling per spec 9.8.4.4. + * + * @param value + * the temporal value + * @param primaryMod + * the primary presentation modifier, or {@code null} + * @param secondMod + * the second presentation modifier, or {@code null} + * @param minWidth + * the minimum width, or {@code null} + * @param maxWidth + * the maximum width, or {@code null} + * @param language + * the language for formatting, or {@code null} + * @return the formatted year string + */ + @NonNull + private static String formatYear( + @NonNull ITemporalItem value, + @Nullable String primaryMod, + @Nullable Character secondMod, + @Nullable Integer minWidth, + @Nullable Integer maxWidth, + @Nullable String language) { + // Use long arithmetic and clamp to avoid overflow for Integer.MIN_VALUE + int year = (int) Math.min(Math.abs((long) value.getYear()), Integer.MAX_VALUE); + + // Determine the effective format token + String effectiveToken = primaryMod != null ? 
primaryMod : "1"; + + // Spec 9.8.4.4: Determine N for modulo rule + // If maxWidth defines a finite value -> N = maxWidth + // Else if format token is a decimal digit pattern with W>=2 mandatory digits -> + // N = W + // Else N = infinity (output full year) + int moduloN = Integer.MAX_VALUE; + + if (maxWidth != null) { + moduloN = maxWidth; + } else { + int mandatoryDigits = countMandatoryDigits(effectiveToken); + if (mandatoryDigits >= 2 && isDecimalDigitPattern(effectiveToken)) { + moduloN = mandatoryDigits; + } + } + + // Apply modulo if N is finite + int displayYear = year; + if (moduloN < Integer.MAX_VALUE) { + int divisor = (int) Math.pow(10, moduloN); + displayYear = year % divisor; + } + + // Format the value + String formatted = formatIntegerValue(displayYear, effectiveToken, secondMod, language); + + // Apply width modifiers + formatted = applyWidthModifiers(formatted, minWidth, maxWidth, false); + + // Prepend minus for negative years + if (value.getYear() < 0) { + formatted = "-" + formatted; + } + + return formatted; + } + + /** + * Format a component that can be displayed either as a number or as a name + * (e.g., months, days of week). 
+ * + * @param componentValue + * the numeric value of the component + * @param names + * the array of names (0-indexed offset from {@code nameOffset}) + * @param nameOffset + * the offset subtracted from {@code componentValue} to get the name + * array index + * @param primaryMod + * the primary presentation modifier, or {@code null} + * @param secondMod + * the second presentation modifier, or {@code null} + * @param minWidth + * the minimum width, or {@code null} + * @param maxWidth + * the maximum width, or {@code null} + * @param language + * the language for formatting, or {@code null} + * @param defaultToken + * the default format token when no primary modifier is specified + * @return the formatted string + */ + @NonNull + private static String formatNameableComponent( + int componentValue, + @NonNull String[] names, + int nameOffset, + @Nullable String primaryMod, + @Nullable Character secondMod, + @Nullable Integer minWidth, + @Nullable Integer maxWidth, + @Nullable String language, + @NonNull String defaultToken) { + String effective = primaryMod != null ? primaryMod : defaultToken; + + // Check if this is a name format + if (isNameFormat(effective)) { + String name = names[componentValue - nameOffset]; + name = applyNameCase(name, effective); + return applyWidthModifiers(name, minWidth, maxWidth, true); + } + + return formatIntegerComponent(componentValue, primaryMod, secondMod, + minWidth, maxWidth, language, defaultToken); + } + + /** + * Format a simple integer-valued component. 
+ * + * @param componentValue + * the integer value + * @param primaryMod + * the primary presentation modifier, or {@code null} + * @param secondMod + * the second presentation modifier, or {@code null} + * @param minWidth + * the minimum width, or {@code null} + * @param maxWidth + * the maximum width, or {@code null} + * @param language + * the language for formatting, or {@code null} + * @param defaultToken + * the default format token + * @return the formatted string + */ + @NonNull + private static String formatIntegerComponent( + int componentValue, + @Nullable String primaryMod, + @Nullable Character secondMod, + @Nullable Integer minWidth, + @Nullable Integer maxWidth, + @Nullable String language, + @NonNull String defaultToken) { + String effectiveToken = primaryMod != null ? primaryMod : defaultToken; + + String formatted = formatIntegerValue(componentValue, effectiveToken, secondMod, language); + return applyWidthModifiers(formatted, minWidth, maxWidth, false); + } + + /** + * Format an integer using {@link FnFormatInteger#fnFormatInteger}, delegating + * numeric, alphabetic, roman, and word formatting to the XPath format-integer + * implementation. 
+ * + * @param componentValue + * the integer value to format + * @param formatToken + * the primary format token (e.g., "1", "01", "i", "w") + * @param secondMod + * the second modifier character (e.g., 'o' for ordinal), or + * {@code null} + * @param language + * the language for locale-dependent formatting, or {@code null} + * @return the formatted string + */ + @NonNull + private static String formatIntegerValue( + int componentValue, + @NonNull String formatToken, + @Nullable Character secondMod, + @Nullable String language) { + // Build the format-integer picture + String picture = formatToken; + if (secondMod != null && secondMod == 'o') { + picture = picture + ";o"; + } + + return FnFormatInteger.fnFormatInteger( + IIntegerItem.valueOf(componentValue), + picture, + language); + } + + /** + * Format the day-of-year component. + * + * @param value + * the temporal value + * @param primaryMod + * the primary modifier, or {@code null} + * @param secondMod + * the second modifier, or {@code null} + * @param minWidth + * the minimum width, or {@code null} + * @param maxWidth + * the maximum width, or {@code null} + * @param language + * the language, or {@code null} + * @return the formatted day-of-year string + */ + @NonNull + private static String formatDayOfYear( + @NonNull ITemporalItem value, + @Nullable String primaryMod, + @Nullable Character secondMod, + @Nullable Integer minWidth, + @Nullable Integer maxWidth, + @Nullable String language) { + // Use proxy year >= 1 because LocalDate.of does not support year <= 0 + int proxyYear = Math.max(1, value.getYear()); + int dayOfYear = LocalDate.of(proxyYear, value.getMonth(), value.getDay()).getDayOfYear(); + return formatIntegerComponent(dayOfYear, primaryMod, secondMod, minWidth, maxWidth, language, "1"); + } + + /** + * Format the day-of-week component. The default presentation modifier for F is + * "n" (lowercase name). 
+ * + * @param value + * the temporal value + * @param primaryMod + * the primary modifier, or {@code null} + * @param secondMod + * the second modifier, or {@code null} + * @param minWidth + * the minimum width, or {@code null} + * @param maxWidth + * the maximum width, or {@code null} + * @param language + * the language, or {@code null} + * @return the formatted day-of-week string + */ + @NonNull + private static String formatDayOfWeek( + @NonNull ITemporalItem value, + @Nullable String primaryMod, + @Nullable Character secondMod, + @Nullable Integer minWidth, + @Nullable Integer maxWidth, + @Nullable String language) { + // Use proxy year >= 1 because LocalDate.of does not support year <= 0 + int proxyYear = Math.max(1, value.getYear()); + DayOfWeek dow = LocalDate.of(proxyYear, value.getMonth(), value.getDay()).getDayOfWeek(); + int isoValue = dow.getValue(); // Mon=1..Sun=7 + + // Default for F is "n" (lowercase name) + String effective = primaryMod != null ? primaryMod : "n"; + + if (isNameFormat(effective)) { + String name = DAY_NAMES[isoValue - 1]; + name = applyNameCase(name, effective); + return applyWidthModifiers(name, minWidth, maxWidth, true); + } + + return formatIntegerComponent(isoValue, primaryMod, secondMod, + minWidth, maxWidth, language, "n"); + } + + /** + * Format the ISO week-of-year component. 
+ * + * @param value + * the temporal value + * @param primaryMod + * the primary modifier, or {@code null} + * @param secondMod + * the second modifier, or {@code null} + * @param minWidth + * the minimum width, or {@code null} + * @param maxWidth + * the maximum width, or {@code null} + * @param language + * the language, or {@code null} + * @return the formatted week-of-year string + */ + @NonNull + private static String formatWeekOfYear( + @NonNull ITemporalItem value, + @Nullable String primaryMod, + @Nullable Character secondMod, + @Nullable Integer minWidth, + @Nullable Integer maxWidth, + @Nullable String language) { + // Use proxy year >= 1 because LocalDate.of does not support year <= 0 + int proxyYear = Math.max(1, value.getYear()); + int week = LocalDate.of(proxyYear, value.getMonth(), value.getDay()) + .get(IsoFields.WEEK_OF_WEEK_BASED_YEAR); + return formatIntegerComponent(week, primaryMod, secondMod, minWidth, maxWidth, language, "1"); + } + + /** + * Format the week-of-month component. 
+ * + * @param value + * the temporal value + * @param primaryMod + * the primary modifier, or {@code null} + * @param secondMod + * the second modifier, or {@code null} + * @param minWidth + * the minimum width, or {@code null} + * @param maxWidth + * the maximum width, or {@code null} + * @param language + * the language, or {@code null} + * @return the formatted week-of-month string + */ + @NonNull + private static String formatWeekOfMonth( + @NonNull ITemporalItem value, + @Nullable String primaryMod, + @Nullable Character secondMod, + @Nullable Integer minWidth, + @Nullable Integer maxWidth, + @Nullable String language) { + // Use proxy year >= 1 because LocalDate.of does not support year <= 0 + int proxyYear = Math.max(1, value.getYear()); + int week = LocalDate.of(proxyYear, value.getMonth(), value.getDay()) + .get(WeekFields.ISO.weekOfMonth()); + return formatIntegerComponent(week, primaryMod, secondMod, minWidth, maxWidth, language, "1"); + } + + /** + * Format the AM/PM marker. The default presentation is "n" (lowercase name). + * + * @param hour + * the hour value (0-23) + * @param primaryMod + * the primary modifier, or {@code null} + * @param secondMod + * the second modifier, or {@code null} + * @param minWidth + * the minimum width, or {@code null} + * @param maxWidth + * the maximum width, or {@code null} + * @return the formatted AM/PM string + */ + @NonNull + private static String formatAmPm( + int hour, + @Nullable String primaryMod, + @Nullable Character secondMod, + @Nullable Integer minWidth, + @Nullable Integer maxWidth) { + String effective = primaryMod != null ? primaryMod : "n"; + String base = hour < 12 ? 
"am" : "pm"; + + String result; + if ("N".equals(effective)) { + result = base.toUpperCase(Locale.ROOT); + } else if ("Nn".equals(effective)) { + result = Character.toUpperCase(base.charAt(0)) + base.substring(1); + } else { + // default: lowercase + result = base; + } + + return applyWidthModifiers(result, minWidth, maxWidth, true); + } + + /** + * Format fractional seconds per spec 9.8.4.5. + *

+ * The fractional seconds use a "reverse digit" algorithm: the nano value is + * converted to a 9-digit string, and the format token determines how many + * digits to output. A single-digit pattern with no constraints outputs all + * significant (non-trailing-zero) digits. + * + * @param nano + * the nanosecond value (0-999999999) + * @param primaryModifier + * the primary modifier, or {@code null} + * @param minWidth + * the minimum width, or {@code null} + * @param maxWidth + * the maximum width, or {@code null} + * @return the formatted fractional seconds string + */ + @NonNull + private static String formatFractionalSeconds( + int nano, + @Nullable String primaryModifier, + @Nullable Integer minWidth, + @Nullable Integer maxWidth) { + // Convert nano to 9-digit string + String nanoStr = String.format("%09d", nano); + String effective = primaryModifier != null ? primaryModifier : "1"; + int mandatoryDigits = countMandatoryDigits(effective); + + String result; + if (mandatoryDigits <= 1 && effective.length() <= 1) { + // Single digit pattern = no constraint, use all significant digits + result = nanoStr.replaceAll("0+$", ""); + if (result.isEmpty()) { + result = "0"; + } + + // Apply width constraints + if (maxWidth != null && result.length() > maxWidth) { + result = result.substring(0, maxWidth); + } + if (minWidth != null && result.length() < minWidth) { + result = result + "0".repeat(minWidth - result.length()); + } + } else { + // Multiple mandatory digits = exact digit count + int numDigits = mandatoryDigits; + if (minWidth != null && minWidth > numDigits) { + numDigits = minWidth; + } + if (maxWidth != null && maxWidth < numDigits) { + numDigits = maxWidth; + } + result = nanoStr.substring(0, Math.min(numDigits, 9)); + } + + return result; + } + + /** + * Format a timezone offset using the Z specifier per spec 9.8.4.6. 
+ * + * @param value + * the temporal value + * @param primaryMod + * the primary modifier, or {@code null} + * @param secondMod + * the second modifier, or {@code null} + * @return the formatted timezone string + */ + @NonNull + private static String formatTimezone( + @NonNull ITemporalItem value, + @Nullable String primaryMod, + @Nullable Character secondMod) { + ZoneOffset offset = value.getZoneOffset(); + + // Military timezone format + if (primaryMod != null && "Z".equals(primaryMod)) { + return formatMilitaryTimezone(offset); + } + + if (offset == null) { + return ""; + } + + // Check for 't' modifier: UTC -> "Z" + boolean useZ = secondMod != null && secondMod == 't'; + if (useZ && offset.getTotalSeconds() == 0) { + return "Z"; + } + + String effective = primaryMod != null ? primaryMod : "01:01"; + return formatTimezoneNumeric(offset, effective); + } + + /** + * Format a military timezone letter. + * + * @param offset + * the zone offset, or {@code null} for local time + * @return the military timezone letter + */ + @NonNull + private static String formatMilitaryTimezone(@Nullable ZoneOffset offset) { + if (offset == null) { + return "J"; // local time + } + + int totalSeconds = offset.getTotalSeconds(); + int totalMinutes = totalSeconds / 60; + int hours = totalMinutes / 60; + int minutes = totalMinutes % 60; + + if (totalSeconds == 0) { + return "Z"; // UTC + } + + // Military letters only for whole-hour offsets -12..+12, excluding 0 + if (minutes == 0 && hours >= -12 && hours <= 12) { + if (hours > 0) { + // A=+1, B=+2, ..., I=+9, K=+10, L=+11, M=+12 (skip J at +10 position) + if (hours <= 9) { + return String.valueOf((char) ('A' + hours - 1)); + } + // hours 10,11,12: skip J so K=10, L=11, M=12 + return String.valueOf((char) ('A' + hours)); // +10->K, +11->L, +12->M + } + // Negative: N=-1, O=-2, ..., Y=-12 + return String.valueOf((char) ('N' + (-hours) - 1)); + } + + // Non-whole-hour offsets: fallback to numeric + return formatTimezoneNumeric(offset, 
"01:01"); + } + + /** + * Format a numeric timezone offset according to the specified pattern. + *

+ * The pattern determines the format: + *

    + *
  • {@code 0} or {@code 1} - hours only (no leading zero), minutes if + * non-zero
  • + *
  • {@code 00} or {@code 01} - hours with leading zero, minutes if + * non-zero
  • + *
  • {@code 0:00} or {@code 1:01} - hours without leading zero, always show + * minutes with separator
  • + *
  • {@code 00:00} or {@code 01:01} - hours with leading zero, always show + * minutes with separator
  • + *
  • {@code 0000} or {@code 0001} - concatenated hours+minutes, leading zero + * on hours
  • + *
  • {@code 000} or {@code 001} - concatenated hours+minutes, no leading zero + * on hours
  • + *
+ * + * @param offset + * the zone offset + * @param pattern + * the format pattern + * @return the formatted timezone string + */ + @NonNull + private static String formatTimezoneNumeric( + @NonNull ZoneOffset offset, + @NonNull String pattern) { + int totalSeconds = offset.getTotalSeconds(); + String sign = totalSeconds >= 0 ? "+" : "-"; + int absSeconds = Math.abs(totalSeconds); + int hours = absSeconds / 3600; + int minutes = (absSeconds % 3600) / 60; + + // Determine format from pattern + boolean hasSeparator = pattern.contains(":") || pattern.contains("."); + char separator = pattern.contains(":") ? ':' : '.'; + String digitsPart = pattern.replace(":", "").replace(".", ""); + int digitCount = digitsPart.length(); + + boolean padHours; + boolean alwaysShowMinutes; + + if (hasSeparator) { + // Pattern with separator (e.g., "01:01", "0:00") + int sepIndex = pattern.indexOf(separator); + padHours = sepIndex >= 2; + alwaysShowMinutes = true; + } else if (digitCount >= 3) { + // Concatenated format (e.g., "0000", "000") + padHours = digitCount >= 4; + alwaysShowMinutes = true; + // No separator in output + } else { + // Hours only (e.g., "0", "00", "01") + padHours = digitCount >= 2; + alwaysShowMinutes = false; + } + + String hoursStr = padHours + ? String.format("%02d", hours) + : String.valueOf(hours); + + if (alwaysShowMinutes) { + String minutesStr = String.format("%02d", minutes); + if (hasSeparator) { + return sign + hoursStr + separator + minutesStr; + } + return sign + hoursStr + minutesStr; + } + + // Hours only, minutes if non-zero + if (minutes != 0) { + String minutesStr = String.format("%02d", minutes); + return sign + hoursStr + ":" + minutesStr; + } + + return sign + hoursStr; + } + + /** + * Format a timezone with GMT prefix (z specifier). 
+ * + * @param value + * the temporal value + * @param primaryMod + * the primary modifier, or {@code null} + * @param secondMod + * the second modifier, or {@code null} + * @return the formatted GMT timezone string + */ + @NonNull + private static String formatGmtTimezone( + @NonNull ITemporalItem value, + @Nullable String primaryMod, + @Nullable Character secondMod) { + ZoneOffset offset = value.getZoneOffset(); + if (offset == null) { + return ""; + } + + String tzPart = formatTimezoneNumeric(offset, primaryMod != null ? primaryMod : "01:01"); + return "GMT" + tzPart; + } + + /** + * Format the calendar name. Always returns "ad" for the Gregorian calendar. + * + * @param primaryMod + * the primary modifier, or {@code null} + * @param minWidth + * the minimum width, or {@code null} + * @param maxWidth + * the maximum width, or {@code null} + * @return the formatted calendar string + */ + @NonNull + private static String formatCalendar( + @Nullable String primaryMod, + @Nullable Integer minWidth, + @Nullable Integer maxWidth) { + String effective = primaryMod != null ? primaryMod : "n"; + String result = applyNameCase("ad", effective); + return applyWidthModifiers(result, minWidth, maxWidth, true); + } + + /** + * Format the era indicator. Returns "ad" for non-negative years and "bc" for + * negative years. + * + * @param year + * the year value + * @param primaryMod + * the primary modifier, or {@code null} + * @param minWidth + * the minimum width, or {@code null} + * @param maxWidth + * the maximum width, or {@code null} + * @return the formatted era string + */ + @NonNull + private static String formatEra( + int year, + @Nullable String primaryMod, + @Nullable Integer minWidth, + @Nullable Integer maxWidth) { + String effective = primaryMod != null ? primaryMod : "n"; + String base = year >= 0 ? 
"ad" : "bc"; + String result = applyNameCase(base, effective); + return applyWidthModifiers(result, minWidth, maxWidth, true); + } + + // ==================================================================== + // Helper methods + // ==================================================================== + + /** + * Check if a format modifier represents a name format (N, Nn, or n). + * + * @param modifier + * the modifier string + * @return {@code true} if the modifier requests name formatting + */ + private static boolean isNameFormat(@NonNull String modifier) { + return "N".equals(modifier) || "Nn".equals(modifier) || "n".equals(modifier); + } + + /** + * Apply case transformation to a name string based on the modifier. + * + * @param name + * the name string in its base form + * @param modifier + * the modifier controlling case: "N" for uppercase, "n" for lowercase, + * "Nn" for title case + * @return the name with case applied + */ + @NonNull + private static String applyNameCase(@NonNull String name, @NonNull String modifier) { + switch (modifier) { + case "N": + return name.toUpperCase(Locale.ROOT); + case "n": + return name.toLowerCase(Locale.ROOT); + case "Nn": + if (name.isEmpty()) { + return name; + } + return Character.toUpperCase(name.charAt(0)) + + name.substring(1).toLowerCase(Locale.ROOT); + default: + return name; + } + } + + /** + * Apply width modifiers to a formatted string, performing padding and + * truncation as needed. 
+ * + * @param value + * the formatted string + * @param minWidth + * the minimum width, or {@code null} for no minimum + * @param maxWidth + * the maximum width, or {@code null} for no maximum + * @param isName + * {@code true} if the value is a name (pad with spaces on the right), + * {@code false} if numeric (pad with zeros on the left) + * @return the string adjusted to fit width constraints + */ + @NonNull + private static String applyWidthModifiers( + @NonNull String value, + @Nullable Integer minWidth, + @Nullable Integer maxWidth, + boolean isName) { + String result = value; + + // Truncation + if (maxWidth != null && result.length() > maxWidth) { + result = result.substring(0, maxWidth); + } + + // Padding + if (minWidth != null && result.length() < minWidth) { + int padAmount = minWidth - result.length(); + if (isName) { + // Pad names with trailing spaces + result = result + " ".repeat(padAmount); + } else { + // Pad numbers with leading zeros + result = "0".repeat(padAmount) + result; + } + } + + return result; + } + + /** + * Count the number of mandatory (digit) characters in a format token. + * + * @param pattern + * the format token + * @return the count of digit characters + */ + private static int countMandatoryDigits(@NonNull String pattern) { + int count = 0; + for (int i = 0; i < pattern.length(); i++) { + if (Character.isDigit(pattern.charAt(i))) { + count++; + } + } + return count; + } + + /** + * Check if a format token is a decimal digit pattern (contains only decimal + * digits and optional grouping separators). + * + * @param token + * the format token to check + * @return {@code true} if the token is a decimal digit pattern + */ + private static boolean isDecimalDigitPattern(@NonNull String token) { + if (token.isEmpty()) { + return false; + } + for (int i = 0; i < token.length(); i++) { + char ch = token.charAt(i); + if (!Character.isDigit(ch) && ch != '#' && ch != ',' && ch != '.' 
&& ch != ';') { + return false; + } + } + return true; + } +} diff --git a/core/src/main/java/dev/metaschema/core/metapath/function/library/DefaultFunctionLibrary.java b/core/src/main/java/dev/metaschema/core/metapath/function/library/DefaultFunctionLibrary.java index a65024edb..14beea29f 100644 --- a/core/src/main/java/dev/metaschema/core/metapath/function/library/DefaultFunctionLibrary.java +++ b/core/src/main/java/dev/metaschema/core/metapath/function/library/DefaultFunctionLibrary.java @@ -97,11 +97,19 @@ public DefaultFunctionLibrary() { // NOPMD - intentional registerFunction(FnFalse.SIGNATURE); // https://www.w3.org/TR/xpath-functions-31/#func-floor registerFunction(NumericFunction.signature(MetapathConstants.NS_METAPATH_FUNCTIONS, "floor", INumericItem::floor)); - // P2: https://www.w3.org/TR/xpath-functions-31/#func-format-date - // P2: https://www.w3.org/TR/xpath-functions-31/#func-format-dateTime - // P2: https://www.w3.org/TR/xpath-functions-31/#func-format-integer + // https://www.w3.org/TR/xpath-functions-31/#func-format-date + registerFunction(FnFormatDate.SIGNATURE_TWO_ARG); + registerFunction(FnFormatDate.SIGNATURE_FIVE_ARG); + // https://www.w3.org/TR/xpath-functions-31/#func-format-dateTime + registerFunction(FnFormatDateTime.SIGNATURE_TWO_ARG); + registerFunction(FnFormatDateTime.SIGNATURE_FIVE_ARG); + // https://www.w3.org/TR/xpath-functions-31/#func-format-integer + registerFunction(FnFormatInteger.SIGNATURE_TWO_ARG); + registerFunction(FnFormatInteger.SIGNATURE_THREE_ARG); // P2: https://www.w3.org/TR/xpath-functions-31/#func-format-number - // P2: https://www.w3.org/TR/xpath-functions-31/#func-format-time + // https://www.w3.org/TR/xpath-functions-31/#func-format-time + registerFunction(FnFormatTime.SIGNATURE_TWO_ARG); + registerFunction(FnFormatTime.SIGNATURE_FIVE_ARG); // https://www.w3.org/TR/xpath-functions-31/#func-function-arity registerFunction(FnFunctionArity.SIGNATURE); // 
https://www.w3.org/TR/xpath-functions-31/#func-function-lookup diff --git a/core/src/main/java/dev/metaschema/core/metapath/function/library/FnFormatDate.java b/core/src/main/java/dev/metaschema/core/metapath/function/library/FnFormatDate.java new file mode 100644 index 000000000..922f0a92f --- /dev/null +++ b/core/src/main/java/dev/metaschema/core/metapath/function/library/FnFormatDate.java @@ -0,0 +1,184 @@ +/* + * SPDX-FileCopyrightText: none + * SPDX-License-Identifier: CC0-1.0 + */ + +package dev.metaschema.core.metapath.function.library; + +import java.util.List; +import java.util.Set; + +import dev.metaschema.core.metapath.DynamicContext; +import dev.metaschema.core.metapath.MetapathConstants; +import dev.metaschema.core.metapath.function.FunctionUtils; +import dev.metaschema.core.metapath.function.IArgument; +import dev.metaschema.core.metapath.function.IFunction; +import dev.metaschema.core.metapath.item.IItem; +import dev.metaschema.core.metapath.item.ISequence; +import dev.metaschema.core.metapath.item.atomic.IDateItem; +import dev.metaschema.core.metapath.item.atomic.IStringItem; +import dev.metaschema.core.util.ObjectUtils; +import edu.umd.cs.findbugs.annotations.NonNull; +import edu.umd.cs.findbugs.annotations.Nullable; + +/** + * Implements the XPath 3.1 fn:format-date + * functions. + * + * @see XPath 3.1 + * fn:format-date + */ +public final class FnFormatDate { + private static final String NAME = "format-date"; + + /** + * The set of component specifiers allowed for date values, which excludes + * time-only markers (H, h, P, m, s, f). 
+ */ + @NonNull + static final Set DATE_MARKERS = Set.of( + 'Y', 'M', 'D', 'd', 'F', 'W', 'w', + 'Z', 'z', 'C', 'E'); + + @NonNull + static final IFunction SIGNATURE_TWO_ARG = IFunction.builder() + .name(NAME) + .namespace(MetapathConstants.NS_METAPATH_FUNCTIONS) + .deterministic() + .contextDependent() + .focusIndependent() + .argument(IArgument.builder() + .name("value") + .type(IDateItem.type()) + .zeroOrOne() + .build()) + .argument(IArgument.builder() + .name("picture") + .type(IStringItem.type()) + .one() + .build()) + .returnType(IStringItem.type()) + .returnZeroOrOne() + .functionHandler(FnFormatDate::executeTwoArg) + .build(); + + @NonNull + static final IFunction SIGNATURE_FIVE_ARG = IFunction.builder() + .name(NAME) + .namespace(MetapathConstants.NS_METAPATH_FUNCTIONS) + .deterministic() + .contextDependent() + .focusIndependent() + .argument(IArgument.builder() + .name("value") + .type(IDateItem.type()) + .zeroOrOne() + .build()) + .argument(IArgument.builder() + .name("picture") + .type(IStringItem.type()) + .one() + .build()) + .argument(IArgument.builder() + .name("language") + .type(IStringItem.type()) + .zeroOrOne() + .build()) + .argument(IArgument.builder() + .name("calendar") + .type(IStringItem.type()) + .zeroOrOne() + .build()) + .argument(IArgument.builder() + .name("place") + .type(IStringItem.type()) + .zeroOrOne() + .build()) + .returnType(IStringItem.type()) + .returnZeroOrOne() + .functionHandler(FnFormatDate::executeFiveArg) + .build(); + + private FnFormatDate() { + // disable construction + } + + @SuppressWarnings("unused") + @NonNull + private static ISequence executeTwoArg( + @NonNull IFunction function, + @NonNull List> arguments, + @NonNull DynamicContext dynamicContext, + IItem focus) { + + IDateItem value = FunctionUtils.asTypeOrNull(arguments.get(0).getFirstItem(true)); + if (value == null) { + return ISequence.empty(); + } + + IStringItem picture = FunctionUtils.asType( + 
ObjectUtils.requireNonNull(arguments.get(1).getFirstItem(true))); + + String lang = dynamicContext.getStaticContext().getDefaultLanguage(); + + return ISequence.of(IStringItem.valueOf( + formatDate(value, picture.asString(), lang, null, null))); + } + + @SuppressWarnings("unused") + @NonNull + private static ISequence executeFiveArg( + @NonNull IFunction function, + @NonNull List> arguments, + @NonNull DynamicContext dynamicContext, + IItem focus) { + + IDateItem value = FunctionUtils.asTypeOrNull(arguments.get(0).getFirstItem(true)); + if (value == null) { + return ISequence.empty(); + } + + IStringItem picture = FunctionUtils.asType( + ObjectUtils.requireNonNull(arguments.get(1).getFirstItem(true))); + IStringItem language = FunctionUtils.asTypeOrNull(arguments.get(2).getFirstItem(true)); + IStringItem calendar = FunctionUtils.asTypeOrNull(arguments.get(3).getFirstItem(true)); + IStringItem place = FunctionUtils.asTypeOrNull(arguments.get(4).getFirstItem(true)); + + return ISequence.of(IStringItem.valueOf( + formatDate( + value, + picture.asString(), + language == null ? dynamicContext.getStaticContext().getDefaultLanguage() : language.asString(), + calendar == null ? null : calendar.asString(), + place == null ? null : place.asString()))); + } + + /** + * Format a date value using a picture string per the XPath 3.1 fn:format-date + * specification. 
+ * + * @param value + * the date value to format + * @param picture + * the picture string + * @param language + * the language, or {@code null} + * @param calendar + * the calendar, or {@code null} + * @param place + * the place, or {@code null} + * @return the formatted string + */ + @NonNull + public static String formatDate( + @NonNull IDateItem value, + @NonNull String picture, + @Nullable String language, + @Nullable String calendar, + @Nullable String place) { + return DateTimeFormatUtil.formatDateTime(value, picture, language, calendar, place, DATE_MARKERS); + } +} diff --git a/core/src/main/java/dev/metaschema/core/metapath/function/library/FnFormatDateTime.java b/core/src/main/java/dev/metaschema/core/metapath/function/library/FnFormatDateTime.java new file mode 100644 index 000000000..97d6217dc --- /dev/null +++ b/core/src/main/java/dev/metaschema/core/metapath/function/library/FnFormatDateTime.java @@ -0,0 +1,185 @@ +/* + * SPDX-FileCopyrightText: none + * SPDX-License-Identifier: CC0-1.0 + */ + +package dev.metaschema.core.metapath.function.library; + +import java.util.List; +import java.util.Set; + +import dev.metaschema.core.metapath.DynamicContext; +import dev.metaschema.core.metapath.MetapathConstants; +import dev.metaschema.core.metapath.function.FunctionUtils; +import dev.metaschema.core.metapath.function.IArgument; +import dev.metaschema.core.metapath.function.IFunction; +import dev.metaschema.core.metapath.item.IItem; +import dev.metaschema.core.metapath.item.ISequence; +import dev.metaschema.core.metapath.item.atomic.IDateTimeItem; +import dev.metaschema.core.metapath.item.atomic.IStringItem; +import dev.metaschema.core.util.ObjectUtils; +import edu.umd.cs.findbugs.annotations.NonNull; +import edu.umd.cs.findbugs.annotations.Nullable; +
/**
 * Provides the two-argument and five-argument forms of the XPath 3.1
 * <b>fn:format-dateTime</b> function.
 * <p>
 * Both forms return the empty sequence when the value argument is empty and
 * otherwise delegate the actual formatting to
 * {@code DateTimeFormatUtil.formatDateTime}.
 *
 * @see <a href=
 *      "https://www.w3.org/TR/xpath-functions-31/#func-format-dateTime">XPath
 *      3.1 fn:format-dateTime</a>
 */
public final class FnFormatDateTime {
  private static final String NAME = "format-dateTime";

  /**
   * The component specifiers accepted in a picture string for dateTime values:
   * the date markers, the time markers, and the timezone, calendar and era
   * markers.
   */
  @NonNull
  static final Set<Character> ALL_MARKERS = Set.of(
      'Y', 'M', 'D', 'd', 'F', 'W', 'w',
      'H', 'h', 'P', 'm', 's', 'f',
      'Z', 'z', 'C', 'E');

  /**
   * Signature for {@code format-dateTime($value, $picture)}.
   */
  @NonNull
  static final IFunction SIGNATURE_TWO_ARG = IFunction.builder()
      .name(NAME)
      .namespace(MetapathConstants.NS_METAPATH_FUNCTIONS)
      .deterministic()
      .contextDependent()
      .focusIndependent()
      .argument(IArgument.builder()
          .name("value")
          .type(IDateTimeItem.type())
          .zeroOrOne()
          .build())
      .argument(IArgument.builder()
          .name("picture")
          .type(IStringItem.type())
          .one()
          .build())
      .returnType(IStringItem.type())
      .returnZeroOrOne()
      .functionHandler(FnFormatDateTime::executeTwoArg)
      .build();

  /**
   * Signature for
   * {@code format-dateTime($value, $picture, $language, $calendar, $place)}.
   */
  @NonNull
  static final IFunction SIGNATURE_FIVE_ARG = IFunction.builder()
      .name(NAME)
      .namespace(MetapathConstants.NS_METAPATH_FUNCTIONS)
      .deterministic()
      .contextDependent()
      .focusIndependent()
      .argument(IArgument.builder()
          .name("value")
          .type(IDateTimeItem.type())
          .zeroOrOne()
          .build())
      .argument(IArgument.builder()
          .name("picture")
          .type(IStringItem.type())
          .one()
          .build())
      .argument(IArgument.builder()
          .name("language")
          .type(IStringItem.type())
          .zeroOrOne()
          .build())
      .argument(IArgument.builder()
          .name("calendar")
          .type(IStringItem.type())
          .zeroOrOne()
          .build())
      .argument(IArgument.builder()
          .name("place")
          .type(IStringItem.type())
          .zeroOrOne()
          .build())
      .returnType(IStringItem.type())
      .returnZeroOrOne()
      .functionHandler(FnFormatDateTime::executeFiveArg)
      .build();

  private FnFormatDateTime() {
    // utility class; not instantiable
  }

  /**
   * Handler for the two-argument form. The language is the static context's
   * default language; calendar and place are left unspecified.
   */
  @SuppressWarnings("unused")
  @NonNull
  private static ISequence<IStringItem> executeTwoArg(
      @NonNull IFunction function,
      @NonNull List<ISequence<?>> arguments,
      @NonNull DynamicContext dynamicContext,
      IItem focus) {

    IDateTimeItem dateTime = FunctionUtils.asTypeOrNull(arguments.get(0).getFirstItem(true));
    if (dateTime == null) {
      // an empty $value yields the empty sequence
      return ISequence.empty();
    }

    IStringItem pictureItem = FunctionUtils.asType(
        ObjectUtils.requireNonNull(arguments.get(1).getFirstItem(true)));
    String defaultLanguage = dynamicContext.getStaticContext().getDefaultLanguage();

    String formatted = formatDateTime(dateTime, pictureItem.asString(), defaultLanguage, null, null);
    return ISequence.of(IStringItem.valueOf(formatted));
  }

  /**
   * Handler for the five-argument form. An empty language argument is replaced
   * by the static context's default language; empty calendar and place
   * arguments are passed on as {@code null}.
   */
  @SuppressWarnings("unused")
  @NonNull
  private static ISequence<IStringItem> executeFiveArg(
      @NonNull IFunction function,
      @NonNull List<ISequence<?>> arguments,
      @NonNull DynamicContext dynamicContext,
      IItem focus) {

    IDateTimeItem dateTime = FunctionUtils.asTypeOrNull(arguments.get(0).getFirstItem(true));
    if (dateTime == null) {
      // an empty $value yields the empty sequence
      return ISequence.empty();
    }

    IStringItem pictureItem = FunctionUtils.asType(
        ObjectUtils.requireNonNull(arguments.get(1).getFirstItem(true)));
    IStringItem languageItem = FunctionUtils.asTypeOrNull(arguments.get(2).getFirstItem(true));
    IStringItem calendarItem = FunctionUtils.asTypeOrNull(arguments.get(3).getFirstItem(true));
    IStringItem placeItem = FunctionUtils.asTypeOrNull(arguments.get(4).getFirstItem(true));

    String pictureText = pictureItem.asString();

    String languageText;
    if (languageItem != null) {
      languageText = languageItem.asString();
    } else {
      // the default language is only looked up when none was supplied
      languageText = dynamicContext.getStaticContext().getDefaultLanguage();
    }

    String calendarText = null;
    if (calendarItem != null) {
      calendarText = calendarItem.asString();
    }

    String placeText = null;
    if (placeItem != null) {
      placeText = placeItem.asString();
    }

    String formatted = formatDateTime(dateTime, pictureText, languageText, calendarText, placeText);
    return ISequence.of(IStringItem.valueOf(formatted));
  }

  /**
   * Formats a dateTime value according to a picture string, following the XPath
   * 3.1 fn:format-dateTime specification. All markers in {@link #ALL_MARKERS}
   * are permitted in the picture.
   *
   * @param value
   *          the dateTime value to format
   * @param picture
   *          the picture string
   * @param language
   *          the language, or {@code null}
   * @param calendar
   *          the calendar, or {@code null}
   * @param place
   *          the place, or {@code null}
   * @return the formatted string
   */
  @NonNull
  public static String formatDateTime(
      @NonNull IDateTimeItem value,
      @NonNull String picture,
      @Nullable String language,
      @Nullable String calendar,
      @Nullable String place) {
    return DateTimeFormatUtil.formatDateTime(value, picture, language, calendar, place, ALL_MARKERS);
  }
}
diff --git a/core/src/main/java/dev/metaschema/core/metapath/function/library/FnFormatInteger.java b/core/src/main/java/dev/metaschema/core/metapath/function/library/FnFormatInteger.java new file mode 100644 index 000000000..dc955e658 --- /dev/null +++ b/core/src/main/java/dev/metaschema/core/metapath/function/library/FnFormatInteger.java @@ -0,0 +1,807 @@ +/* + * SPDX-FileCopyrightText: none + * SPDX-License-Identifier: CC0-1.0 + */ + +package dev.metaschema.core.metapath.function.library; + +import java.math.BigInteger; +import java.util.ArrayList; +import java.util.List; +import java.util.Locale; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import dev.metaschema.core.metapath.DynamicContext; +import dev.metaschema.core.metapath.MetapathConstants; +import dev.metaschema.core.metapath.function.FormatFunctionException; +import dev.metaschema.core.metapath.function.FunctionUtils; +import dev.metaschema.core.metapath.function.IArgument; +import dev.metaschema.core.metapath.function.IFunction; +import dev.metaschema.core.metapath.item.IItem; +import dev.metaschema.core.metapath.item.ISequence; +import dev.metaschema.core.metapath.item.atomic.IIntegerItem; +import dev.metaschema.core.metapath.item.atomic.IStringItem; +import dev.metaschema.core.util.ObjectUtils; +import edu.umd.cs.findbugs.annotations.NonNull; +import edu.umd.cs.findbugs.annotations.Nullable; +
+/** + * Implements the XPath 3.1 fn:format-integer + * functions. + * + * @see XPath + * 3.1 fn:format-integer + */ +public final class FnFormatInteger { + private static final String NAME = "format-integer"; + + @NonNull + static final IFunction SIGNATURE_TWO_ARG = IFunction.builder() + .name(NAME) + .namespace(MetapathConstants.NS_METAPATH_FUNCTIONS) + .deterministic() + .contextDependent() + .focusIndependent() + .argument(IArgument.builder() + .name("value") + .type(IIntegerItem.type()) + .zeroOrOne() + .build()) + .argument(IArgument.builder() + .name("picture") + .type(IStringItem.type()) + .one() + .build()) + .returnType(IStringItem.type()) + .returnOne() + .functionHandler(FnFormatInteger::executeTwoArg) + .build(); + + @NonNull + static final IFunction SIGNATURE_THREE_ARG = IFunction.builder() + .name(NAME) + .namespace(MetapathConstants.NS_METAPATH_FUNCTIONS) + .deterministic() + .contextIndependent() + .focusIndependent() + .argument(IArgument.builder() + .name("value") + .type(IIntegerItem.type()) + .zeroOrOne() + .build()) + .argument(IArgument.builder() + .name("picture") + .type(IStringItem.type()) + .one() + .build()) + .argument(IArgument.builder() + .name("lang") + .type(IStringItem.type()) + .zeroOrOne() + .build()) + .returnType(IStringItem.type()) + .returnOne() + .functionHandler(FnFormatInteger::executeThreeArg) + .build(); + + /** + * Roman numeral values in descending order, used for converting integers to + * Roman numeral representation. + */ + private static final int[] ROMAN_VALUES = { 1000, 900, 500, 400, 100, 90, 50, 40, 10, 9, 5, 4, 1 }; + + /** + * Roman numeral symbols corresponding to {@link #ROMAN_VALUES}. 
+ */ + private static final String[] ROMAN_SYMBOLS = { "M", "CM", "D", "CD", "C", "XC", "L", "XL", "X", "IX", "V", "IV", + "I" }; + + private static final String[] ONES + = { "", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine", + "ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen", "sixteen", + "seventeen", "eighteen", "nineteen" }; + + private static final String[] TENS + = { "", "", "twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety" }; + + /** + * Pattern to match the format modifier portion of a picture string. The + * modifier appears after the last {@code ;} in the picture and must match + * {@code ^([co](\(.+\))?)?[at]?$}. + */ + private static final Pattern MODIFIER_PATTERN + = Pattern.compile("^([co](\\(.+\\))?)?[at]?$"); + + private FnFormatInteger() { + // disable construction + } + + @SuppressWarnings("unused") + @NonNull + private static ISequence executeTwoArg( + @NonNull IFunction function, + @NonNull List> arguments, + @NonNull DynamicContext dynamicContext, + IItem focus) { + + IIntegerItem value = FunctionUtils.asTypeOrNull(arguments.get(0).getFirstItem(true)); + IStringItem picture = FunctionUtils.asType( + ObjectUtils.requireNonNull(arguments.get(1).getFirstItem(true))); + + String lang = dynamicContext.getStaticContext().getDefaultLanguage(); + + return ISequence.of(IStringItem.valueOf( + fnFormatInteger(value, picture.asString(), lang))); + } + + @SuppressWarnings("unused") + @NonNull + private static ISequence executeThreeArg( + @NonNull IFunction function, + @NonNull List> arguments, + @NonNull DynamicContext dynamicContext, + IItem focus) { + + IIntegerItem value = FunctionUtils.asTypeOrNull(arguments.get(0).getFirstItem(true)); + IStringItem picture = FunctionUtils.asType( + ObjectUtils.requireNonNull(arguments.get(1).getFirstItem(true))); + IStringItem lang = FunctionUtils.asTypeOrNull(arguments.get(2).getFirstItem(true)); + + return ISequence.of(IStringItem.valueOf( + 
fnFormatInteger(value, picture.asString(), lang == null ? null : lang.asString()))); + } + + /** + * An implementation of XPath 3.1 fn:format-integer. + * + * @param value + * the integer value to format, or {@code null} for empty sequence + * @param picture + * the picture string controlling the format + * @param lang + * the language for locale-dependent formatting, or {@code null} to use + * the default + * @return the formatted integer string + * @throws FormatFunctionException + * if the picture string contains an invalid format token + */ + @NonNull + public static String fnFormatInteger( + @Nullable IIntegerItem value, + @NonNull String picture, + @Nullable String lang) { + + // If $value is an empty sequence, return zero-length string + if (value == null) { + return ""; + } + + if (picture.isEmpty()) { + throw new FormatFunctionException( + FormatFunctionException.INVALID_FORMAT_TOKEN, + "The picture string for format-integer must not be empty."); + } + + // Parse primary format token and modifier + // The modifier is separated by the last ';' that is part of the modifier + // syntax. We need to find the split point. 
+ String[] parsed = parsePicture(picture); + String primaryToken = parsed[0]; + String modifier = parsed[1]; + + // Parse modifier flags + boolean ordinal = false; + if (!modifier.isEmpty()) { + Matcher modMatcher = MODIFIER_PATTERN.matcher(modifier); + if (!modMatcher.matches()) { + throw new FormatFunctionException( + FormatFunctionException.INVALID_FORMAT_TOKEN, + String.format("Invalid format modifier '%s' in picture string '%s'.", modifier, picture)); + } + String modLetters = modMatcher.group(1); + if (modLetters != null && modLetters.startsWith("o")) { + ordinal = true; + } + } + + BigInteger bigValue = value.asInteger(); + boolean negative = bigValue.signum() < 0; + BigInteger absValue = bigValue.abs(); + + String formatted = formatWithPrimaryToken(primaryToken, absValue, picture); + + // Apply ordinal suffix if requested and supported for this format token. + // Per spec: "If ordinal numbering is not supported for the combination of + // the format token, the language, and the string appearing in parentheses, + // the request is ignored and cardinal numbers are generated instead." + // Only decimal digit patterns support ordinal suffix in this implementation. + if (ordinal && isDecimalDigitPattern(primaryToken)) { + formatted = applyOrdinal(formatted, absValue); + } + + // Prepend minus sign for negative values + if (negative) { + formatted = "-" + formatted; + } + + return ObjectUtils.notNull(formatted); + } + + /** + * Parses the picture string into the primary format token and the format + * modifier. The modifier is separated from the primary token by the last + * semicolon. However, semicolons can appear as grouping separators within the + * primary token. The modifier must match {@code ^([co](\(.+\))?)?[at]?$}. 
+ * + * @param picture + * the picture string to parse + * @return a two-element array where index 0 is the primary format token and + * index 1 is the format modifier + */ + @NonNull + private static String[] parsePicture(@NonNull String picture) { + // Try splitting at each ';' from the right. The part after the ';' must match + // the modifier pattern (or be empty). The first valid split from the right is + // the correct one. + for (int i = picture.length() - 1; i >= 0; i--) { + if (picture.charAt(i) == ';') { + String candidateModifier = picture.substring(i + 1); + String candidateToken = picture.substring(0, i); + if (MODIFIER_PATTERN.matcher(candidateModifier).matches()) { + return new String[] { candidateToken, candidateModifier }; + } + } + } + // No valid modifier split found; the entire picture is the primary token + return new String[] { picture, "" }; + } + + /** + * Formats the absolute integer value using the given primary format token. + * + * @param primaryToken + * the primary format token + * @param absValue + * the absolute value of the integer to format + * @param picture + * the original picture string, used in error messages + * @return the formatted string + * @throws FormatFunctionException + * if the format token is invalid + */ + @NonNull + private static String formatWithPrimaryToken( + @NonNull String primaryToken, + @NonNull BigInteger absValue, + @NonNull String picture) { + + if (primaryToken.isEmpty()) { + throw new FormatFunctionException( + FormatFunctionException.INVALID_FORMAT_TOKEN, + String.format("The primary format token in picture string '%s' must not be empty.", picture)); + } + + // Check for known named tokens + if ("a".equals(primaryToken)) { + return formatAlphabetic(absValue, false); + } + if ("A".equals(primaryToken)) { + return formatAlphabetic(absValue, true); + } + if ("i".equals(primaryToken)) { + return formatRoman(absValue, false); + } + if ("I".equals(primaryToken)) { + return formatRoman(absValue, true); + } 
+ if ("w".equals(primaryToken)) { + return formatWords(absValue, false, false); + } + if ("W".equals(primaryToken)) { + return formatWords(absValue, true, false); + } + if ("Ww".equals(primaryToken)) { + return formatWords(absValue, false, true); + } + + // Must be a decimal digit pattern + return formatDecimalDigitPattern(primaryToken, absValue, picture); + } + + /** + * Formats an integer as a decimal digit pattern with optional grouping + * separators and zero-padding. + * + * @param pattern + * the decimal digit pattern portion of the picture string + * @param absValue + * the absolute value of the integer to format + * @param picture + * the original picture string, used in error messages + * @return the formatted decimal string + * @throws FormatFunctionException + * if the pattern is invalid + */ + @NonNull + @SuppressWarnings("PMD.CyclomaticComplexity") + private static String formatDecimalDigitPattern( + @NonNull String pattern, + @NonNull BigInteger absValue, + @NonNull String picture) { + + // Parse the pattern to identify mandatory digits, optional digits, and grouping + // separators. Mandatory digits are '0'-'9', optional digits are '#', and + // everything else that is not a letter or digit is a grouping separator. 
+ List patternChars = new ArrayList<>(); + List isSeparator = new ArrayList<>(); + + int mandatoryCount = 0; + boolean foundMandatory = false; + boolean hasOptional = false; + + for (int i = 0; i < pattern.length(); i++) { + char ch = pattern.charAt(i); + patternChars.add(ch); + + if (ch >= '0' && ch <= '9') { + isSeparator.add(false); + mandatoryCount++; + foundMandatory = true; + } else if (ch == '#') { + if (foundMandatory) { + // optional digits must precede mandatory digits + throw new FormatFunctionException( + FormatFunctionException.INVALID_FORMAT_TOKEN, + String.format( + "In picture string '%s', optional-digit-sign '#' must precede all mandatory-digit-signs.", + picture)); + } + isSeparator.add(false); + hasOptional = true; + } else if (!Character.isLetterOrDigit(ch)) { + // grouping separator + isSeparator.add(true); + } else { + // unrecognized letter/digit that isn't 0-9 or #; fallback to format '1' + return ObjectUtils.notNull(absValue.toString()); + } + } + + if (mandatoryCount == 0) { + throw new FormatFunctionException( + FormatFunctionException.INVALID_FORMAT_TOKEN, + String.format( + "The decimal digit pattern in picture string '%s' must contain at least one mandatory digit.", + picture)); + } + + // Validate: separators not at start or end, and not adjacent + validateSeparators(patternChars, isSeparator, picture); + + // Determine the grouping separator character and positions (from right) + // We work from the right side of the pattern. 
+ char groupingSep = 0; + List groupPositions = new ArrayList<>(); + int digitIndex = 0; + for (int i = patternChars.size() - 1; i >= 0; i--) { + if (Boolean.TRUE.equals(isSeparator.get(i))) { + groupingSep = patternChars.get(i); + groupPositions.add(digitIndex); + } else { + digitIndex++; + } + } + + // Format the number with minimum width + String digits = absValue.toString(); + if (digits.length() < mandatoryCount) { + StringBuilder padded = new StringBuilder(); + for (int i = digits.length(); i < mandatoryCount; i++) { + padded.append('0'); + } + padded.append(digits); + digits = padded.toString(); + } + + // Insert grouping separators if any + if (!groupPositions.isEmpty() && groupingSep != 0) { + digits = insertGroupingSeparators(digits, groupingSep, groupPositions, hasOptional); + } + + return ObjectUtils.notNull(digits); + } + + /** + * Validates that grouping separators do not appear at the start or end of the + * pattern, and that no two separators are adjacent. + * + * @param patternChars + * the characters in the pattern + * @param isSeparator + * flags indicating which positions are separators + * @param picture + * the original picture string, used in error messages + * @throws FormatFunctionException + * if separator placement is invalid + */ + private static void validateSeparators( + @NonNull List patternChars, + @NonNull List isSeparator, + @NonNull String picture) { + + if (!patternChars.isEmpty()) { + if (Boolean.TRUE.equals(isSeparator.get(0))) { + throw new FormatFunctionException( + FormatFunctionException.INVALID_FORMAT_TOKEN, + String.format( + "Grouping separator must not appear at the start of the pattern in picture string '%s'.", + picture)); + } + if (Boolean.TRUE.equals(isSeparator.get(isSeparator.size() - 1))) { + throw new FormatFunctionException( + FormatFunctionException.INVALID_FORMAT_TOKEN, + String.format( + "Grouping separator must not appear at the end of the pattern in picture string '%s'.", + picture)); + } + for (int i = 1; 
i < isSeparator.size(); i++) { + if (Boolean.TRUE.equals(isSeparator.get(i)) && Boolean.TRUE.equals(isSeparator.get(i - 1))) { + throw new FormatFunctionException( + FormatFunctionException.INVALID_FORMAT_TOKEN, + String.format( + "Adjacent grouping separators are not allowed in picture string '%s'.", + picture)); + } + } + } + } + + /** + * Inserts grouping separators into a digit string at specified positions. + * + *

+ * If separators appear at regular intervals (all same character, evenly + * spaced), the pattern is extrapolated to the left. Otherwise, separators are + * inserted only at the explicit positions. + * + * @param digits + * the digit string to insert separators into + * @param separator + * the grouping separator character + * @param positions + * the positions (from right, 0-based digit positions) where separators + * appear in the pattern + * @param hasOptional + * whether the pattern contains optional-digit-signs + * @return the digit string with grouping separators inserted + */ + @NonNull + private static String insertGroupingSeparators( + @NonNull String digits, + char separator, + @NonNull List positions, + boolean hasOptional) { + + // Determine if the pattern is regular (all positions at same interval) + int groupSize = -1; + boolean regular = true; + + List sorted = new ArrayList<>(positions); + sorted.sort(null); + + if (sorted.size() == 1) { + groupSize = sorted.get(0); + regular = true; + } else { + // Check if all positions are at regular intervals (multiples of the smallest) + int candidate = sorted.get(0); + regular = true; + for (int i = 0; i < sorted.size(); i++) { + if (sorted.get(i) != candidate * (i + 1)) { + regular = false; + break; + } + } + if (regular) { + groupSize = candidate; + } + } + + // Build result from right to left, inserting separators at group boundaries + StringBuilder result = new StringBuilder(); + int digitCount = digits.length(); + int rightDigitCount = 0; + + for (int i = digitCount - 1; i >= 0; i--) { + if (rightDigitCount > 0) { + boolean insertSep; + if (regular && groupSize > 0) { + insertSep = rightDigitCount % groupSize == 0; + } else { + insertSep = sorted.contains(rightDigitCount); + } + if (insertSep) { + result.insert(0, separator); + } + } + result.insert(0, digits.charAt(i)); + rightDigitCount++; + } + + return ObjectUtils.notNull(result.toString()); + } + + /** + * Formats an integer as an alphabetic 
sequence (a, b, ..., z, aa, ab, ...). + * + *

+ * The value 1 maps to 'a', 2 to 'b', ..., 26 to 'z', 27 to 'aa', 28 to 'ab', + * and so on, similar to spreadsheet column names. Zero is formatted as '0'. + * + * @param absValue + * the absolute value of the integer + * @param uppercase + * whether to produce uppercase letters + * @return the alphabetic representation + */ + @NonNull + private static String formatAlphabetic(@NonNull BigInteger absValue, boolean uppercase) { + if (absValue.signum() == 0) { + return "0"; + } + + char base = uppercase ? 'A' : 'a'; + StringBuilder result = new StringBuilder(); + BigInteger remaining = absValue; + BigInteger twentySix = BigInteger.valueOf(26); + + while (remaining.signum() > 0) { + remaining = remaining.subtract(BigInteger.ONE); + int digit = remaining.mod(twentySix).intValue(); + result.insert(0, (char) (base + digit)); + remaining = remaining.divide(twentySix); + } + + return ObjectUtils.notNull(result.toString()); + } + + /** + * Formats an integer as a Roman numeral string using standard subtractive + * notation. Supports values from 1 to 3999. 
+ * + * @param absValue + * the absolute value of the integer + * @param uppercase + * whether to produce uppercase Roman numerals + * @return the Roman numeral representation + * @throws FormatFunctionException + * if the value is zero or exceeds 3999 + */ + @NonNull + private static String formatRoman(@NonNull BigInteger absValue, boolean uppercase) { + if (absValue.signum() == 0 || absValue.compareTo(BigInteger.valueOf(3999)) > 0) { + // Fallback: use decimal for values outside Roman numeral range + return ObjectUtils.notNull(absValue.toString()); + } + + int num = absValue.intValue(); + StringBuilder result = new StringBuilder(); + for (int i = 0; i < ROMAN_VALUES.length; i++) { + while (num >= ROMAN_VALUES[i]) { + result.append(ROMAN_SYMBOLS[i]); + num -= ROMAN_VALUES[i]; + } + } + + String roman = result.toString(); + if (!uppercase) { + roman = roman.toLowerCase(Locale.ROOT); + } + return ObjectUtils.notNull(roman); + } + + /** + * Formats an integer as English words. + * + * @param absValue + * the absolute value of the integer + * @param allUppercase + * whether to produce all-uppercase output + * @param titleCase + * whether to produce title-case output (first letter of each word + * capitalized) + * @return the English word representation + */ + @NonNull + private static String formatWords(@NonNull BigInteger absValue, boolean allUppercase, boolean titleCase) { + String words = numberToWords(absValue); + + if (allUppercase) { + words = words.toUpperCase(Locale.ROOT); + } else if (titleCase) { + words = toTitleCase(words); + } + + return ObjectUtils.notNull(words); + } + + /** + * Converts a non-negative integer to its English word representation. Supports + * values up to 999,999,999 and beyond through recursive decomposition. 
+ * + * @param value + * the non-negative integer value + * @return the English word representation in lowercase + */ + @NonNull + @SuppressWarnings("PMD.CyclomaticComplexity") + private static String numberToWords(@NonNull BigInteger value) { + if (value.signum() == 0) { + return "zero"; + } + + if (value.compareTo(BigInteger.valueOf(20)) < 0) { + return ObjectUtils.notNull(ONES[value.intValue()]); + } + + if (value.compareTo(BigInteger.valueOf(100)) < 0) { + int tens = value.intValue() / 10; + int ones = value.intValue() % 10; + if (ones == 0) { + return ObjectUtils.notNull(TENS[tens]); + } + return ObjectUtils.notNull(TENS[tens] + "-" + ONES[ones]); + } + + if (value.compareTo(BigInteger.valueOf(1000)) < 0) { + int hundreds = value.intValue() / 100; + int remainder = value.intValue() % 100; + if (remainder == 0) { + return ONES[hundreds] + " hundred"; + } + return ONES[hundreds] + " hundred " + numberToWords(BigInteger.valueOf(remainder)); + } + + // Handle thousands, millions, billions, etc. + return formatLargeNumber(value); + } + + /** + * Formats a number of 1000 or greater using the standard naming convention + * (thousand, million, billion, trillion, etc.). 
+ * + * @param value + * the value to format (must be >= 1000) + * @return the English word representation + */ + @NonNull + private static String formatLargeNumber(@NonNull BigInteger value) { + String[] scaleWords = { "", "thousand", "million", "billion", "trillion", + "quadrillion", "quintillion", "sextillion", "septillion" }; + BigInteger oneThousand = BigInteger.valueOf(1000); + + // Fall back to decimal representation for values beyond supported scale + BigInteger maxSupported = BigInteger.TEN.pow((scaleWords.length) * 3); + if (value.compareTo(maxSupported) >= 0) { + return ObjectUtils.notNull(value.toString()); + } + + // Decompose into groups of three digits + List groups = new ArrayList<>(); + BigInteger remaining = value; + while (remaining.signum() > 0) { + groups.add(remaining.mod(oneThousand).intValue()); + remaining = remaining.divide(oneThousand); + } + + StringBuilder result = new StringBuilder(); + for (int i = groups.size() - 1; i >= 0; i--) { + int group = groups.get(i); + if (group == 0) { + continue; + } + if (result.length() > 0) { + result.append(' '); + } + result.append(numberToWords(BigInteger.valueOf(group))); + if (i > 0 && i < scaleWords.length) { + result.append(' ').append(scaleWords[i]); + } + } + + return ObjectUtils.notNull(result.toString()); + } + + /** + * Converts a hyphen-separated word string to title case, where the first letter + * of each word (split on spaces and hyphens) is capitalized. 
+ * + * @param input + * the input string in lowercase + * @return the title-cased string + */ + @NonNull + private static String toTitleCase(@NonNull String input) { + StringBuilder result = new StringBuilder(); + boolean capitalizeNext = true; + + for (int i = 0; i < input.length(); i++) { + char ch = input.charAt(i); + if (ch == ' ' || ch == '-') { + result.append(ch); + capitalizeNext = true; + } else if (capitalizeNext) { + result.append(Character.toUpperCase(ch)); + capitalizeNext = false; + } else { + result.append(ch); + } + } + + return ObjectUtils.notNull(result.toString()); + } + + /** + * Checks whether the given primary format token is a decimal digit pattern + * (contains at least one ASCII digit or '#'). Named format tokens like + * {@code a}, {@code i}, {@code w}, etc. are not decimal digit patterns. + * + * @param primaryToken + * the primary format token + * @return {@code true} if the token is a decimal digit pattern + */ + private static boolean isDecimalDigitPattern(@NonNull String primaryToken) { + for (int i = 0; i < primaryToken.length(); i++) { + char ch = primaryToken.charAt(i); + if ((ch >= '0' && ch <= '9') || ch == '#') { + return true; + } + } + return false; + } + + /** + * Appends an ordinal suffix to a formatted number string. For English, the + * rules are: + *

    + *
  • If the last two digits are 11, 12, or 13: "th"
  • + *
  • If the last digit is 1: "st"
  • + *
  • If the last digit is 2: "nd"
  • + *
  • If the last digit is 3: "rd"
  • + *
  • Otherwise: "th"
  • + *
+ * + * @param formatted + * the formatted number string + * @param absValue + * the absolute value of the integer + * @return the formatted string with ordinal suffix appended + */ + @NonNull + private static String applyOrdinal( + @NonNull String formatted, + @NonNull BigInteger absValue) { + + int num = absValue.mod(BigInteger.valueOf(100)).intValue(); + int lastDigit = absValue.mod(BigInteger.valueOf(10)).intValue(); + + String suffix; + if (num >= 11 && num <= 13) { + suffix = "th"; + } else if (lastDigit == 1) { + suffix = "st"; + } else if (lastDigit == 2) { + suffix = "nd"; + } else if (lastDigit == 3) { + suffix = "rd"; + } else { + suffix = "th"; + } + + return ObjectUtils.notNull(formatted + suffix); + } +} diff --git a/core/src/main/java/dev/metaschema/core/metapath/function/library/FnFormatTime.java b/core/src/main/java/dev/metaschema/core/metapath/function/library/FnFormatTime.java new file mode 100644 index 000000000..0228eca8a --- /dev/null +++ b/core/src/main/java/dev/metaschema/core/metapath/function/library/FnFormatTime.java @@ -0,0 +1,186 @@ +/* + * SPDX-FileCopyrightText: none + * SPDX-License-Identifier: CC0-1.0 + */ + +package dev.metaschema.core.metapath.function.library; + +import java.util.List; +import java.util.Set; + +import dev.metaschema.core.metapath.DynamicContext; +import dev.metaschema.core.metapath.MetapathConstants; +import dev.metaschema.core.metapath.function.FunctionUtils; +import dev.metaschema.core.metapath.function.IArgument; +import dev.metaschema.core.metapath.function.IFunction; +import dev.metaschema.core.metapath.item.IItem; +import dev.metaschema.core.metapath.item.ISequence; +import dev.metaschema.core.metapath.item.atomic.IStringItem; +import dev.metaschema.core.metapath.item.atomic.ITimeItem; +import dev.metaschema.core.util.ObjectUtils; +import edu.umd.cs.findbugs.annotations.NonNull; +import edu.umd.cs.findbugs.annotations.Nullable; + +/** + * Implements the XPath 3.1 fn:format-time + * functions. 
+ * + * @see XPath 3.1 + * fn:format-time + */ +public final class FnFormatTime { + private static final String NAME = "format-time"; + + /** + * The set of component specifiers allowed for time values, which excludes + * date-only markers (Y, M, D, d, F, W, w) and era (E), since era is defined as + * "a baseline for the numbering of years" and is not available in + * {@code xs:time} values. + */ + @NonNull + static final Set TIME_MARKERS = Set.of( + 'H', 'h', 'P', 'm', 's', 'f', + 'Z', 'z', 'C'); + + @NonNull + static final IFunction SIGNATURE_TWO_ARG = IFunction.builder() + .name(NAME) + .namespace(MetapathConstants.NS_METAPATH_FUNCTIONS) + .deterministic() + .contextDependent() + .focusIndependent() + .argument(IArgument.builder() + .name("value") + .type(ITimeItem.type()) + .zeroOrOne() + .build()) + .argument(IArgument.builder() + .name("picture") + .type(IStringItem.type()) + .one() + .build()) + .returnType(IStringItem.type()) + .returnZeroOrOne() + .functionHandler(FnFormatTime::executeTwoArg) + .build(); + + @NonNull + static final IFunction SIGNATURE_FIVE_ARG = IFunction.builder() + .name(NAME) + .namespace(MetapathConstants.NS_METAPATH_FUNCTIONS) + .deterministic() + .contextDependent() + .focusIndependent() + .argument(IArgument.builder() + .name("value") + .type(ITimeItem.type()) + .zeroOrOne() + .build()) + .argument(IArgument.builder() + .name("picture") + .type(IStringItem.type()) + .one() + .build()) + .argument(IArgument.builder() + .name("language") + .type(IStringItem.type()) + .zeroOrOne() + .build()) + .argument(IArgument.builder() + .name("calendar") + .type(IStringItem.type()) + .zeroOrOne() + .build()) + .argument(IArgument.builder() + .name("place") + .type(IStringItem.type()) + .zeroOrOne() + .build()) + .returnType(IStringItem.type()) + .returnZeroOrOne() + .functionHandler(FnFormatTime::executeFiveArg) + .build(); + + private FnFormatTime() { + // disable construction + } + + @SuppressWarnings("unused") + @NonNull + private static 
ISequence executeTwoArg( + @NonNull IFunction function, + @NonNull List> arguments, + @NonNull DynamicContext dynamicContext, + IItem focus) { + + ITimeItem value = FunctionUtils.asTypeOrNull(arguments.get(0).getFirstItem(true)); + if (value == null) { + return ISequence.empty(); + } + + IStringItem picture = FunctionUtils.asType( + ObjectUtils.requireNonNull(arguments.get(1).getFirstItem(true))); + + String lang = dynamicContext.getStaticContext().getDefaultLanguage(); + + return ISequence.of(IStringItem.valueOf( + formatTime(value, picture.asString(), lang, null, null))); + } + + @SuppressWarnings("unused") + @NonNull + private static ISequence executeFiveArg( + @NonNull IFunction function, + @NonNull List> arguments, + @NonNull DynamicContext dynamicContext, + IItem focus) { + + ITimeItem value = FunctionUtils.asTypeOrNull(arguments.get(0).getFirstItem(true)); + if (value == null) { + return ISequence.empty(); + } + + IStringItem picture = FunctionUtils.asType( + ObjectUtils.requireNonNull(arguments.get(1).getFirstItem(true))); + IStringItem language = FunctionUtils.asTypeOrNull(arguments.get(2).getFirstItem(true)); + IStringItem calendar = FunctionUtils.asTypeOrNull(arguments.get(3).getFirstItem(true)); + IStringItem place = FunctionUtils.asTypeOrNull(arguments.get(4).getFirstItem(true)); + + return ISequence.of(IStringItem.valueOf( + formatTime( + value, + picture.asString(), + language == null ? dynamicContext.getStaticContext().getDefaultLanguage() : language.asString(), + calendar == null ? null : calendar.asString(), + place == null ? null : place.asString()))); + } + + /** + * Format a time value using a picture string per the XPath 3.1 fn:format-time + * specification. 
+ * + * @param value + * the time value to format + * @param picture + * the picture string + * @param language + * the language, or {@code null} + * @param calendar + * the calendar, or {@code null} + * @param place + * the place, or {@code null} + * @return the formatted string + */ + @NonNull + public static String formatTime( + @NonNull ITimeItem value, + @NonNull String picture, + @Nullable String language, + @Nullable String calendar, + @Nullable String place) { + return DateTimeFormatUtil.formatDateTime(value, picture, language, calendar, place, TIME_MARKERS); + } +} diff --git a/core/src/test/java/dev/metaschema/core/metapath/function/FormatDateTimeFunctionExceptionTest.java b/core/src/test/java/dev/metaschema/core/metapath/function/FormatDateTimeFunctionExceptionTest.java new file mode 100644 index 000000000..b65413aef --- /dev/null +++ b/core/src/test/java/dev/metaschema/core/metapath/function/FormatDateTimeFunctionExceptionTest.java @@ -0,0 +1,92 @@ +/* + * SPDX-FileCopyrightText: none + * SPDX-License-Identifier: CC0-1.0 + */ + +package dev.metaschema.core.metapath.function; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertSame; + +import org.junit.jupiter.api.Test; + +import dev.metaschema.core.metapath.IErrorCode; + +/** + * Unit tests for {@link FormatDateTimeFunctionException}. 
+ */ +class FormatDateTimeFunctionExceptionTest { + + @Test + void testConstructWithInvalidPictureStringCodeAndMessage() { + String message = "Invalid picture string component"; + FormatDateTimeFunctionException ex + = new FormatDateTimeFunctionException( + FormatDateTimeFunctionException.INVALID_PICTURE_STRING, + message); + + IErrorCode errorCode = ex.getErrorCode(); + assertEquals("FOFD", errorCode.getPrefix()); + assertEquals(1340, errorCode.getCode()); + assertEquals("FOFD1340", errorCode.getCodeAsString()); + assertEquals("FOFD1340: " + message, ex.getMessage()); + } + + @Test + void testConstructWithComponentNotAvailableCodeAndMessage() { + String message = "Component not available for formatting"; + FormatDateTimeFunctionException ex + = new FormatDateTimeFunctionException( + FormatDateTimeFunctionException.COMPONENT_NOT_AVAILABLE, + message); + + IErrorCode errorCode = ex.getErrorCode(); + assertEquals("FOFD", errorCode.getPrefix()); + assertEquals(1350, errorCode.getCode()); + assertEquals("FOFD1350", errorCode.getCodeAsString()); + assertEquals("FOFD1350: " + message, ex.getMessage()); + } + + @Test + void testErrorCodePrefixIsFofd() { + FormatDateTimeFunctionException ex + = new FormatDateTimeFunctionException( + FormatDateTimeFunctionException.INVALID_PICTURE_STRING, + "test"); + + assertEquals("FOFD", ex.getErrorCode().getPrefix()); + } + + @Test + void testConstructWithCodeMessageAndCause() { + String message = "Something went wrong"; + Throwable cause = new IllegalArgumentException("root cause"); + FormatDateTimeFunctionException ex + = new FormatDateTimeFunctionException( + FormatDateTimeFunctionException.INVALID_PICTURE_STRING, + message, + cause); + + IErrorCode errorCode = ex.getErrorCode(); + assertEquals("FOFD", errorCode.getPrefix()); + assertEquals(1340, errorCode.getCode()); + assertEquals("FOFD1340: " + message, ex.getMessage()); + assertSame(cause, ex.getCause()); + } + + @Test + void testConstructWithCodeAndCauseOnly() { + Throwable 
cause = new IllegalStateException("underlying error"); + FormatDateTimeFunctionException ex + = new FormatDateTimeFunctionException( + FormatDateTimeFunctionException.COMPONENT_NOT_AVAILABLE, + cause); + + IErrorCode errorCode = ex.getErrorCode(); + assertEquals("FOFD", errorCode.getPrefix()); + assertEquals(1350, errorCode.getCode()); + assertSame(cause, ex.getCause()); + // getMessage() will include the error code prefix but no custom message text + assertEquals("FOFD1350", ex.getMessage()); + } +} diff --git a/core/src/test/java/dev/metaschema/core/metapath/function/FormatFunctionExceptionTest.java b/core/src/test/java/dev/metaschema/core/metapath/function/FormatFunctionExceptionTest.java new file mode 100644 index 000000000..bed662914 --- /dev/null +++ b/core/src/test/java/dev/metaschema/core/metapath/function/FormatFunctionExceptionTest.java @@ -0,0 +1,69 @@ +/* + * SPDX-FileCopyrightText: none + * SPDX-License-Identifier: CC0-1.0 + */ + +package dev.metaschema.core.metapath.function; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertSame; + +import org.junit.jupiter.api.Test; + +import dev.metaschema.core.metapath.IErrorCode; + +class FormatFunctionExceptionTest { + + @Test + void testConstructionWithCodeAndMessage() { + String message = "invalid format token '~'"; + FormatFunctionException ex + = new FormatFunctionException(FormatFunctionException.INVALID_FORMAT_TOKEN, message); + + IErrorCode errorCode = ex.getErrorCode(); + assertNotNull(errorCode); + assertEquals("FODF", errorCode.getPrefix()); + assertEquals(1310, errorCode.getCode()); + assertEquals("FODF1310: " + message, ex.getMessage()); + assertNull(ex.getCause()); + } + + @Test + void testErrorCodePrefix() { + FormatFunctionException ex + = new 
FormatFunctionException(FormatFunctionException.INVALID_FORMAT_TOKEN, "test"); + + IErrorCode errorCode = ex.getErrorCode(); + assertEquals("FODF", errorCode.getPrefix()); + assertEquals(1310, errorCode.getCode()); + assertEquals("FODF1310", errorCode.getCodeAsString()); + } + + @Test + void testConstructionWithCodeMessageAndCause() { + String message = "invalid format token"; + Throwable cause = new IllegalArgumentException("bad token"); + FormatFunctionException ex + = new FormatFunctionException(FormatFunctionException.INVALID_FORMAT_TOKEN, message, cause); + + IErrorCode errorCode = ex.getErrorCode(); + assertEquals("FODF", errorCode.getPrefix()); + assertEquals(1310, errorCode.getCode()); + assertEquals("FODF1310: " + message, ex.getMessage()); + assertSame(cause, ex.getCause()); + } + + @Test + void testConstructionWithCodeAndCause() { + Throwable cause = new IllegalArgumentException("bad token"); + FormatFunctionException ex + = new FormatFunctionException(FormatFunctionException.INVALID_FORMAT_TOKEN, cause); + + IErrorCode errorCode = ex.getErrorCode(); + assertEquals("FODF", errorCode.getPrefix()); + assertEquals(1310, errorCode.getCode()); + assertSame(cause, ex.getCause()); + } +} diff --git a/core/src/test/java/dev/metaschema/core/metapath/function/library/DateTimeFormatUtilTest.java b/core/src/test/java/dev/metaschema/core/metapath/function/library/DateTimeFormatUtilTest.java new file mode 100644 index 000000000..dd8be94bb --- /dev/null +++ b/core/src/test/java/dev/metaschema/core/metapath/function/library/DateTimeFormatUtilTest.java @@ -0,0 +1,932 @@ +/* + * SPDX-FileCopyrightText: none + * SPDX-License-Identifier: CC0-1.0 + */ + +package dev.metaschema.core.metapath.function.library; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertInstanceOf; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import 
org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + +import java.util.List; +import java.util.Set; +import java.util.stream.Stream; + +import dev.metaschema.core.metapath.function.FormatDateTimeFunctionException; +import dev.metaschema.core.metapath.item.atomic.IDateItem; +import dev.metaschema.core.metapath.item.atomic.IDateTimeItem; +import dev.metaschema.core.metapath.item.atomic.ITemporalItem; +import dev.metaschema.core.metapath.item.atomic.ITimeItem; +import edu.umd.cs.findbugs.annotations.NonNull; +import edu.umd.cs.findbugs.annotations.Nullable; + +/** + * Unit tests for the picture string parser and formatting engine in + * {@link DateTimeFormatUtil}. + *

<p> + * These tests verify that {@link DateTimeFormatUtil#parsePictureString(String)} + * correctly parses XPath 3.1 picture strings (spec section 9.8) into a list of + * {@link DateTimeFormatUtil.FormatComponent} objects, including literal text, + * escaped brackets, variable markers with modifiers, width specifications, and + * error handling. + * <p>

+ * Additionally, tests verify that + * {@link DateTimeFormatUtil#formatDateTime(ITemporalItem, String, String, String, String, Set)} + * correctly formats temporal values according to picture strings, covering all + * component specifiers (Y, M, D, d, F, W, w, H, h, P, m, s, f, Z, z, C, E), + * presentation modifiers, width modifiers, and error cases. + * + * @see XPath + * Functions 3.1 - Date Picture String + */ +class DateTimeFormatUtilTest { + + // ==================================================================== + // Test Fixture Values + // ==================================================================== + + /** Date: December 31, 2002 (the W3C spec example date). */ + private static final IDateItem TEST_DATE = IDateItem.valueOf("2002-12-31"); + + /** Time: 15:58:45.762 with +02:00 timezone. */ + private static final ITimeItem TEST_TIME = ITimeItem.valueOf("15:58:45.762+02:00"); + + /** DateTime: combining the above date and time. */ + private static final IDateTimeItem TEST_DATETIME + = IDateTimeItem.valueOf("2002-12-31T15:58:45.762+02:00"); + + /** Allowed markers for date-only functions (format-date). */ + private static final Set DATE_MARKERS + = Set.of('Y', 'M', 'D', 'd', 'F', 'W', 'w', 'C', 'E', 'Z', 'z'); + + /** Allowed markers for time-only functions (format-time). */ + private static final Set TIME_MARKERS + = Set.of('H', 'h', 'P', 'm', 's', 'f', 'Z', 'z'); + + /** Allowed markers for dateTime functions (format-dateTime). */ + private static final Set ALL_MARKERS + = Set.of('Y', 'M', 'D', 'd', 'F', 'W', 'w', 'H', 'h', 'P', 'm', 's', 'f', 'Z', 'z', 'C', 'E'); + + // ==================================================================== + // Helper methods + // ==================================================================== + + /** + * Assert that the given component is a + * {@link DateTimeFormatUtil.LiteralComponent} with the expected text. 
+ * + * @param component + * the component to check + * @param expectedText + * the expected literal text + */ + private static void assertLiteral( + @NonNull DateTimeFormatUtil.FormatComponent component, + @NonNull String expectedText) { + assertInstanceOf(DateTimeFormatUtil.LiteralComponent.class, component); + assertEquals(expectedText, + ((DateTimeFormatUtil.LiteralComponent) component).getText()); + } + + /** + * Assert that the given component is a + * {@link DateTimeFormatUtil.VariableMarkerComponent} with the expected field + * values. + * + * @param component + * the component to check + * @param specifier + * the expected component specifier character + * @param primaryModifier + * the expected primary modifier string, or {@code null} + * @param secondModifier + * the expected second modifier character, or {@code null} + * @param minWidth + * the expected minimum width, or {@code null} + * @param maxWidth + * the expected maximum width, or {@code null} + */ + private static void assertMarker( + @NonNull DateTimeFormatUtil.FormatComponent component, + char specifier, + @Nullable String primaryModifier, + @Nullable Character secondModifier, + @Nullable Integer minWidth, + @Nullable Integer maxWidth) { + assertInstanceOf(DateTimeFormatUtil.VariableMarkerComponent.class, component); + DateTimeFormatUtil.VariableMarkerComponent marker + = (DateTimeFormatUtil.VariableMarkerComponent) component; + assertEquals(specifier, marker.getSpecifier(), "specifier"); + assertEquals(primaryModifier, marker.getPrimaryModifier(), "primaryModifier"); + assertEquals(secondModifier, marker.getSecondModifier(), "secondModifier"); + assertEquals(minWidth, marker.getMinWidth(), "minWidth"); + assertEquals(maxWidth, marker.getMaxWidth(), "maxWidth"); + } + + /** + * Format a temporal value using the given picture string, with default + * language, calendar, and place parameters. 
+ * + * @param value + * the temporal value to format + * @param picture + * the picture string + * @param allowedMarkers + * the set of allowed component specifier characters + * @return the formatted string + */ + private static String format( + @NonNull ITemporalItem value, + @NonNull String picture, + @NonNull Set allowedMarkers) { + return DateTimeFormatUtil.formatDateTime(value, picture, null, null, null, allowedMarkers); + } + + // ==================================================================== + // Group 1: Literal Text + // ==================================================================== + + @Test + void testParseLiteralText() { + List result + = DateTimeFormatUtil.parsePictureString("hello"); + + assertEquals(1, result.size()); + assertLiteral(result.get(0), "hello"); + } + + @Test + void testParseEmptyString() { + List result + = DateTimeFormatUtil.parsePictureString(""); + + assertTrue(result.isEmpty()); + } + + @Test + void testParseWhitespaceLiteral() { + List result + = DateTimeFormatUtil.parsePictureString(" "); + + assertEquals(1, result.size()); + assertLiteral(result.get(0), " "); + } + + // ==================================================================== + // Group 2: Escaped Brackets + // ==================================================================== + + @Test + void testParseEscapedOpenBracket() { + List result + = DateTimeFormatUtil.parsePictureString("[["); + + assertEquals(1, result.size()); + assertLiteral(result.get(0), "["); + } + + @Test + void testParseEscapedCloseBracket() { + List result + = DateTimeFormatUtil.parsePictureString("]]"); + + assertEquals(1, result.size()); + assertLiteral(result.get(0), "]"); + } + + @Test + void testParseEscapedBracketsAroundContent() { + List result + = DateTimeFormatUtil.parsePictureString("[[value]]"); + + assertEquals(1, result.size()); + assertLiteral(result.get(0), "[value]"); + } + + // ==================================================================== + // Group 3: Simple 
Variable Markers + // ==================================================================== + + @Test + void testParseSimpleYearMarker() { + List result + = DateTimeFormatUtil.parsePictureString("[Y]"); + + assertEquals(1, result.size()); + assertMarker(result.get(0), 'Y', null, null, null, null); + } + + @Test + void testParseSimpleMonthMarker() { + List result + = DateTimeFormatUtil.parsePictureString("[M]"); + + assertEquals(1, result.size()); + assertMarker(result.get(0), 'M', null, null, null, null); + } + + @Test + void testParseSimpleDayMarker() { + List result + = DateTimeFormatUtil.parsePictureString("[D]"); + + assertEquals(1, result.size()); + assertMarker(result.get(0), 'D', null, null, null, null); + } + + @Test + void testParseAllSimpleMarkers() { + char[] specifiers = { 'Y', 'M', 'D', 'd', 'F', 'W', 'w', 'H', 'h', 'P', + 'm', 's', 'f', 'Z', 'z', 'C', 'E' }; + + for (char specifier : specifiers) { + String picture = "[" + specifier + "]"; + List result + = DateTimeFormatUtil.parsePictureString(picture); + + assertEquals(1, result.size(), + "Expected 1 component for picture: " + picture); + assertMarker(result.get(0), specifier, null, null, null, null); + } + } + + // ==================================================================== + // Group 4: Whitespace in Markers + // ==================================================================== + + @Test + void testParseMarkerWithWhitespace() { + List result + = DateTimeFormatUtil.parsePictureString("[ Y 0001 ]"); + + assertEquals(1, result.size()); + assertMarker(result.get(0), 'Y', "0001", null, null, null); + } + + // ==================================================================== + // Group 5: Markers with Primary Modifier + // ==================================================================== + + @Test + void testParseYearWithDecimalPattern() { + List result + = DateTimeFormatUtil.parsePictureString("[Y0001]"); + + assertEquals(1, result.size()); + assertMarker(result.get(0), 'Y', "0001", 
null, null, null); + } + + @Test + void testParseMonthWithZeroPadded() { + List result + = DateTimeFormatUtil.parsePictureString("[M01]"); + + assertEquals(1, result.size()); + assertMarker(result.get(0), 'M', "01", null, null, null); + } + + @Test + void testParseMonthTitleCase() { + List result + = DateTimeFormatUtil.parsePictureString("[MNn]"); + + assertEquals(1, result.size()); + assertMarker(result.get(0), 'M', "Nn", null, null, null); + } + + @Test + void testParseMonthUpperCase() { + List result + = DateTimeFormatUtil.parsePictureString("[MN]"); + + assertEquals(1, result.size()); + assertMarker(result.get(0), 'M', "N", null, null, null); + } + + @Test + void testParseMonthLowerCase() { + List result + = DateTimeFormatUtil.parsePictureString("[Mn]"); + + assertEquals(1, result.size()); + assertMarker(result.get(0), 'M', "n", null, null, null); + } + + @Test + void testParseMonthRomanLower() { + List result + = DateTimeFormatUtil.parsePictureString("[Mi]"); + + assertEquals(1, result.size()); + assertMarker(result.get(0), 'M', "i", null, null, null); + } + + @Test + void testParseMonthRomanUpper() { + List result + = DateTimeFormatUtil.parsePictureString("[MI]"); + + assertEquals(1, result.size()); + assertMarker(result.get(0), 'M', "I", null, null, null); + } + + @Test + void testParseMonthWords() { + List result + = DateTimeFormatUtil.parsePictureString("[Mw]"); + + assertEquals(1, result.size()); + assertMarker(result.get(0), 'M', "w", null, null, null); + } + + // ==================================================================== + // Group 6: Markers with Second Modifier + // ==================================================================== + + @Test + void testParseDayOrdinal() { + List result + = DateTimeFormatUtil.parsePictureString("[D1o]"); + + assertEquals(1, result.size()); + assertMarker(result.get(0), 'D', "1", 'o', null, null); + } + + @Test + void testParseDayCardinal() { + List result + = DateTimeFormatUtil.parsePictureString("[D1c]"); 
+ + assertEquals(1, result.size()); + assertMarker(result.get(0), 'D', "1", 'c', null, null); + } + + @Test + void testParseDayWordsOrdinal() { + List result + = DateTimeFormatUtil.parsePictureString("[Dwo]"); + + assertEquals(1, result.size()); + assertMarker(result.get(0), 'D', "w", 'o', null, null); + } + + // ==================================================================== + // Group 7: Width Modifiers + // ==================================================================== + + @Test + void testParseMonthMinWidth() { + List result + = DateTimeFormatUtil.parsePictureString("[M,2]"); + + assertEquals(1, result.size()); + assertMarker(result.get(0), 'M', null, null, 2, null); + } + + @Test + void testParseMonthExactWidth() { + List result + = DateTimeFormatUtil.parsePictureString("[M,2-2]"); + + assertEquals(1, result.size()); + assertMarker(result.get(0), 'M', null, null, 2, 2); + } + + @Test + void testParseNameMaxWidth() { + // "*" for minWidth means unbounded, represented as null + List result + = DateTimeFormatUtil.parsePictureString("[MNn,*-3]"); + + assertEquals(1, result.size()); + assertMarker(result.get(0), 'M', "Nn", null, null, 3); + } + + @Test + void testParseNameExactWidth() { + List result + = DateTimeFormatUtil.parsePictureString("[MNn,3-3]"); + + assertEquals(1, result.size()); + assertMarker(result.get(0), 'M', "Nn", null, 3, 3); + } + + @Test + void testParseYearWidthOnly() { + List result + = DateTimeFormatUtil.parsePictureString("[Y,4-4]"); + + assertEquals(1, result.size()); + assertMarker(result.get(0), 'Y', null, null, 4, 4); + } + + // ==================================================================== + // Group 8: Complex Picture Strings + // ==================================================================== + + @Test + void testParseIsoDateFormat() { + List result + = DateTimeFormatUtil.parsePictureString("[Y0001]-[M01]-[D01]"); + + assertEquals(5, result.size()); + assertMarker(result.get(0), 'Y', "0001", null, null, null); + 
assertLiteral(result.get(1), "-"); + assertMarker(result.get(2), 'M', "01", null, null, null); + assertLiteral(result.get(3), "-"); + assertMarker(result.get(4), 'D', "01", null, null, null); + } + + @Test + void testParseDateWithLiterals() { + List result + = DateTimeFormatUtil.parsePictureString("[D] [MNn] [Y]"); + + assertEquals(5, result.size()); + assertMarker(result.get(0), 'D', null, null, null, null); + assertLiteral(result.get(1), " "); + assertMarker(result.get(2), 'M', "Nn", null, null, null); + assertLiteral(result.get(3), " "); + assertMarker(result.get(4), 'Y', null, null, null, null); + } + + @Test + void testParseTimeFormat() { + List result + = DateTimeFormatUtil.parsePictureString("[h]:[m01]:[s01] [P]"); + + assertEquals(7, result.size()); + assertMarker(result.get(0), 'h', null, null, null, null); + assertLiteral(result.get(1), ":"); + assertMarker(result.get(2), 'm', "01", null, null, null); + assertLiteral(result.get(3), ":"); + assertMarker(result.get(4), 's', "01", null, null, null); + assertLiteral(result.get(5), " "); + assertMarker(result.get(6), 'P', null, null, null, null); + } + + @Test + void testParseEscapedBracketsWithDate() { + // [[[Y0001]-[M01]-[D01]]] + // Parsing: [[ = literal "[", then [Y0001], literal "-", [M01], literal "-", + // [D01], ]] = literal "]" + List result + = DateTimeFormatUtil.parsePictureString("[[[Y0001]-[M01]-[D01]]]"); + + assertEquals(7, result.size()); + assertLiteral(result.get(0), "["); + assertMarker(result.get(1), 'Y', "0001", null, null, null); + assertLiteral(result.get(2), "-"); + assertMarker(result.get(3), 'M', "01", null, null, null); + assertLiteral(result.get(4), "-"); + assertMarker(result.get(5), 'D', "01", null, null, null); + assertLiteral(result.get(6), "]"); + } + + // ==================================================================== + // Group 9: Error Cases + // ==================================================================== + + @Test + void testParseUnknownMarker() { + 
assertThrows(FormatDateTimeFunctionException.class, + () -> DateTimeFormatUtil.parsePictureString("[X]")); + } + + @Test + void testParseUnmatchedOpenBracket() { + assertThrows(FormatDateTimeFunctionException.class, + () -> DateTimeFormatUtil.parsePictureString("[Y")); + } + + @Test + void testParseUnmatchedCloseBracket() { + assertThrows(FormatDateTimeFunctionException.class, + () -> DateTimeFormatUtil.parsePictureString("]")); + } + + @Test + void testParseMinWidthLessThanOne() { + assertThrows(FormatDateTimeFunctionException.class, + () -> DateTimeFormatUtil.parsePictureString("[M,0]")); + } + + @Test + void testParseMaxLessThanMin() { + assertThrows(FormatDateTimeFunctionException.class, + () -> DateTimeFormatUtil.parsePictureString("[M,3-1]")); + } + + // ==================================================================== + // Formatting Engine Tests + // ==================================================================== + + // ==================================================================== + // Group 10: Year Formatting + // ==================================================================== + + private static Stream provideYearFormats() { + return Stream.of( + // [Y] default - full year + Arguments.of(TEST_DATE, "[Y]", "2002"), + // [Y0001] - 4 digit padded + Arguments.of(TEST_DATE, "[Y0001]", "2002"), + // [Y01] - modulo 10^2 = last 2 digits (spec 9.8.4.4 modulo rule) + Arguments.of(TEST_DATE, "[Y01]", "02"), + // [Y,4-4] - padded to exactly 4 with width modifier + Arguments.of(IDateItem.valueOf("0005-01-01"), "[Y,4-4]", "0005"), + // [Y1] - single digit pattern = no truncation, full year + Arguments.of(TEST_DATE, "[Y1]", "2002")); + } + + @ParameterizedTest + @MethodSource("provideYearFormats") + void testYearFormatting( + @NonNull ITemporalItem value, + @NonNull String picture, + @NonNull String expected) { + assertEquals(expected, format(value, picture, DATE_MARKERS)); + } + + // 
==================================================================== + // Group 11: Month Formatting + // ==================================================================== + + private static Stream provideMonthFormats() { + return Stream.of( + // Default decimal + Arguments.of(TEST_DATE, "[M]", "12"), + // Zero-padded + Arguments.of(IDateItem.valueOf("2002-03-15"), "[M01]", "03"), + // Title-case name + Arguments.of(IDateItem.valueOf("2002-03-15"), "[MNn]", "March"), + // Uppercase name + Arguments.of(IDateItem.valueOf("2002-03-15"), "[MN]", "MARCH"), + // Lowercase name + Arguments.of(IDateItem.valueOf("2002-03-15"), "[Mn]", "march"), + // Abbreviated name (max width 3) + Arguments.of(IDateItem.valueOf("2002-03-15"), "[MNn,*-3]", "Mar"), + // Exact 3 chars + Arguments.of(IDateItem.valueOf("2002-03-15"), "[MNn,3-3]", "Mar"), + // Roman upper + Arguments.of(IDateItem.valueOf("2002-03-15"), "[MI]", "III"), + // Roman lower + Arguments.of(IDateItem.valueOf("2002-03-15"), "[Mi]", "iii"), + // Month 12 name + Arguments.of(TEST_DATE, "[MNn]", "December")); + } + + @ParameterizedTest + @MethodSource("provideMonthFormats") + void testMonthFormatting( + @NonNull ITemporalItem value, + @NonNull String picture, + @NonNull String expected) { + assertEquals(expected, format(value, picture, DATE_MARKERS)); + } + + // ==================================================================== + // Group 12: Day Formatting + // ==================================================================== + + private static Stream provideDayFormats() { + return Stream.of( + // Default decimal + Arguments.of(TEST_DATE, "[D]", "31"), + // Zero-padded + Arguments.of(IDateItem.valueOf("2002-03-05"), "[D01]", "05"), + // Ordinal + Arguments.of(IDateItem.valueOf("2002-03-01"), "[D1o]", "1st"), + Arguments.of(IDateItem.valueOf("2002-03-02"), "[D1o]", "2nd"), + Arguments.of(IDateItem.valueOf("2002-03-03"), "[D1o]", "3rd"), + Arguments.of(IDateItem.valueOf("2002-03-04"), "[D1o]", "4th"), + 
Arguments.of(IDateItem.valueOf("2002-03-11"), "[D1o]", "11th"),
+        Arguments.of(IDateItem.valueOf("2002-03-12"), "[D1o]", "12th"),
+        Arguments.of(IDateItem.valueOf("2002-03-13"), "[D1o]", "13th"),
+        Arguments.of(IDateItem.valueOf("2002-03-21"), "[D1o]", "21st"),
+        Arguments.of(TEST_DATE, "[D1o]", "31st"));
+  }
+
+  @ParameterizedTest
+  @MethodSource("provideDayFormats")
+  void testDayFormatting(
+      @NonNull ITemporalItem value,
+      @NonNull String picture,
+      @NonNull String expected) {
+    assertEquals(expected, format(value, picture, DATE_MARKERS));
+  }
+
+  // ====================================================================
+  // Group 13: Day of Year
+  // ====================================================================
+
+  private static Stream<Arguments> provideDayOfYearFormats() {
+    return Stream.of(
+        Arguments.of(IDateItem.valueOf("2002-01-01"), "[d]", "1"),
+        Arguments.of(IDateItem.valueOf("2002-12-31"), "[d]", "365"),
+        // leap year
+        Arguments.of(IDateItem.valueOf("2004-12-31"), "[d]", "366"),
+        Arguments.of(IDateItem.valueOf("2002-01-05"), "[d001]", "005"));
+  }
+
+  @ParameterizedTest
+  @MethodSource("provideDayOfYearFormats")
+  void testDayOfYearFormatting(
+      @NonNull ITemporalItem value,
+      @NonNull String picture,
+      @NonNull String expected) {
+    assertEquals(expected, format(value, picture, DATE_MARKERS));
+  }
+
+  // ====================================================================
+  // Group 14: Day of Week
+  // ====================================================================
+
+  private static Stream<Arguments> provideDayOfWeekFormats() {
+    return Stream.of(
+        // 2002-12-31 is a Tuesday
+        // F default is name lowercase (n)
+        Arguments.of(TEST_DATE, "[F]", "tuesday"),
+        // Numeric ISO (Mon=1, Sun=7)
+        Arguments.of(TEST_DATE, "[F1]", "2"), // Tuesday = 2
+        Arguments.of(IDateItem.valueOf("2002-12-30"), "[F1]", "1"), // Monday = 1
+        Arguments.of(IDateItem.valueOf("2003-01-05"), "[F1]", "7"), // Sunday = 7
+        // Title-case name
+        Arguments.of(TEST_DATE, "[FNn]", "Tuesday"),
+        // Abbreviated
+        Arguments.of(TEST_DATE, "[FNn,*-3]", "Tue"));
+  }
+
+  @ParameterizedTest
+  @MethodSource("provideDayOfWeekFormats")
+  void testDayOfWeekFormatting(
+      @NonNull ITemporalItem value,
+      @NonNull String picture,
+      @NonNull String expected) {
+    assertEquals(expected, format(value, picture, DATE_MARKERS));
+  }
+
+  // ====================================================================
+  // Group 15: Hour Formatting (24-hour and 12-hour)
+  // ====================================================================
+
+  private static Stream<Arguments> provideHourFormats() {
+    return Stream.of(
+        // 24-hour (H), default is "1"
+        Arguments.of(TEST_TIME, "[H]", "15"),
+        Arguments.of(ITimeItem.valueOf("00:00:00+00:00"), "[H]", "0"),
+        Arguments.of(ITimeItem.valueOf("00:00:00+00:00"), "[H01]", "00"),
+        Arguments.of(ITimeItem.valueOf("23:59:59+00:00"), "[H]", "23"),
+        // 12-hour (h), default is "1"
+        Arguments.of(ITimeItem.valueOf("00:00:00+00:00"), "[h]", "12"), // midnight = 12
+        Arguments.of(ITimeItem.valueOf("12:00:00+00:00"), "[h]", "12"), // noon = 12
+        Arguments.of(ITimeItem.valueOf("13:00:00+00:00"), "[h]", "1"),
+        Arguments.of(ITimeItem.valueOf("23:00:00+00:00"), "[h]", "11"),
+        Arguments.of(TEST_TIME, "[h]", "3")); // 15:58 -> 3
+  }
+
+  @ParameterizedTest
+  @MethodSource("provideHourFormats")
+  void testHourFormatting(
+      @NonNull ITemporalItem value,
+      @NonNull String picture,
+      @NonNull String expected) {
+    assertEquals(expected, format(value, picture, TIME_MARKERS));
+  }
+
+  // ====================================================================
+  // Group 16: AM/PM
+  // ====================================================================
+
+  private static Stream<Arguments> provideAmPmFormats() {
+    return Stream.of(
+        // P default is name lowercase (n)
+        Arguments.of(ITimeItem.valueOf("00:00:00+00:00"), "[P]", "am"),
+        Arguments.of(ITimeItem.valueOf("00:00:00+00:00"), "[PN]", "AM"),
+        Arguments.of(ITimeItem.valueOf("12:00:00+00:00"), "[Pn]", "pm"),
+        Arguments.of(ITimeItem.valueOf("12:00:00+00:00"), "[PN]", "PM"),
+        Arguments.of(TEST_TIME, "[PN]", "PM"));
+  }
+
+  @ParameterizedTest
+  @MethodSource("provideAmPmFormats")
+  void testAmPmFormatting(
+      @NonNull ITemporalItem value,
+      @NonNull String picture,
+      @NonNull String expected) {
+    assertEquals(expected, format(value, picture, TIME_MARKERS));
+  }
+
+  // ====================================================================
+  // Group 17: Minute and Second
+  // ====================================================================
+
+  private static Stream<Arguments> provideMinuteSecondFormats() {
+    return Stream.of(
+        // Minute default is "01" (zero-padded)
+        Arguments.of(ITimeItem.valueOf("12:05:00+00:00"), "[m]", "05"),
+        Arguments.of(ITimeItem.valueOf("12:05:00+00:00"), "[m1]", "5"),
+        Arguments.of(TEST_TIME, "[m]", "58"),
+        // Second default is "01" (zero-padded)
+        Arguments.of(ITimeItem.valueOf("12:00:00+00:00"), "[s]", "00"),
+        Arguments.of(ITimeItem.valueOf("12:00:09+00:00"), "[s1]", "9"),
+        Arguments.of(TEST_TIME, "[s]", "45"));
+  }
+
+  @ParameterizedTest
+  @MethodSource("provideMinuteSecondFormats")
+  void testMinuteSecondFormatting(
+      @NonNull ITemporalItem value,
+      @NonNull String picture,
+      @NonNull String expected) {
+    assertEquals(expected, format(value, picture, TIME_MARKERS));
+  }
+
+  // ====================================================================
+  // Group 18: Fractional Seconds
+  // ====================================================================
+
+  private static Stream<Arguments> provideFractionalSecondFormats() {
+    return Stream.of(
+        // [f001] -> 3 digits (milliseconds)
+        Arguments.of(TEST_TIME, "[f001]", "762"), // 0.762 seconds
+        // [f001] with zero nanos
+        Arguments.of(ITimeItem.valueOf("12:00:00+00:00"), "[f001]", "000"),
+        // [f01] -> 2 digits (hundredths) - truncated not rounded
+        Arguments.of(TEST_TIME, "[f01]", "76"),
+        // [f1] -> all significant digits (single digit = no constraint)
+        Arguments.of(TEST_TIME, "[f1]", "762"),
+        // [f1,1-1] -> exactly 1 digit via width
+        Arguments.of(TEST_TIME, "[f1,1-1]", "7"));
+  }
+
+  @ParameterizedTest
+  @MethodSource("provideFractionalSecondFormats")
+  void testFractionalSecondFormatting(
+      @NonNull ITemporalItem value,
+      @NonNull String picture,
+      @NonNull String expected) {
+    assertEquals(expected, format(value, picture, TIME_MARKERS));
+  }
+
+  // ====================================================================
+  // Group 19: Timezone (Z)
+  // ====================================================================
+
+  private static Stream<Arguments> provideTimezoneFormats() {
+    return Stream.of(
+        // Default Z format is "01:01"
+        Arguments.of(TEST_DATETIME, "[Z]", "+02:00"),
+        Arguments.of(IDateTimeItem.valueOf("2002-12-31T15:58:45-08:00"), "[Z]", "-08:00"),
+        Arguments.of(IDateTimeItem.valueOf("2002-12-31T15:58:45+00:00"), "[Z]", "+00:00"),
+        // No timezone -> empty
+        Arguments.of(IDateTimeItem.valueOf("2002-12-31T15:58:45"), "[Z]", ""),
+        // Abbreviated forms
+        Arguments.of(IDateTimeItem.valueOf("2002-12-31T15:58:45+05:00"), "[Z0]", "+5"),
+        Arguments.of(IDateTimeItem.valueOf("2002-12-31T15:58:45+05:30"), "[Z0]", "+5:30"),
+        Arguments.of(IDateTimeItem.valueOf("2002-12-31T15:58:45+05:00"), "[Z0:00]", "+5:00"),
+        Arguments.of(IDateTimeItem.valueOf("2002-12-31T15:58:45+05:00"), "[Z00:00]", "+05:00"),
+        Arguments.of(IDateTimeItem.valueOf("2002-12-31T15:58:45+05:30"), "[Z0000]", "+0530"),
+        // t modifier (UTC -> Z)
+        Arguments.of(IDateTimeItem.valueOf("2002-12-31T15:58:45+00:00"), "[Z01:01t]", "Z"),
+        Arguments.of(IDateTimeItem.valueOf("2002-12-31T15:58:45+05:00"), "[Z01:01t]", "+05:00"),
+        // Military timezone
+        Arguments.of(IDateTimeItem.valueOf("2002-12-31T15:58:45+00:00"), "[ZZ]", "Z"),
+        Arguments.of(IDateTimeItem.valueOf("2002-12-31T15:58:45+01:00"), "[ZZ]", "A"),
+        Arguments.of(IDateTimeItem.valueOf("2002-12-31T15:58:45-01:00"), "[ZZ]", "N"),
+        Arguments.of(IDateTimeItem.valueOf("2002-12-31T15:58:45+12:00"), "[ZZ]", "M"),
+        Arguments.of(IDateTimeItem.valueOf("2002-12-31T15:58:45-12:00"), "[ZZ]", "Y"),
+        Arguments.of(IDateTimeItem.valueOf("2002-12-31T15:58:45+05:30"), "[ZZ]", "+05:30"),
+        // Military J for local time (no timezone)
+        Arguments.of(IDateTimeItem.valueOf("2002-12-31T15:58:45"), "[ZZ]", "J"));
+  }
+
+  @ParameterizedTest
+  @MethodSource("provideTimezoneFormats")
+  void testTimezoneFormatting(
+      @NonNull ITemporalItem value,
+      @NonNull String picture,
+      @NonNull String expected) {
+    assertEquals(expected, format(value, picture, ALL_MARKERS));
+  }
+
+  // ====================================================================
+  // Group 20: GMT Prefix Timezone (z)
+  // ====================================================================
+
+  @Test
+  void testGmtTimezone() {
+    assertEquals("GMT+02:00",
+        format(TEST_DATETIME, "[z]", ALL_MARKERS));
+    assertEquals("GMT+00:00",
+        format(IDateTimeItem.valueOf("2002-12-31T15:58:45+00:00"), "[z]", ALL_MARKERS));
+    // No timezone -> empty
+    assertEquals("",
+        format(IDateTimeItem.valueOf("2002-12-31T15:58:45"), "[z]", ALL_MARKERS));
+  }
+
+  // ====================================================================
+  // Group 21: Calendar and Era
+  // ====================================================================
+
+  @Test
+  void testCalendarAndEra() {
+    // C default is name lowercase (n)
+    assertEquals("ad", format(TEST_DATE, "[C]", DATE_MARKERS));
+    assertEquals("AD", format(TEST_DATE, "[CN]", DATE_MARKERS));
+    // E default is name lowercase (n)
+    assertEquals("ad", format(TEST_DATE, "[E]", DATE_MARKERS));
+    assertEquals("AD", format(TEST_DATE, "[EN]", DATE_MARKERS));
+  }
+
+  // ====================================================================
+  // Group 22: Width Modifier Enforcement
+  // ====================================================================
+
+  @Test
+  void testWidthModifierTruncation() {
+    // [MNn,3-3] truncates "December" to "Dec"
+    assertEquals("Dec", format(TEST_DATE, "[MNn,3-3]", DATE_MARKERS));
+    // [MNn,3-3] with "May" -> "May" (already 3)
+    assertEquals("May", format(IDateItem.valueOf("2002-05-15"), "[MNn,3-3]", DATE_MARKERS));
+  }
+
+  @Test
+  void testWidthModifierPadding() {
+    // [MNn,10] pads "May" to 10 chars with spaces
+    String result = format(IDateItem.valueOf("2002-05-15"), "[MNn,10]", DATE_MARKERS);
+    assertEquals(10, result.length());
+    assertTrue(result.startsWith("May"));
+  }
+
+  // ====================================================================
+  // Group 23: Spec Examples (Integration Tests)
+  // ====================================================================
+
+  private static Stream<Arguments> provideSpecExamples() {
+    IDateItem specDate = IDateItem.valueOf("2002-12-31");
+    ITimeItem specTime = ITimeItem.valueOf("15:58:45.762+02:00");
+    IDateTimeItem specDateTime = IDateTimeItem.valueOf("2002-12-31T15:58:45+02:00");
+    return Stream.of(
+        Arguments.of(specDate, "[Y0001]-[M01]-[D01]", DATE_MARKERS, "2002-12-31"),
+        Arguments.of(specDate, "[D1] [MI] [Y]", DATE_MARKERS, "31 XII 2002"),
+        Arguments.of(specDate, "[D1o] [MNn], [Y]", DATE_MARKERS, "31st December, 2002"),
+        Arguments.of(specDate, "[D01] [MN,*-3] [Y0001]", DATE_MARKERS, "31 DEC 2002"),
+        Arguments.of(specDate, "[MNn] [D], [Y]", DATE_MARKERS, "December 31, 2002"),
+        Arguments.of(specDate, "[[[Y0001]-[M01]-[D01]]]", DATE_MARKERS, "[2002-12-31]"),
+        Arguments.of(specTime, "[h]:[m01] [PN]", TIME_MARKERS, "3:58 PM"),
+        Arguments.of(specTime, "[h]:[m01]:[s01] [Pn]", TIME_MARKERS, "3:58:45 pm"),
+        Arguments.of(specTime, "[H01]:[m01]", TIME_MARKERS, "15:58"),
+        Arguments.of(specTime, "[H01]:[m01]:[s01].[f001]", TIME_MARKERS, "15:58:45.762"),
+        Arguments.of(specDateTime, "[M01]/[D01]/[Y0001] at [H01]:[m01]:[s01]", ALL_MARKERS,
+            "12/31/2002 at 15:58:45"));
+  }
+
+  @ParameterizedTest
+  @MethodSource("provideSpecExamples")
+  void testSpecExamples(
+      @NonNull ITemporalItem value,
+      @NonNull String picture,
+      @NonNull Set allowedMarkers, // NOTE(review): raw Set; element type is declared outside this chunk
+      @NonNull String expected) {
+    assertEquals(expected,
+        DateTimeFormatUtil.formatDateTime(value, picture, null, null, null, allowedMarkers));
+  }
+
+  // ====================================================================
+  // Group 24: Error Cases
+  // ====================================================================
+
+  @Test
+  void testComponentNotAvailable() {
+    // format-time with date marker -> FOFD1350
+    assertThrows(FormatDateTimeFunctionException.class,
+        () -> format(TEST_TIME, "[Y]", TIME_MARKERS));
+  }
+
+  // ====================================================================
+  // Group 25: Week Formatting
+  // ====================================================================
+
+  @Test
+  void testWeekOfYear() {
+    // 2002-12-31 is in ISO week 1 of 2003
+    assertEquals("1", format(TEST_DATE, "[W]", DATE_MARKERS));
+    assertEquals("01", format(TEST_DATE, "[W01]", DATE_MARKERS));
+  }
+
+  @Test
+  void testWeekOfMonth() {
+    // 2002-12-31 -> week of month varies; parse once and only require a value in 1..6
+    int week = Integer.parseInt(format(TEST_DATE, "[w]", DATE_MARKERS));
+    assertTrue(week >= 1 && week <= 6);
+  }
+}
diff --git a/core/src/test/java/dev/metaschema/core/metapath/function/library/FnFormatDateTest.java b/core/src/test/java/dev/metaschema/core/metapath/function/library/FnFormatDateTest.java
new file mode 100644
index 000000000..6c596d29e
--- /dev/null
+++ b/core/src/test/java/dev/metaschema/core/metapath/function/library/FnFormatDateTest.java
@@ -0,0 +1,98 @@
+/*
+ * SPDX-FileCopyrightText: none
+ * SPDX-License-Identifier: CC0-1.0
+ */
+
+package dev.metaschema.core.metapath.function.library;
+
+import static dev.metaschema.core.metapath.TestUtils.string;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNull;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.Arguments;
+import org.junit.jupiter.params.provider.MethodSource;
+
+import java.util.stream.Stream;
+
+import dev.metaschema.core.metapath.ExpressionTestBase;
+import dev.metaschema.core.metapath.IMetapathExpression;
+import dev.metaschema.core.metapath.MetapathException;
+import dev.metaschema.core.metapath.item.atomic.IStringItem;
+import edu.umd.cs.findbugs.annotations.NonNull;
+
+/**
+ * Tests for the fn:format-date function.
+ */
+class FnFormatDateTest
+    extends ExpressionTestBase {
+
+  private static Stream<Arguments> provideValues() {
+    return Stream.of(
+        // W3C spec examples (section 9.8.5)
+        Arguments.of(
+            string("2002-12-31"),
+            "format-date(meta:date('2002-12-31'), '[Y0001]-[M01]-[D01]')"),
+        Arguments.of(
+            string("31 XII 2002"),
+            "format-date(meta:date('2002-12-31'), '[D1] [MI] [Y]')"),
+        Arguments.of(
+            string("31st December, 2002"),
+            "format-date(meta:date('2002-12-31'), '[D1o] [MNn], [Y]', 'en', (), ())"),
+        Arguments.of(
+            string("31 DEC 2002"),
+            "format-date(meta:date('2002-12-31'), '[D01] [MN,*-3] [Y0001]', 'en', (), ())"),
+        Arguments.of(
+            string("December 31, 2002"),
+            "format-date(meta:date('2002-12-31'), '[MNn] [D], [Y]', 'en', (), ())"),
+        // Escaped brackets
+        Arguments.of(
+            string("[2002-12-31]"),
+            "format-date(meta:date('2002-12-31'), '[[[Y0001]-[M01]-[D01]]]')"),
+        // Day of week
+        Arguments.of(
+            string("Tuesday"),
+            "format-date(meta:date('2002-12-31'), '[FNn]', 'en', (), ())"),
+        // Month name
+        Arguments.of(
+            string("March"),
+            "format-date(meta:date('2002-03-15'), '[MNn]')"),
+        // Roman numerals for month
+        Arguments.of(
+            string("III"),
+            "format-date(meta:date('2002-03-15'), '[MI]')"),
+        // 2-digit year (modulo rule)
+        Arguments.of(
+            string("02"),
+            "format-date(meta:date('2002-12-31'), '[Y01]')"));
+  }
+
+  @Test
+  void testEmptySequenceReturnsEmptySequence() {
+    // Spec: "If $value is the empty sequence, the function returns the empty
+    // sequence."
+    assertNull(
+        IMetapathExpression.compile("format-date((), '[Y0001]')")
+            .evaluateAs(null, IMetapathExpression.ResultType.ITEM, newDynamicContext()));
+  }
+
+  @ParameterizedTest
+  @MethodSource("provideValues")
+  void testExpression(@NonNull IStringItem expected, @NonNull String metapath) {
+    assertEquals(
+        expected,
+        IMetapathExpression.compile(metapath)
+            .evaluateAs(null, IMetapathExpression.ResultType.ITEM, newDynamicContext()));
+  }
+
+  @Test
+  void testTimeMarkerRejectsForDate() {
+    // format-date should reject time-only markers (H, h, P, m, s, f)
+    assertThrows(MetapathException.class, () -> {
+      IMetapathExpression.compile("format-date(meta:date('2002-12-31'), '[H01]')")
+          .evaluateAs(null, IMetapathExpression.ResultType.ITEM, newDynamicContext());
+    });
+  }
+}
diff --git a/core/src/test/java/dev/metaschema/core/metapath/function/library/FnFormatDateTimeTest.java b/core/src/test/java/dev/metaschema/core/metapath/function/library/FnFormatDateTimeTest.java
new file mode 100644
index 000000000..e61c322a5
--- /dev/null
+++ b/core/src/test/java/dev/metaschema/core/metapath/function/library/FnFormatDateTimeTest.java
@@ -0,0 +1,80 @@
+/*
+ * SPDX-FileCopyrightText: none
+ * SPDX-License-Identifier: CC0-1.0
+ */
+
+package dev.metaschema.core.metapath.function.library;
+
+import static dev.metaschema.core.metapath.TestUtils.string;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNull;
+
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.Arguments;
+import org.junit.jupiter.params.provider.MethodSource;
+
+import java.util.stream.Stream;
+
+import dev.metaschema.core.metapath.ExpressionTestBase;
+import dev.metaschema.core.metapath.IMetapathExpression;
+import dev.metaschema.core.metapath.item.atomic.IStringItem;
+import edu.umd.cs.findbugs.annotations.NonNull;
+
+/**
+ * Tests for the fn:format-dateTime function.
+ */
+class FnFormatDateTimeTest
+    extends ExpressionTestBase {
+
+  private static Stream<Arguments> provideValues() {
+    return Stream.of(
+        // 2-arg form (basic formatting)
+        Arguments.of(
+            string("2002-12-31"),
+            "format-dateTime(meta:date-time('2002-12-31T15:58:45+02:00'), '[Y0001]-[M01]-[D01]')"),
+        Arguments.of(
+            string("12/31/2002 at 15:58:45"),
+            "format-dateTime(meta:date-time('2002-12-31T15:58:45+02:00'), '[M01]/[D01]/[Y0001] at [H01]:[m01]:[s01]')"),
+        Arguments.of(
+            string("15:58"),
+            "format-dateTime(meta:date-time('2002-12-31T15:58:45+02:00'), '[H01]:[m01]')"),
+        // 5-arg form with language
+        Arguments.of(
+            string("31st December, 2002"),
+            "format-dateTime(meta:date-time('2002-12-31T15:58:45+02:00'), '[D1o] [MNn], [Y]', 'en', (), ())"),
+        Arguments.of(
+            string("December 31, 2002"),
+            "format-dateTime(meta:date-time('2002-12-31T15:58:45+02:00'), '[MNn] [D], [Y]', 'en', (), ())"),
+        // Escaped brackets
+        Arguments.of(
+            string("[2002-12-31]"),
+            "format-dateTime(meta:date-time('2002-12-31T15:58:45+02:00'), '[[[Y0001]-[M01]-[D01]]]')"),
+        // Time components in dateTime
+        Arguments.of(
+            string("3:58 PM"),
+            "format-dateTime(meta:date-time('2002-12-31T15:58:45+02:00'), '[h]:[m01] [PN]', 'en', (), ())"),
+        // Timezone
+        Arguments.of(
+            string("+02:00"),
+            "format-dateTime(meta:date-time('2002-12-31T15:58:45+02:00'), '[Z]')"));
+  }
+
+  @ParameterizedTest
+  @MethodSource("provideValues")
+  void testExpression(@NonNull IStringItem expected, @NonNull String metapath) {
+    assertEquals(
+        expected,
+        IMetapathExpression.compile(metapath)
+            .evaluateAs(null, IMetapathExpression.ResultType.ITEM, newDynamicContext()));
+  }
+
+  @Test
+  void testEmptySequenceReturnsEmptySequence() {
+    // Spec: "If $value is the empty sequence, the function returns the empty
+    // sequence."
+    assertNull(
+        IMetapathExpression.compile("format-dateTime((), '[Y0001]')")
+            .evaluateAs(null, IMetapathExpression.ResultType.ITEM, newDynamicContext()));
+  }
+}
diff --git a/core/src/test/java/dev/metaschema/core/metapath/function/library/FnFormatIntegerTest.java b/core/src/test/java/dev/metaschema/core/metapath/function/library/FnFormatIntegerTest.java
new file mode 100644
index 000000000..9be49d2f4
--- /dev/null
+++ b/core/src/test/java/dev/metaschema/core/metapath/function/library/FnFormatIntegerTest.java
@@ -0,0 +1,160 @@
+/*
+ * SPDX-FileCopyrightText: none
+ * SPDX-License-Identifier: CC0-1.0
+ */
+
+package dev.metaschema.core.metapath.function.library;
+
+import static dev.metaschema.core.metapath.TestUtils.string;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.Arguments;
+import org.junit.jupiter.params.provider.MethodSource;
+
+import java.util.stream.Stream;
+
+import dev.metaschema.core.metapath.ExpressionTestBase;
+import dev.metaschema.core.metapath.IMetapathExpression;
+import dev.metaschema.core.metapath.MetapathException;
+import dev.metaschema.core.metapath.item.atomic.IStringItem;
+import edu.umd.cs.findbugs.annotations.NonNull;
+
+class FnFormatIntegerTest
+    extends ExpressionTestBase {
+  private static Stream<Arguments> provideValues() { // NOPMD - false positive
+    return Stream.of(
+        // Decimal digit patterns
+        Arguments.of(
+            string("123"),
+            "format-integer(123, '1')"),
+        Arguments.of(
+            string("0123"),
+            "format-integer(123, '0000')"),
+        Arguments.of(
+            string("123"),
+            "format-integer(123, '01')"),
+        Arguments.of(
+            string("0"),
+            "format-integer(0, '1')"),
+        Arguments.of(
+            string("-123"),
+            "format-integer(-123, '1')"),
+        // Alphabetic sequences
+        Arguments.of(
+            string("a"),
+            "format-integer(1, 'a')"),
+        Arguments.of(
+            string("z"),
+            "format-integer(26, 'a')"),
+        Arguments.of(
+            string("aa"),
+            "format-integer(27, 'a')"),
+        Arguments.of(
+            string("A"),
+            "format-integer(1, 'A')"),
+        Arguments.of(
+            string("g"),
+            "format-integer(7, 'a')"),
+        // Roman numerals
+        Arguments.of(
+            string("i"),
+            "format-integer(1, 'i')"),
+        Arguments.of(
+            string("iv"),
+            "format-integer(4, 'i')"),
+        Arguments.of(
+            string("LVII"),
+            "format-integer(57, 'I')"),
+        Arguments.of(
+            string("MCMXCIX"),
+            "format-integer(1999, 'I')"),
+        // Words
+        Arguments.of(
+            string("one hundred twenty-three"),
+            "format-integer(123, 'w')"),
+        Arguments.of(
+            string("ONE"),
+            "format-integer(1, 'W')"),
+        Arguments.of(
+            string("Twenty-One"),
+            "format-integer(21, 'Ww')"),
+        // Ordinal modifier (3-arg form)
+        Arguments.of(
+            string("1st"),
+            "format-integer(1, '1;o', 'en')"),
+        Arguments.of(
+            string("2nd"),
+            "format-integer(2, '1;o', 'en')"),
+        Arguments.of(
+            string("3rd"),
+            "format-integer(3, '1;o', 'en')"),
+        Arguments.of(
+            string("4th"),
+            "format-integer(4, '1;o', 'en')"),
+        Arguments.of(
+            string("11th"),
+            "format-integer(11, '1;o', 'en')"),
+        Arguments.of(
+            string("12th"),
+            "format-integer(12, '1;o', 'en')"),
+        Arguments.of(
+            string("13th"),
+            "format-integer(13, '1;o', 'en')"),
+        Arguments.of(
+            string("21st"),
+            "format-integer(21, '1;o', 'en')"),
+        Arguments.of(
+            string("111th"),
+            "format-integer(111, '1;o', 'en')"),
+        Arguments.of(
+            string("112th"),
+            "format-integer(112, '1;o', 'en')"),
+        // Grouping separators
+        Arguments.of(
+            string("1,000,000"),
+            "format-integer(1000000, '#,##0')"),
+        Arguments.of(
+            string("15"),
+            "format-integer(15, '#,##0')"),
+        Arguments.of(
+            string("1;234"),
+            "format-integer(1234, '#;##0;')"),
+        // Empty sequence
+        Arguments.of(
+            string(""),
+            "format-integer((), '1')"));
+  }
+
+  @ParameterizedTest
+  @MethodSource("provideValues")
+  void testExpression(@NonNull IStringItem expected, @NonNull String metapath) {
+    assertEquals(
+        expected,
+        IMetapathExpression.compile(metapath)
+            .evaluateAs(null, IMetapathExpression.ResultType.ITEM, newDynamicContext()));
+  }
+
+  @Test
+  void testOrdinalWithWordFormatFallsBackToCardinal() {
+    // Spec: "If ordinal numbering is not supported for the combination of the
+    // format token, the language, and the string appearing in parentheses, the
+    // request is ignored and cardinal numbers are generated instead."
+    // Our implementation does not support word ordinals (first, second, etc.),
+    // so it must fall back to cardinal (one, two, etc.)
+    assertEquals(
+        string("one"),
+        IMetapathExpression.compile("format-integer(1, 'w;o', 'en')")
+            .evaluateAs(null, IMetapathExpression.ResultType.ITEM, newDynamicContext()));
+  }
+
+  @Test
+  void testInvalidFormatToken() {
+    assertThrows(MetapathException.class, () -> {
+      IMetapathExpression.compile("format-integer(1, '')")
+          .evaluateAs(null, IMetapathExpression.ResultType.ITEM, newDynamicContext());
+    });
+  }
+}
diff --git a/core/src/test/java/dev/metaschema/core/metapath/function/library/FnFormatTimeTest.java b/core/src/test/java/dev/metaschema/core/metapath/function/library/FnFormatTimeTest.java
new file mode 100644
index 000000000..27a0ab7c2
--- /dev/null
+++ b/core/src/test/java/dev/metaschema/core/metapath/function/library/FnFormatTimeTest.java
@@ -0,0 +1,97 @@
+/*
+ * SPDX-FileCopyrightText: none
+ * SPDX-License-Identifier: CC0-1.0
+ */
+
+package dev.metaschema.core.metapath.function.library;
+
+import static dev.metaschema.core.metapath.TestUtils.string;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNull;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.Arguments;
+import org.junit.jupiter.params.provider.MethodSource;
+
+import java.util.stream.Stream;
+
+import dev.metaschema.core.metapath.ExpressionTestBase;
+import
dev.metaschema.core.metapath.IMetapathExpression;
+import dev.metaschema.core.metapath.MetapathException;
+import dev.metaschema.core.metapath.item.atomic.IStringItem;
+import edu.umd.cs.findbugs.annotations.NonNull;
+
+/**
+ * Tests for the fn:format-time function.
+ */
+class FnFormatTimeTest
+    extends ExpressionTestBase {
+
+  private static Stream<Arguments> provideValues() {
+    return Stream.of(
+        // W3C spec examples (section 9.8.5)
+        Arguments.of(
+            string("3:58 PM"),
+            "format-time(meta:time('15:58:45.762+02:00'), '[h]:[m01] [PN]', 'en', (), ())"),
+        Arguments.of(
+            string("3:58:45 pm"),
+            "format-time(meta:time('15:58:45.762+02:00'), '[h]:[m01]:[s01] [Pn]', 'en', (), ())"),
+        Arguments.of(
+            string("15:58"),
+            "format-time(meta:time('15:58:45.762+02:00'), '[H01]:[m01]')"),
+        Arguments.of(
+            string("15:58:45.762"),
+            "format-time(meta:time('15:58:45.762+02:00'), '[H01]:[m01]:[s01].[f001]')"),
+        // 12-hour with midnight
+        Arguments.of(
+            string("12:00 AM"),
+            "format-time(meta:time('00:00:00+00:00'), '[h]:[m01] [PN]', 'en', (), ())"),
+        // 12-hour with noon
+        Arguments.of(
+            string("12:00 PM"),
+            "format-time(meta:time('12:00:00+00:00'), '[h]:[m01] [PN]', 'en', (), ())"),
+        // Timezone formatting
+        Arguments.of(
+            string("+02:00"),
+            "format-time(meta:time('15:58:45+02:00'), '[Z]')"));
+  }
+
+  @Test
+  void testEmptySequenceReturnsEmptySequence() {
+    // Spec: "If $value is the empty sequence, the function returns the empty
+    // sequence."
+    assertNull(
+        IMetapathExpression.compile("format-time((), '[H01]')")
+            .evaluateAs(null, IMetapathExpression.ResultType.ITEM, newDynamicContext()));
+  }
+
+  @ParameterizedTest
+  @MethodSource("provideValues")
+  void testExpression(@NonNull IStringItem expected, @NonNull String metapath) {
+    assertEquals(
+        expected,
+        IMetapathExpression.compile(metapath)
+            .evaluateAs(null, IMetapathExpression.ResultType.ITEM, newDynamicContext()));
+  }
+
+  @Test
+  void testDateMarkerRejectsForTime() {
+    // format-time should reject date-only markers (Y, M, D, d, F, W, w)
+    assertThrows(MetapathException.class, () -> {
+      IMetapathExpression.compile("format-time(meta:time('15:58:45+02:00'), '[Y0001]')")
+          .evaluateAs(null, IMetapathExpression.ResultType.ITEM, newDynamicContext());
+    });
+  }
+
+  @Test
+  void testEraMarkerRejectsForTime() {
+    // Era (E) is "a baseline for the numbering of years" per W3C spec,
+    // so it is not available for xs:time values (FOFD1350)
+    assertThrows(MetapathException.class, () -> {
+      IMetapathExpression.compile("format-time(meta:time('15:58:45+02:00'), '[En]')")
+          .evaluateAs(null, IMetapathExpression.ResultType.ITEM, newDynamicContext());
+    });
+  }
+}