diff --git a/README.md b/README.md
index 9fb92ac..b690377 100644
--- a/README.md
+++ b/README.md
@@ -8,12 +8,14 @@ This library is based around the [XBRL 2.1 spec](https://www.xbrl.org/Specificat
It implements support for parsing basic facts (not tuples of facts), contexts and units through the `xml.Unmarshaler` interface.
See the package example in the godocs for how to unmarshal into the `XBRL` struct.
+You can also use `Parse`, `ParseReader`, or `Decode` as small convenience helpers around the same `encoding/xml` path.
-This library supports basic validation that checks for malformed facts and broken references between facts and contexts/units (see `XBRL.Validate()`),
+This library supports structural validation that checks for malformed contexts, units, and facts, duplicate IDs, unsupported scenarios, unsupported top-level base `item` and `tuple` elements, and broken references between facts and contexts/units (see `XBRL.Validate()`),
but it does _not_ implement full semantic validation of XBRL documents.
There are no abstractions added on-top of the XBRL data structure, which makes this library flexible and simple,
but it also means you might have to read up a bit on how XBRL works to take full advantage of it.
+The parser preserves lower-level XML details such as root attributes, XML names, raw link/reference elements, and generic segment content for callers that need them.
To give you a head start, here's some basics about XBRL:
@@ -52,7 +54,7 @@ The above fact doesn't directly tell us in which quarter EPS was `1.41`. That's
### Contexts
A [Context](https://www.xbrl.org/Specification/XBRL-2.1/REC-2003-12-31/XBRL-2.1-REC-2003-12-31+corrected-errata-2013-02-20.html#_4.7)
-describes a business entity, period of time, and an optional scenario (this library doesn't currently support scenarios, so we're going to gloss over them).
+describes a business entity, period of time, and an optional scenario (this library preserves scenario XML but does not interpret it, and `Context.Validate()` currently reports an error for any context that contains one).
When a fact references a context, it gives the fact more detail to help us understand what it means.
diff --git a/context.go b/context.go
index 057bcb3..f5a6f41 100644
--- a/context.go
+++ b/context.go
@@ -1,15 +1,20 @@
package xbrl
-import "encoding/xml"
+import (
+ "encoding/xml"
+ "errors"
+)
-// Context contains information about the Entity being described, the reporting Period, and the reporting Scenario (scenario is NOT implemented).
+// Context contains information about the Entity being described, the reporting Period, and the reporting Scenario.
// All of which are necessary for understanding a business Fact captured as an XBRL item.
+// Scenario is preserved as raw XML but is not interpreted; Context.Validate reports an error for any context that has one.
// https://www.xbrl.org/Specification/XBRL-2.1/REC-2003-12-31/XBRL-2.1-REC-2003-12-31+corrected-errata-2013-02-20.html#_4.7
type Context struct {
ID string `xml:"id,attr"`
- Period Period `xml:"period"`
- Entity Entity `xml:"entity"`
+ Period Period `xml:"period"`
+ Entity Entity `xml:"entity"`
+ Scenario *RawElement `xml:"scenario"`
}
// Entity documents the business entity for a Context (business, government department, individual, etc.).
@@ -19,6 +24,23 @@ type Entity struct {
Segments Segments `xml:"segment"`
}
+// Validate checks that e contains the structural fields required by XBRL.
+func (e Entity) Validate() error {
+ if e.Identifier.Scheme == "" {
+ return errors.New("entity identifier missing scheme")
+ }
+ if e.Identifier.Value == "" {
+ return errors.New("entity identifier missing value")
+ }
+
+ return nil
+}
+
+// IsValid validates e and returns true if no error was found.
+func (e Entity) IsValid() bool {
+ return e.Validate() == nil
+}
+
// Identifier specifies a scheme for identifying business entities and an identifier that follows the scheme.
// https://www.xbrl.org/Specification/XBRL-2.1/REC-2003-12-31/XBRL-2.1-REC-2003-12-31+corrected-errata-2013-02-20.html#_4.7.3.1
// For Example:
@@ -43,6 +65,7 @@ type Segment struct {
XMLName xml.Name
Attributes []xml.Attr `xml:",any,attr"`
Value string `xml:",chardata"`
+ InnerXML string `xml:",innerxml"`
}
// UnmarshalXML implements xml.Unmarshaller for Segments.
@@ -60,22 +83,27 @@ func (s *Segments) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
return nil
}
+// PeriodType describes which supported shape a Period has.
type PeriodType string
// All the supported PeriodType values. See Period.Type() for more information.
const (
+ // PeriodTypeDuration is a period with startDate and endDate.
PeriodTypeDuration PeriodType = "duration"
- PeriodTypeInstant PeriodType = "instant"
- PeriodTypeForever PeriodType = "forever"
- PeriodTypeInvalid PeriodType = "invalid"
+ // PeriodTypeInstant is a period with instant.
+ PeriodTypeInstant PeriodType = "instant"
+ // PeriodTypeForever is a period with forever.
+ PeriodTypeForever PeriodType = "forever"
+ // PeriodTypeInvalid is a period that does not match exactly one supported shape.
+ PeriodTypeInvalid PeriodType = "invalid"
)
// Period contains an instant or interval of time for a Context.
// https://www.xbrl.org/Specification/XBRL-2.1/REC-2003-12-31/XBRL-2.1-REC-2003-12-31+corrected-errata-2013-02-20.html#_4.7.2
type Period struct {
- // StartDate is non-nil and guaranteed to be before EndDate if Period.Type() returns Duration.
+ // StartDate is non-nil if Period.Type() returns Duration.
StartDate *string `xml:"startDate"`
- // EndDate is non-nil and guaranteed to be after StartDate if Period.Type() returns Duration.
+ // EndDate is non-nil if Period.Type() returns Duration.
EndDate *string `xml:"endDate"`
// Instant is non-nil if Period.Type() returns Instant
@@ -90,17 +118,78 @@ type Period struct {
// Type returns the type of this period to help clarify what fields in the Period struct are non-nil and valid to use.
// The comments on the attributes inside the Period struct explain when they can be used depending on what this function returns.
func (p Period) Type() PeriodType {
+ periodType := PeriodTypeInvalid
+ matches := 0
+
if p.Forever != nil {
- return PeriodTypeForever
+ periodType = PeriodTypeForever
+ matches++
}
if p.Instant != nil {
- return PeriodTypeInstant
+ periodType = PeriodTypeInstant
+ matches++
}
if p.StartDate != nil && p.EndDate != nil {
- return PeriodTypeDuration
+ periodType = PeriodTypeDuration
+ matches++
+ }
+
+ if matches != 1 {
+ return PeriodTypeInvalid
+ }
+
+ return periodType
+}
+
+// Validate checks that p has exactly one supported XBRL period shape.
+func (p Period) Validate() error {
+ switch p.Type() {
+ case PeriodTypeDuration:
+ if *p.StartDate == "" {
+ return errors.New("duration period missing startDate")
+ }
+ if *p.EndDate == "" {
+ return errors.New("duration period missing endDate")
+ }
+ case PeriodTypeInstant:
+ if *p.Instant == "" {
+ return errors.New("instant period missing value")
+ }
+ case PeriodTypeForever:
+ return nil
+ default:
+ return errors.New("period must have exactly one of duration, instant, or forever")
}
- return PeriodTypeInvalid
+ return nil
+}
+
+// IsValid validates p and returns true if no error was found.
+func (p Period) IsValid() bool {
+ return p.Validate() == nil
+}
+
+// Validate checks that c has an ID, a valid Entity and Period, and no Scenario (scenarios are not supported).
+func (c Context) Validate() error {
+ if c.ID == "" {
+ return errors.New("context missing id")
+ }
+ if err := c.Entity.Validate(); err != nil {
+ return err
+ }
+ if err := c.Period.Validate(); err != nil {
+ return err
+ }
+ if c.Scenario != nil {
+ return errors.New("scenario is not supported")
+ }
+
+ return nil
+}
+
+// IsValid validates c and returns true if no error was found.
+func (c Context) IsValid() bool {
+ return c.Validate() == nil
}
diff --git a/context_test.go b/context_test.go
index 18fbdf1..b41fc38 100644
--- a/context_test.go
+++ b/context_test.go
@@ -74,13 +74,105 @@ func TestUnmarshalContext(t *testing.T) {
assert.Equal(t, xml.Name{Space: "xbrldi", Local: "explicitMember"}, context.Entity.Segments[0].XMLName)
assert.Equal(t, []xml.Attr{{Name: xml.Name{Local: "dimension"}, Value: "us-gaap:StatementClassOfStockAxis"}}, context.Entity.Segments[0].Attributes)
assert.Equal(t, "us-gaap:CommonStockMember", context.Entity.Segments[0].Value)
+ assert.Equal(t, "us-gaap:CommonStockMember", context.Entity.Segments[0].InnerXML)
assert.Equal(t, xml.Name{Space: "myns", Local: "cool_segment"}, context.Entity.Segments[1].XMLName)
assert.Empty(t, context.Entity.Segments[1].Attributes)
assert.Equal(t, "I follow my own rules", context.Entity.Segments[1].Value)
+ assert.Equal(t, "I follow my own rules", context.Entity.Segments[1].InnerXML)
assert.Equal(t, PeriodTypeDuration, context.Period.Type())
assert.Equal(t, "2020-09-27", *context.Period.StartDate)
assert.Equal(t, "2021-03-27", *context.Period.EndDate)
})
+
+ t.Run("segment preserves nested raw XML", func(t *testing.T) {
+ // language=xml
+ contextXML := `
+
+ 0000320193
+
+ value
+
+
+
+`
+
+ var context Context
+ require.NoError(t, xml.Unmarshal([]byte(contextXML), &context))
+
+ require.Len(t, context.Entity.Segments, 1)
+ assert.Equal(t, xml.Name{Space: "dim", Local: "typedMember"}, context.Entity.Segments[0].XMLName)
+ assert.Contains(t, context.Entity.Segments[0].InnerXML, "value")
+ })
+}
+
+func TestContextValidation(t *testing.T) {
+ t.Run("period must have exactly one shape", func(t *testing.T) {
+ period := Period{
+ StartDate: stringPtr("2020-09-27"),
+ EndDate: stringPtr("2021-03-27"),
+ Instant: stringPtr("2021-03-27"),
+ }
+
+ assert.Equal(t, PeriodTypeInvalid, period.Type())
+ assert.False(t, period.IsValid())
+ })
+
+ t.Run("duration requires start and end values", func(t *testing.T) {
+ period := Period{
+ StartDate: stringPtr("2020-09-27"),
+ EndDate: stringPtr(""),
+ }
+
+ assert.EqualError(t, period.Validate(), "duration period missing endDate")
+ })
+
+ t.Run("entity requires identifier scheme and value", func(t *testing.T) {
+ entity := Entity{
+ Identifier: Identifier{
+ Scheme: "http://www.sec.gov/CIK",
+ },
+ }
+
+ assert.EqualError(t, entity.Validate(), "entity identifier missing value")
+ })
+
+ t.Run("context requires id and entity identifier", func(t *testing.T) {
+ context := Context{
+ Period: Period{Instant: stringPtr("2021-03-27")},
+ Entity: Entity{
+ Identifier: Identifier{
+ Scheme: "http://www.sec.gov/CIK",
+ Value: "0000320193",
+ },
+ },
+ }
+
+ assert.EqualError(t, context.Validate(), "context missing id")
+ assert.False(t, context.IsValid())
+ })
+
+ t.Run("scenario is preserved but unsupported by validation", func(t *testing.T) {
+ // language=xml
+ contextXML := `
+
+ 0000320193
+
+
+ 2021-03-27
+
+
+ true
+
+`
+
+ var context Context
+ require.NoError(t, xml.Unmarshal([]byte(contextXML), &context))
+
+ require.NotNil(t, context.Scenario)
+ assert.Equal(t, xml.Name{Local: "scenario"}, context.Scenario.XMLName)
+ assert.Contains(t, context.Scenario.InnerXML, "forecast")
+ assert.EqualError(t, context.Validate(), "scenario is not supported")
+ })
}
diff --git a/doc.go b/doc.go
new file mode 100644
index 0000000..66c8735
--- /dev/null
+++ b/doc.go
@@ -0,0 +1,17 @@
+// Package xbrl parses XBRL 2.1 instance documents into simple Go data.
+//
+// The package preserves XBRL concepts such as facts, contexts, periods,
+// entities, segments, units, XML names, attributes, and raw reference elements.
+// It does not load taxonomies, resolve linkbases, normalize financial
+// statements, transform Inline XBRL, or perform accounting-rule validation.
+//
+// XML unmarshalling is a first-class API:
+//
+// var doc xbrl.XBRL
+// err := xml.Unmarshal(data, &doc)
+//
+// Parse, ParseReader, and Decode are convenience helpers around the same
+// encoding/xml path. Parsing and validation are separate operations; call
+// XBRL.Validate when you need structural checks for contexts, units, facts, and
+// references.
+package xbrl
diff --git a/example_unmarshal_test.go b/example_unmarshal_test.go
index 5abdf74..4cd0ceb 100644
--- a/example_unmarshal_test.go
+++ b/example_unmarshal_test.go
@@ -7,7 +7,11 @@ import (
"github.com/massive-com/xbrl-parser/v2"
)
-const doc = `
+const doc = `
@@ -32,12 +36,11 @@ func Example() {
if err := xml.Unmarshal([]byte(doc), &processed); err != nil {
panic(err)
}
-
- fact := processed.Facts[0]
- if !fact.IsValid() {
- panic("fact invalid!")
+ if err := processed.Validate(); err != nil {
+ panic(err)
}
+ fact := processed.Facts[0]
factType := fact.Type()
numericValue, err := fact.NumericValue()
@@ -48,9 +51,23 @@ func Example() {
panic(err)
}
- fmt.Printf("Fact: %s:%s (type: %s)\n", fact.XMLName.Space, fact.XMLName.Local, factType)
+ fmt.Printf("Fact: %s (namespace: %s, type: %s)\n", fact.XMLName.Local, fact.XMLName.Space, factType)
fmt.Printf(" %.0f %s on %s\n", numericValue, factUnit.String(), *factContext.Period.Instant)
- // Output: Fact: ci:assets (type: non_fraction)
+ // Output: Fact: assets (namespace: http://www.xbrl.org/us/gaap/ci/2003/usfr-ci-2003, type: non_fraction)
// 727 shares on 2021-04-16
}
+
+func ExampleParse() {
+ processed, err := xbrl.Parse([]byte(doc))
+ if err != nil {
+ panic(err)
+ }
+ if err := processed.Validate(); err != nil {
+ panic(err)
+ }
+
+ fmt.Println(len(processed.Facts))
+
+ // Output: 1
+}
diff --git a/fact.go b/fact.go
index c65a174..361d26c 100644
--- a/fact.go
+++ b/fact.go
@@ -6,6 +6,7 @@ import (
"strconv"
)
+// FactType describes the structural category of a parsed Fact.
type FactType string
const (
@@ -17,7 +18,7 @@ const (
// A non-numeric fact is guaranteed to have an XMLName, ContextRef, and ValueStr.
FactTypeNonNumeric FactType = "non_numeric"
- // FactTypeNonFraction is a non-nil fact describing a numeric value that can precisely expressed as a simple value.
+ // FactTypeNonFraction is a non-nil fact describing a numeric value that can be precisely expressed as a simple value.
// A non-fraction fact is guaranteed to have an XMLName, ContextRef, UnitRef, ValueStr, and exactly one of Precision or Decimals.
//
// For example: 727432
@@ -119,38 +120,115 @@ func (f Fact) Type() FactType {
return FactTypeNonNumeric
}
-// IsValid confirms that f has at least the required fields that the FactType requires.
-// Note that this function is not strict about extra fields existing.
+// IsValid confirms that f has the structural fields required for its FactType.
func (f Fact) IsValid() bool {
+ return f.Validate() == nil
+}
+
+// Validate checks that f has the structural fields required for its FactType.
+// It does not perform taxonomy-aware validation.
+func (f Fact) Validate() error {
// All facts must have a context ref
if f.ContextRef == "" {
- return false
+ return errors.New("missing contextRef")
}
// Some types have particular rules beyond what Type() checks for that must be true to be considered valid.
switch f.Type() {
case FactTypeFraction:
- // Fraction must have a non-zero Denominator
- return *f.Denominator != 0
+ if f.UnitRef == nil || *f.UnitRef == "" {
+ return errors.New("fraction fact missing unitRef")
+ }
+ if f.Numerator == nil {
+ return errors.New("fraction fact missing numerator")
+ }
+ if f.Denominator == nil {
+ return errors.New("fraction fact missing denominator")
+ }
+ if *f.Denominator == 0 {
+ return errors.New("fraction fact denominator is zero")
+ }
+ if f.Precision != nil || f.Decimals != nil {
+ return errors.New("fraction fact cannot have precision or decimals")
+ }
case FactTypeNonFraction:
- // NonFractions must have either a non-nil Precision or non-nil Decimals field
- return (f.Precision == nil) != (f.Decimals == nil)
+ if f.UnitRef == nil || *f.UnitRef == "" {
+ return errors.New("non-fraction fact missing unitRef")
+ }
+ if f.ValueStr == nil {
+ return errors.New("non-fraction fact missing value")
+ }
+ if f.Numerator != nil || f.Denominator != nil {
+ return errors.New("non-fraction fact cannot have numerator or denominator")
+ }
+ if (f.Precision == nil) == (f.Decimals == nil) {
+ return errors.New("non-fraction fact must have exactly one of precision or decimals")
+ }
+ if f.Precision != nil && !isValidPrecision(*f.Precision) {
+ return errors.New("non-fraction fact has invalid precision")
+ }
+ if f.Decimals != nil && !isValidDecimals(*f.Decimals) {
+ return errors.New("non-fraction fact has invalid decimals")
+ }
+ if _, err := strconv.ParseFloat(*f.ValueStr, 64); err != nil {
+ return err
+ }
case FactTypeNonNumeric:
- return f.ValueStr != nil
- default:
+ if f.ValueStr == nil {
+ return errors.New("non-numeric fact missing value")
+ }
+ if f.UnitRef != nil {
+ return errors.New("non-numeric fact cannot have unitRef")
+ }
+ if f.Precision != nil || f.Decimals != nil {
+ return errors.New("non-numeric fact cannot have precision or decimals")
+ }
+ if f.Numerator != nil || f.Denominator != nil {
+ return errors.New("non-numeric fact cannot have numerator or denominator")
+ }
+ }
+
+ return nil
+}
+
+func isValidPrecision(precision string) bool {
+ if precision == "INF" {
+ return true
+ }
+
+ value, err := strconv.Atoi(precision)
+ return err == nil && value >= 0
+}
+
+func isValidDecimals(decimals string) bool {
+ if decimals == "INF" {
return true
}
+
+ _, err := strconv.Atoi(decimals)
+ return err == nil
}
// NumericValue attempts to return the numeric value this fact represents.
-// This function returns
// If this fact is a fraction type, this function returns the value of numerator / denominator.
// Note that fraction type facts generally cannot be precisely represented as a float64 and may have some rounding error.
func (f Fact) NumericValue() (float64, error) {
switch f.Type() {
case FactTypeFraction:
+ if f.Numerator == nil {
+ return 0, errors.New("fraction fact missing numerator")
+ }
+ if f.Denominator == nil {
+ return 0, errors.New("fraction fact missing denominator")
+ }
+ if *f.Denominator == 0 {
+ return 0, errors.New("fraction fact denominator is zero")
+ }
return *f.Numerator / *f.Denominator, nil
case FactTypeNonFraction:
+ if f.ValueStr == nil {
+ return 0, errors.New("non-fraction fact missing value")
+ }
return strconv.ParseFloat(*f.ValueStr, 64)
default:
return 0, ErrNonNumericFactType
diff --git a/fact_test.go b/fact_test.go
index 70f77b4..8f7baae 100644
--- a/fact_test.go
+++ b/fact_test.go
@@ -108,3 +108,135 @@ func TestUnmarshalFact(t *testing.T) {
assert.EqualValues(t, 1.0/3.0, val)
})
}
+
+func TestFactValidation(t *testing.T) {
+ unitRef := "u1"
+ precision := "3"
+ decimals := "2"
+ invalidPrecision := "-1"
+ invalidDecimals := "not-an-integer"
+
+ tests := []struct {
+ name string
+ fact Fact
+ wantErr string
+ }{
+ {
+ name: "fact requires context ref",
+ fact: Fact{
+ UnitRef: &unitRef,
+ Precision: &precision,
+ ValueStr: stringPtr("727"),
+ },
+ wantErr: "missing contextRef",
+ },
+ {
+ name: "non-fraction missing value",
+ fact: Fact{
+ ContextRef: "c1",
+ UnitRef: &unitRef,
+ Precision: &precision,
+ },
+ wantErr: "non-fraction fact missing value",
+ },
+ {
+ name: "non-fraction with precision and decimals",
+ fact: Fact{
+ ContextRef: "c1",
+ UnitRef: &unitRef,
+ Precision: &precision,
+ Decimals: &decimals,
+ ValueStr: stringPtr("727"),
+ },
+ wantErr: "non-fraction fact must have exactly one of precision or decimals",
+ },
+ {
+ name: "non-fraction with invalid precision",
+ fact: Fact{
+ ContextRef: "c1",
+ UnitRef: &unitRef,
+ Precision: &invalidPrecision,
+ ValueStr: stringPtr("727"),
+ },
+ wantErr: "non-fraction fact has invalid precision",
+ },
+ {
+ name: "non-fraction with invalid decimals",
+ fact: Fact{
+ ContextRef: "c1",
+ UnitRef: &unitRef,
+ Decimals: &invalidDecimals,
+ ValueStr: stringPtr("727"),
+ },
+ wantErr: "non-fraction fact has invalid decimals",
+ },
+ {
+ name: "non-numeric with precision",
+ fact: Fact{
+ ContextRef: "c1",
+ Precision: &precision,
+ ValueStr: stringPtr("not numeric"),
+ },
+ wantErr: "non-numeric fact cannot have precision or decimals",
+ },
+ {
+ name: "fraction with zero denominator",
+ fact: Fact{
+ ContextRef: "c1",
+ UnitRef: &unitRef,
+ Numerator: floatPtr(1),
+ Denominator: floatPtr(0),
+ },
+ wantErr: "fraction fact denominator is zero",
+ },
+ {
+ name: "fraction with precision",
+ fact: Fact{
+ ContextRef: "c1",
+ UnitRef: &unitRef,
+ Precision: &precision,
+ Numerator: floatPtr(1),
+ Denominator: floatPtr(3),
+ },
+ wantErr: "fraction fact cannot have precision or decimals",
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ err := tt.fact.Validate()
+ require.Error(t, err)
+ assert.Contains(t, err.Error(), tt.wantErr)
+ assert.False(t, tt.fact.IsValid())
+ })
+ }
+}
+
+func TestNilFactValidation(t *testing.T) {
+ nilValue := true
+ fact := Fact{
+ XMLName: xml.Name{Space: "myns", Local: "nilFact"},
+ Nil: &nilValue,
+ ContextRef: "c1",
+ }
+
+ assert.Equal(t, FactTypeNil, fact.Type())
+ assert.NoError(t, fact.Validate())
+}
+
+func TestNumericValueMalformedFactReturnsError(t *testing.T) {
+ unitRef := "u1"
+ precision := "3"
+ fact := Fact{
+ ContextRef: "c1",
+ UnitRef: &unitRef,
+ Precision: &precision,
+ }
+
+ _, err := fact.NumericValue()
+ assert.Error(t, err)
+}
+
+func floatPtr(val float64) *float64 {
+ return &val
+}
diff --git a/fuzz_test.go b/fuzz_test.go
new file mode 100644
index 0000000..f7370a2
--- /dev/null
+++ b/fuzz_test.go
@@ -0,0 +1,21 @@
+//go:build go1.18
+// +build go1.18
+
+package xbrl
+
+import "testing"
+
+func FuzzParseAndValidate(f *testing.F) {
+ f.Add([]byte(``))
+ f.Add([]byte(`e`))
+ f.Add([]byte(`727`))
+
+ f.Fuzz(func(t *testing.T, data []byte) {
+ doc, err := Parse(data)
+ if err != nil {
+ return
+ }
+
+ _ = doc.Validate()
+ })
+}
diff --git a/parse.go b/parse.go
new file mode 100644
index 0000000..5d20fe6
--- /dev/null
+++ b/parse.go
@@ -0,0 +1,44 @@
+package xbrl
+
+import (
+ "encoding/xml"
+ "errors"
+ "io"
+)
+
+// Parse unmarshals an XBRL instance document from data.
+// It does not call XBRL.Validate; parsing and structural validation are separate operations.
+func Parse(data []byte) (XBRL, error) {
+ var doc XBRL
+ if err := xml.Unmarshal(data, &doc); err != nil {
+ return XBRL{}, err
+ }
+
+ return doc, nil
+}
+
+// ParseReader decodes an XBRL instance document from r using encoding/xml.
+// It does not call XBRL.Validate.
+func ParseReader(r io.Reader) (XBRL, error) {
+ if r == nil {
+ return XBRL{}, errors.New("nil reader")
+ }
+
+ return Decode(xml.NewDecoder(r))
+}
+
+// Decode decodes an XBRL instance document with decoder.
+// Use this helper when callers need to configure xml.Decoder, such as setting CharsetReader.
+// It does not call XBRL.Validate.
+func Decode(decoder *xml.Decoder) (XBRL, error) {
+ if decoder == nil {
+ return XBRL{}, errors.New("nil decoder")
+ }
+
+ var doc XBRL
+ if err := decoder.Decode(&doc); err != nil {
+ return XBRL{}, err
+ }
+
+ return doc, nil
+}
diff --git a/parse_test.go b/parse_test.go
new file mode 100644
index 0000000..471e255
--- /dev/null
+++ b/parse_test.go
@@ -0,0 +1,56 @@
+package xbrl
+
+import (
+ "encoding/xml"
+ "io"
+ "os"
+ "strings"
+ "testing"
+
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+)
+
+func TestParseHelpers(t *testing.T) {
+ t.Run("Parse does not validate", func(t *testing.T) {
+ xbrlBytes, err := os.ReadFile("test_data/invalid_xbrl.xml")
+ require.NoError(t, err)
+
+ doc, err := Parse(xbrlBytes)
+ require.NoError(t, err)
+
+ assert.Error(t, doc.Validate())
+ })
+
+ t.Run("ParseReader", func(t *testing.T) {
+ doc, err := ParseReader(strings.NewReader(``))
+ require.NoError(t, err)
+
+ assert.Equal(t, xml.Name{Local: "xbrl"}, doc.XMLName)
+ })
+
+ t.Run("Decode uses caller configured decoder", func(t *testing.T) {
+ f, err := os.Open("test_data/edgr-2004_10k.xml")
+ require.NoError(t, err)
+ defer f.Close()
+
+ decoder := xml.NewDecoder(f)
+ decoder.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) {
+ return input, nil
+ }
+
+ doc, err := Decode(decoder)
+ require.NoError(t, err)
+
+ assert.Equal(t, xml.Name{Space: xbrlInstanceNamespace, Local: "xbrl"}, doc.XMLName)
+ assert.Len(t, doc.ContextsByID, 4)
+ })
+
+ t.Run("nil inputs", func(t *testing.T) {
+ _, err := ParseReader(nil)
+ assert.EqualError(t, err, "nil reader")
+
+ _, err = Decode(nil)
+ assert.EqualError(t, err, "nil decoder")
+ })
+}
diff --git a/unit.go b/unit.go
index 2aa3187..53a28b3 100644
--- a/unit.go
+++ b/unit.go
@@ -1,13 +1,18 @@
package xbrl
-import "strings"
+import (
+ "errors"
+ "strings"
+)
// Unit specifies the unit in which a numeric fact has been measured.
// A Unit can be either a simple measure, product of measures, or a ratio of products of measures with a numerator and a denominator.
//
// A simple unit that represents shares looks like:
//
-// shares
+//
+// shares
+//
//
//
// Numeric Facts reference units by ID via the Fact's `unitRef` attribute.
@@ -18,17 +23,19 @@ type Unit struct {
Divide *Divide `xml:"divide"`
}
-// Divide represents a ratios of Units that has a numerator and a denominator.
+// Divide represents a ratio of units that has a numerator and a denominator.
// For example, XBRL can represent a complex unit like earnings per share (EPS) as dollars per share (USD / share):
//
-//
-//
-// iso4127:USD
-//
-//
-// shares
-//
-//
+//
+//
+//
+// iso4217:USD
+//
+//
+// shares
+//
+//
+//
//
//
// https://www.xbrl.org/Specification/XBRL-2.1/REC-2003-12-31/XBRL-2.1-REC-2003-12-31+corrected-errata-2013-02-20.html#_4.8.2
@@ -47,22 +54,103 @@ type Measure struct {
Value string `xml:",chardata"`
}
+// Measures is a product of one or more Measure values.
type Measures []Measure
+// Validate checks that u has the structural fields required by XBRL.
+func (u Unit) Validate() error {
+ if u.ID == "" {
+ return errors.New("unit missing id")
+ }
+ if (len(u.Measures) == 0) == (u.Divide == nil) {
+ return errors.New("unit must have either measures or divide")
+ }
+
+ if u.Divide != nil {
+ return u.Divide.Validate()
+ }
+
+ return u.Measures.Validate()
+}
+
+// IsValid validates u and returns true if no error was found.
+func (u Unit) IsValid() bool {
+ return u.Validate() == nil
+}
+
+// Validate checks that d has numerator and denominator measures.
+func (d Divide) Validate() error {
+ if len(d.Numerator) == 0 {
+ return errors.New("divide missing numerator measures")
+ }
+ if len(d.Denominator) == 0 {
+ return errors.New("divide missing denominator measures")
+ }
+ if err := d.Numerator.Validate(); err != nil {
+ return err
+ }
+ if err := d.Denominator.Validate(); err != nil {
+ return err
+ }
+
+ return nil
+}
+
+// IsValid validates d and returns true if no error was found.
+func (d Divide) IsValid() bool {
+ return d.Validate() == nil
+}
+
+// Validate checks that m contains a non-empty measure value.
+func (m Measure) Validate() error {
+ if m.Value == "" {
+ return errors.New("measure missing value")
+ }
+
+ return nil
+}
+
+// IsValid validates m and returns true if no error was found.
+func (m Measure) IsValid() bool {
+ return m.Validate() == nil
+}
+
+// Validate checks that m has at least one measure and that each measure has a value.
+func (m Measures) Validate() error {
+ if len(m) == 0 {
+ return errors.New("measures missing values")
+ }
+
+ for _, measure := range m {
+ if err := measure.Validate(); err != nil {
+ return err
+ }
+ }
+
+ return nil
+}
+
+// IsValid validates m and returns true if no error was found.
+func (m Measures) IsValid() bool {
+ return m.Validate() == nil
+}
+
// String returns a human readable representation of the Unit.
func (u Unit) String() string {
- // If the Divide element is not nil, there can be no top-level Meaures.
+ // If the Divide element is not nil, there can be no top-level Measures.
if u.Divide != nil {
return u.Divide.Numerator.String() + " / " + u.Divide.Denominator.String()
}
- // If the divider element is nil, there must be 1+ top-level Measures.
+ // If the Divide element is nil, there must be 1+ top-level Measures.
return u.Measures.String()
}
// String returns the local name of the measure if the value is formatted as 'xsd:Qname', otherwise the value itself is returned.
-// Ex: `iso4127:USD` -> "USD"
-// `shares` -> "shares"
+// This is a display helper only. Use Measure.Value when the raw XBRL value is significant.
+// Ex: `iso4217:USD` -> "USD"
+//
+// `shares` -> "shares"
func (m Measure) String() string {
if index := strings.IndexRune(m.Value, ':'); index != -1 && index < len(m.Value) {
return m.Value[index+1 : len(m.Value)]
diff --git a/unit_test.go b/unit_test.go
index 541c0b3..ab44587 100644
--- a/unit_test.go
+++ b/unit_test.go
@@ -106,3 +106,68 @@ func TestUnmarshalUnit(t *testing.T) {
assert.Equal(t, "USD / feet * feet", unit.String())
})
}
+
+func TestUnitValidation(t *testing.T) {
+ tests := []struct {
+ name string
+ unit Unit
+ wantErr string
+ }{
+ {
+ name: "unit requires id",
+ unit: Unit{Measures: Measures{{Value: "shares"}}},
+ wantErr: "unit missing id",
+ },
+ {
+ name: "unit requires measure or divide",
+ unit: Unit{ID: "u1"},
+ wantErr: "unit must have either measures or divide",
+ },
+ {
+ name: "unit cannot have measures and divide",
+ unit: Unit{
+ ID: "u1",
+ Measures: Measures{{Value: "shares"}},
+ Divide: &Divide{
+ Numerator: Measures{{Value: "iso4217:USD"}},
+ Denominator: Measures{{Value: "shares"}},
+ },
+ },
+ wantErr: "unit must have either measures or divide",
+ },
+ {
+ name: "divide requires denominator measures",
+ unit: Unit{
+ ID: "u1",
+ Divide: &Divide{
+ Numerator: Measures{{Value: "iso4217:USD"}},
+ },
+ },
+ wantErr: "divide missing denominator measures",
+ },
+ {
+ name: "measure requires value",
+ unit: Unit{
+ ID: "u1",
+ Measures: Measures{{}},
+ },
+ wantErr: "measure missing value",
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ err := tt.unit.Validate()
+ require.Error(t, err)
+ assert.Contains(t, err.Error(), tt.wantErr)
+ assert.False(t, tt.unit.IsValid())
+ })
+ }
+}
+
+func TestMeasureValuePreservesRawQName(t *testing.T) {
+ measure := Measure{Value: "iso4217:USD"}
+
+ assert.Equal(t, "iso4217:USD", measure.Value)
+ assert.Equal(t, "USD", measure.String())
+}
diff --git a/xbrl.go b/xbrl.go
index b41a692..362be86 100644
--- a/xbrl.go
+++ b/xbrl.go
@@ -5,22 +5,44 @@ import (
"fmt"
)
-// NotImplemented represents an expected element in the XBRL that isn't handled yet, but should not be considered a Fact.
+const (
+ xbrlInstanceNamespace = "http://www.xbrl.org/2003/instance"
+)
+
+// NotImplemented represents a count of expected XBRL elements that are not handled in detail.
+// New code should prefer the RawElement fields that preserve XML names, attributes, and inner XML.
type NotImplemented []*struct{}
+// RawElement preserves an XML element that this package does not model in detail.
+type RawElement struct {
+ XMLName xml.Name
+ Attributes []xml.Attr `xml:",any,attr"`
+ InnerXML string `xml:",innerxml"`
+}
+
// RawXBRL represents the XML structure of an XBRL document.
// This is not a feature complete XBRL parser!
-// See the fields of type NotImplemented for an idea of what's missing.
+// See the fields of type RawElement and NotImplemented for an idea of what's missing.
// Also note that this struct doesn't support Tuple facts (https://www.xbrl.org/Specification/XBRL-2.1/REC-2003-12-31/XBRL-2.1-REC-2003-12-31+corrected-errata-2013-02-20.html#_4.9)
//
// You can use this struct directly, but XBRL is structured in a more convenient way.
// See the comment on XBRL for more info.
type RawXBRL struct {
+ XMLName xml.Name
+ Attributes []xml.Attr
+
Contexts []Context `xml:"context"`
Units []Unit `xml:"unit"`
Facts []Fact `xml:",any"`
+ SchemaRefs []RawElement
+ LinkbaseRefs []RawElement
+ RoleRefs []RawElement
+ ArcRoleRefs []RawElement
+ FootnoteLinks []RawElement
+ UnsupportedTopLevel []RawElement
+
// The fields below are not properly implemented, but need to be here so they aren't lumped into the `Facts` slice.
SchemaRef NotImplemented `xml:"schemaRef"`
@@ -30,14 +52,160 @@ type RawXBRL struct {
FootnoteLink NotImplemented `xml:"footnoteLink"`
}
-// XBRL contains maps for contexts and units so they can be accessed easier when looping through facts.
+// UnmarshalXML implements xml.Unmarshaler and preserves the XBRL root envelope
+// while still routing taxonomy-defined top-level elements into Facts.
+//
+// The receiver is first reset to a fresh RawXBRL that holds only the root element's
+// name and a copy of its attributes. Errors from d.Token and from decodeChild are
+// returned unchanged.
+func (r *RawXBRL) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
+ *r = RawXBRL{
+ XMLName: start.Name,
+ Attributes: copyAttrs(start.Attr),
+ }
+
+ for {
+ token, err := d.Token()
+ if err != nil {
+ return err
+ }
+
+ // Only start and end elements are acted on; every other token kind falls
+ // through the switch and is ignored.
+ switch token := token.(type) {
+ case xml.StartElement:
+ // Hand the child element to decodeChild together with the decoder.
+ if err := r.decodeChild(d, token); err != nil {
+ return err
+ }
+ case xml.EndElement:
+ // Stop once the end tag whose name matches the root start element is seen.
+ if token.Name == start.Name {
+ return nil
+ }
+ }
+ }
+}
+
+// decodeChild decodes one child element of the root, choosing the destination from the
+// element's local name (the switch does not look at the namespace):
+//   - "context" and "unit" are decoded into Contexts and Units;
+//   - "schemaRef", "linkbaseRef", "roleRef", "arcroleRef" and "footnoteLink" are kept as
+//     RawElement values, and a placeholder is appended to the matching NotImplemented field;
+//   - elements accepted by isKnownUnsupportedTopLevel are kept in UnsupportedTopLevel;
+//   - every other element is decoded as a Fact.
+//
+// A decoding error is returned as-is, and nothing is appended for that element.
+func (r *RawXBRL) decodeChild(d *xml.Decoder, start xml.StartElement) error {
+ switch start.Name.Local {
+ case "context":
+ var context Context
+ if err := d.DecodeElement(&context, &start); err != nil {
+ return err
+ }
+ r.Contexts = append(r.Contexts, context)
+ case "unit":
+ var unit Unit
+ if err := d.DecodeElement(&unit, &start); err != nil {
+ return err
+ }
+ r.Units = append(r.Units, unit)
+ case "schemaRef":
+ element, err := decodeRawElement(d, start)
+ if err != nil {
+ return err
+ }
+ r.SchemaRefs = append(r.SchemaRefs, element)
+ // Also record a placeholder in the NotImplemented field for this element kind.
+ r.SchemaRef = append(r.SchemaRef, &struct{}{})
+ case "linkbaseRef":
+ element, err := decodeRawElement(d, start)
+ if err != nil {
+ return err
+ }
+ r.LinkbaseRefs = append(r.LinkbaseRefs, element)
+ r.LinkbaseRef = append(r.LinkbaseRef, &struct{}{})
+ case "roleRef":
+ element, err := decodeRawElement(d, start)
+ if err != nil {
+ return err
+ }
+ r.RoleRefs = append(r.RoleRefs, element)
+ r.RoleRef = append(r.RoleRef, &struct{}{})
+ case "arcroleRef":
+ element, err := decodeRawElement(d, start)
+ if err != nil {
+ return err
+ }
+ r.ArcRoleRefs = append(r.ArcRoleRefs, element)
+ r.ArcRoleRef = append(r.ArcRoleRef, &struct{}{})
+ case "footnoteLink":
+ element, err := decodeRawElement(d, start)
+ if err != nil {
+ return err
+ }
+ r.FootnoteLinks = append(r.FootnoteLinks, element)
+ r.FootnoteLink = append(r.FootnoteLink, &struct{}{})
+ default:
+ // Elements matched by isKnownUnsupportedTopLevel are preserved raw instead of
+ // being decoded as facts.
+ if isKnownUnsupportedTopLevel(start.Name) {
+ element, err := decodeRawElement(d, start)
+ if err != nil {
+ return err
+ }
+ r.UnsupportedTopLevel = append(r.UnsupportedTopLevel, element)
+ return nil
+ }
+
+ var fact Fact
+ if err := d.DecodeElement(&fact, &start); err != nil {
+ return err
+ }
+ r.Facts = append(r.Facts, fact)
+ }
+
+ return nil
+}
+
+// isKnownUnsupportedTopLevel reports whether name is "item" or "tuple" in the
+// xbrlInstanceNamespace namespace.
+func isKnownUnsupportedTopLevel(name xml.Name) bool {
+ return name.Space == xbrlInstanceNamespace && (name.Local == "item" || name.Local == "tuple")
+}
+
+// decodeRawElement decodes the element opened by start into a RawElement using
+// d.DecodeElement. On failure it returns a zero RawElement together with the error.
+func decodeRawElement(d *xml.Decoder, start xml.StartElement) (RawElement, error) {
+ var element RawElement
+ if err := d.DecodeElement(&element, &start); err != nil {
+ return RawElement{}, err
+ }
+
+ return element, nil
+}
+
+// copyAttrs returns a newly allocated slice holding the same attributes as attrs.
+// It returns nil when attrs is nil or empty.
+func copyAttrs(attrs []xml.Attr) []xml.Attr {
+ if len(attrs) == 0 {
+ return nil
+ }
+
+ copied := make([]xml.Attr, len(attrs))
+ copy(copied, attrs)
+ return copied
+}
+
+// copyRawElements returns a new slice with one entry per input element. Each entry is
+// a copy of the element whose Attributes slice is duplicated through copyAttrs.
+// It returns nil when elements is nil or empty.
+func copyRawElements(elements []RawElement) []RawElement {
+ if len(elements) == 0 {
+ return nil
+ }
+
+ copied := make([]RawElement, len(elements))
+ for index, element := range elements {
+ copied[index] = element
+ // Give the copy its own Attributes slice rather than the one held by element.
+ copied[index].Attributes = copyAttrs(element.Attributes)
+ }
+
+ return copied
+}
+
+// XBRL contains raw context and unit slices plus maps so contexts and units can be accessed more easily when looping through facts.
// You can either unmarshal XML directly into this struct (it has a custom unmarshaller),
// or you can unmarshal XML into a RawXBRL struct and call NewProcessedXBRL(RawXBRL) to process the raw XBRL into this format.
type XBRL struct {
+ // XMLName and Attributes are filled from the RawXBRL fields of the same name by NewProcessedXBRL.
+ XMLName xml.Name
+ Attributes []xml.Attr
+
+ // Contexts and Units hold the contexts and units as slices.
+ // Validate checks these slices when they are non-empty.
+ Contexts []Context
+ Units []Unit
+
ContextsByID map[string]Context
UnitsByID map[string]Unit
Facts []Fact
+
+ // The RawElement slices below are copied from the RawXBRL fields of the same name by NewProcessedXBRL.
+ SchemaRefs []RawElement
+ LinkbaseRefs []RawElement
+ RoleRefs []RawElement
+ ArcRoleRefs []RawElement
+ FootnoteLinks []RawElement
+ UnsupportedTopLevel []RawElement
}
// NewProcessedXBRL constructs a XBRL struct from a RawXBRL struct.
@@ -54,9 +222,19 @@ func NewProcessedXBRL(raw RawXBRL) XBRL {
}
return XBRL{
- ContextsByID: contextsByID,
- UnitsByID: unitsByID,
- Facts: raw.Facts,
+ XMLName: raw.XMLName,
+ Attributes: copyAttrs(raw.Attributes),
+ Contexts: append([]Context(nil), raw.Contexts...),
+ Units: append([]Unit(nil), raw.Units...),
+ ContextsByID: contextsByID,
+ UnitsByID: unitsByID,
+ Facts: append([]Fact(nil), raw.Facts...),
+ SchemaRefs: copyRawElements(raw.SchemaRefs),
+ LinkbaseRefs: copyRawElements(raw.LinkbaseRefs),
+ RoleRefs: copyRawElements(raw.RoleRefs),
+ ArcRoleRefs: copyRawElements(raw.ArcRoleRefs),
+ FootnoteLinks: copyRawElements(raw.FootnoteLinks),
+ UnsupportedTopLevel: copyRawElements(raw.UnsupportedTopLevel),
}
}
@@ -72,20 +250,35 @@ func (x *XBRL) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
return nil
}
-// Validate checks that all Facts are valid and reference contexts and units that also exist.
-// Note that since this parser does not properly handle Tuple elements, it's possible that some malformed Facts were unmarshalled.
+// Validate checks basic XBRL structure: contexts, units, facts, duplicate IDs, and fact references.
+// It does not perform taxonomy-aware, accounting, linkbase, footnote, tuple, or scenario validation.
func (x XBRL) Validate() error {
+ contextsByID, err := x.validatedContextsByID()
+ if err != nil {
+ return err
+ }
+
+ unitsByID, err := x.validatedUnitsByID()
+ if err != nil {
+ return err
+ }
+
+ if len(x.UnsupportedTopLevel) > 0 {
+ element := x.UnsupportedTopLevel[0]
+ return fmt.Errorf("unsupported top-level element: %s:%s", element.XMLName.Space, element.XMLName.Local)
+ }
+
for _, fact := range x.Facts {
- if !fact.IsValid() {
- return fmt.Errorf("invalid fact: %s:%s", fact.XMLName.Space, fact.XMLName.Local)
+ if err := fact.Validate(); err != nil {
+ return fmt.Errorf("invalid fact (%s:%s): %w", fact.XMLName.Space, fact.XMLName.Local, err)
}
- if _, exists := x.ContextsByID[fact.ContextRef]; !exists {
+ if _, exists := contextsByID[fact.ContextRef]; !exists {
return fmt.Errorf("fact (%s:%s) references non-existent context: %s", fact.XMLName.Space, fact.XMLName.Local, fact.ContextRef)
}
if fact.UnitRef != nil {
- if _, exists := x.UnitsByID[*fact.UnitRef]; !exists {
+ if _, exists := unitsByID[*fact.UnitRef]; !exists {
return fmt.Errorf("fact (%s:%s) references non-existent unit: %s", fact.XMLName.Space, fact.XMLName.Local, *fact.UnitRef)
}
}
@@ -94,7 +287,47 @@ func (x XBRL) Validate() error {
return nil
}
-// IsValid validates the Facts in this struct and returns true if no error was found.
+// IsValid reports whether Validate returns a nil error for this struct.
func (x XBRL) IsValid() bool {
return x.Validate() == nil
}
+
+// validatedContextsByID returns a context lookup map keyed by context ID.
+// When x.Contexts is empty it returns x.ContextsByID unchanged and does not validate
+// its entries. Otherwise it builds a fresh map from x.Contexts and returns an error for
+// the first context that fails Context.Validate or whose ID was already seen.
+func (x XBRL) validatedContextsByID() (map[string]Context, error) {
+ // No slice to check: fall back to the existing map as-is.
+ if len(x.Contexts) == 0 {
+ return x.ContextsByID, nil
+ }
+
+ contextsByID := make(map[string]Context, len(x.Contexts))
+ for _, context := range x.Contexts {
+ if err := context.Validate(); err != nil {
+ return nil, fmt.Errorf("invalid context (%s): %w", context.ID, err)
+ }
+ if _, exists := contextsByID[context.ID]; exists {
+ return nil, fmt.Errorf("duplicate context id: %s", context.ID)
+ }
+
+ contextsByID[context.ID] = context
+ }
+
+ return contextsByID, nil
+}
+
+// validatedUnitsByID returns a unit lookup map keyed by unit ID.
+// When x.Units is empty it returns x.UnitsByID unchanged and does not validate its
+// entries. Otherwise it builds a fresh map from x.Units and returns an error for the
+// first unit that fails Unit.Validate or whose ID was already seen.
+func (x XBRL) validatedUnitsByID() (map[string]Unit, error) {
+ // No slice to check: fall back to the existing map as-is.
+ if len(x.Units) == 0 {
+ return x.UnitsByID, nil
+ }
+
+ unitsByID := make(map[string]Unit, len(x.Units))
+ for _, unit := range x.Units {
+ if err := unit.Validate(); err != nil {
+ return nil, fmt.Errorf("invalid unit (%s): %w", unit.ID, err)
+ }
+ if _, exists := unitsByID[unit.ID]; exists {
+ return nil, fmt.Errorf("duplicate unit id: %s", unit.ID)
+ }
+
+ unitsByID[unit.ID] = unit
+ }
+
+ return unitsByID, nil
+}
diff --git a/xbrl_test.go b/xbrl_test.go
index 090f580..58eda94 100644
--- a/xbrl_test.go
+++ b/xbrl_test.go
@@ -26,6 +26,29 @@ func TestUnmarshalXBRL(t *testing.T) {
assert.Equal(t, 283, len(content.ContextsByID))
assert.Equal(t, 9, len(content.UnitsByID))
assert.Equal(t, 1070, len(content.Facts))
+ require.Len(t, content.SchemaRefs, 1)
+ assert.Equal(t, "aapl-20210327.xsd", requireAttr(t, content.SchemaRefs[0].Attributes, "http://www.w3.org/1999/xlink", "href"))
+
+ durationContext := content.ContextsByID["i02c0f3e92d75432fbe3c6a24022bf7b0_D20200927-20210327"]
+ assert.Equal(t, PeriodTypeDuration, durationContext.Period.Type())
+ assert.Equal(t, "2020-09-27", *durationContext.Period.StartDate)
+ assert.Equal(t, "2021-03-27", *durationContext.Period.EndDate)
+
+ segmentedContext := content.ContextsByID["iff44040cd61344d085f7a2b7a1076cb1_D20200927-20210327"]
+ require.Len(t, segmentedContext.Entity.Segments, 1)
+ assert.Equal(t, xml.Name{Space: "http://xbrl.org/2006/xbrldi", Local: "explicitMember"}, segmentedContext.Entity.Segments[0].XMLName)
+ assert.Equal(t, "us-gaap:CommonStockMember", segmentedContext.Entity.Segments[0].Value)
+ assert.Equal(t, "us-gaap:StatementClassOfStockAxis", requireAttr(t, segmentedContext.Entity.Segments[0].Attributes, "", "dimension"))
+
+ usdPerShare := content.UnitsByID["usdPerShare"]
+ require.NotNil(t, usdPerShare.Divide)
+ assert.Equal(t, "iso4217:USD", usdPerShare.Divide.Numerator[0].Value)
+ assert.Equal(t, "USD / shares", usdPerShare.String())
+
+ eps := requireFact(t, content.Facts, "http://fasb.org/us-gaap/2020-01-31", "EarningsPerShareBasic", "1.41")
+ assert.Equal(t, "ia09408265617434fbc06a7e4c6b101bc_D20201227-20210327", eps.ContextRef)
+ require.NotNil(t, eps.UnitRef)
+ assert.Equal(t, "usdPerShare", *eps.UnitRef)
})
t.Run("real-world xbrl from 2004", func(t *testing.T) {
@@ -46,6 +69,20 @@ func TestUnmarshalXBRL(t *testing.T) {
assert.Equal(t, 4, len(content.ContextsByID))
assert.Equal(t, 2, len(content.UnitsByID))
assert.Equal(t, 154, len(content.Facts))
+ require.Len(t, content.SchemaRefs, 1)
+ assert.Equal(t, "edgr-20050228.xsd", requireAttr(t, content.SchemaRefs[0].Attributes, "http://www.w3.org/1999/xlink", "href"))
+
+ firstFact := content.Facts[0]
+ assert.Equal(t, xml.Name{Space: "http://www.xbrl.org/us/fr/common/pte/2005-02-28", Local: "AccountsPayable"}, firstFact.XMLName)
+ assert.Equal(t, "edgr_4473_inst_YTD_20041231", firstFact.ContextRef)
+ require.NotNil(t, firstFact.UnitRef)
+ assert.Equal(t, "USD", *firstFact.UnitRef)
+ assert.Equal(t, "995000", firstFact.Value())
+
+ instantContext := content.ContextsByID["edgr_4473_inst_YTD_20041231"]
+ assert.Equal(t, PeriodTypeInstant, instantContext.Period.Type())
+ assert.Equal(t, "2004-12-31", *instantContext.Period.Instant)
+ assert.Equal(t, "USD", content.UnitsByID["USD"].String())
})
t.Run("simple xbrl happy path", func(t *testing.T) {
@@ -57,6 +94,13 @@ func TestUnmarshalXBRL(t *testing.T) {
require.NoError(t, xml.Unmarshal(xbrlBytes, &content))
require.NoError(t, content.Validate())
+ assert.Equal(t, xml.Name{Space: "http://www.xbrl.org/2003/instance", Local: "xbrl"}, content.XMLName)
+ assert.NotEmpty(t, content.Attributes)
+ require.Len(t, content.SchemaRefs, 1)
+ assert.Equal(t, xml.Name{Space: "http://www.xbrl.org/2003/linkbase", Local: "schemaRef"}, content.SchemaRefs[0].XMLName)
+ assert.Len(t, content.Contexts, 1)
+ assert.Len(t, content.Units, 1)
+
require.Len(t, content.ContextsByID, 1)
expectedContext := Context{
ID: "c1",
@@ -111,6 +155,148 @@ func TestUnmarshalXBRL(t *testing.T) {
})
}
+// TestUnmarshalRawXBRLPreservesEnvelope unmarshals test_data/simple_xbrl.xml into a RawXBRL
+// and checks the root element name and attributes, the single schemaRef (both the RawElement
+// and the NotImplemented placeholder), and that two facts are decoded.
+func TestUnmarshalRawXBRLPreservesEnvelope(t *testing.T) {
+ xbrlBytes, err := os.ReadFile("test_data/simple_xbrl.xml")
+ require.NoError(t, err)
+
+ var raw RawXBRL
+ require.NoError(t, xml.Unmarshal(xbrlBytes, &raw))
+
+ assert.Equal(t, xml.Name{Space: "http://www.xbrl.org/2003/instance", Local: "xbrl"}, raw.XMLName)
+ assert.NotEmpty(t, raw.Attributes)
+ require.Len(t, raw.SchemaRefs, 1)
+ assert.Equal(t, xml.Name{Space: "http://www.xbrl.org/2003/linkbase", Local: "schemaRef"}, raw.SchemaRefs[0].XMLName)
+ assert.NotEmpty(t, raw.SchemaRefs[0].Attributes)
+ assert.Len(t, raw.SchemaRef, 1)
+ assert.Len(t, raw.Facts, 2)
+}
+
+// TestUnmarshalRawXBRLPreservesReferenceAndUnsupportedElements unmarshals an inline document
+// into a RawXBRL and expects one entry each in SchemaRefs, LinkbaseRefs, RoleRefs, ArcRoleRefs
+// and FootnoteLinks (with a matching NotImplemented placeholder for each), two
+// UnsupportedTopLevel elements (item, then tuple), and no Facts.
+func TestUnmarshalRawXBRLPreservesReferenceAndUnsupportedElements(t *testing.T) {
+ // language=xml
+ doc := `
+
+
+
+
+
+ Preserved footnote text
+
+
+ nested
+`
+
+ var raw RawXBRL
+ require.NoError(t, xml.Unmarshal([]byte(doc), &raw))
+
+ require.Len(t, raw.SchemaRefs, 1)
+ assert.Equal(t, "example.xsd", requireAttr(t, raw.SchemaRefs[0].Attributes, "http://www.w3.org/1999/xlink", "href"))
+ assert.Len(t, raw.SchemaRef, 1)
+
+ require.Len(t, raw.LinkbaseRefs, 1)
+ assert.Equal(t, "labels.xml", requireAttr(t, raw.LinkbaseRefs[0].Attributes, "http://www.w3.org/1999/xlink", "href"))
+ assert.Len(t, raw.LinkbaseRef, 1)
+
+ require.Len(t, raw.RoleRefs, 1)
+ assert.Equal(t, "http://example.com/role", requireAttr(t, raw.RoleRefs[0].Attributes, "", "roleURI"))
+ assert.Len(t, raw.RoleRef, 1)
+
+ require.Len(t, raw.ArcRoleRefs, 1)
+ assert.Equal(t, "http://example.com/arcrole", requireAttr(t, raw.ArcRoleRefs[0].Attributes, "", "arcroleURI"))
+ assert.Len(t, raw.ArcRoleRef, 1)
+
+ require.Len(t, raw.FootnoteLinks, 1)
+ assert.Contains(t, raw.FootnoteLinks[0].InnerXML, "Preserved footnote text")
+ assert.Len(t, raw.FootnoteLink, 1)
+
+ // The base item and tuple elements must be kept raw, in document order, and not counted as facts.
+ require.Len(t, raw.UnsupportedTopLevel, 2)
+ assert.Equal(t, xml.Name{Space: xbrlInstanceNamespace, Local: "item"}, raw.UnsupportedTopLevel[0].XMLName)
+ assert.Equal(t, xml.Name{Space: xbrlInstanceNamespace, Local: "tuple"}, raw.UnsupportedTopLevel[1].XMLName)
+ assert.Contains(t, raw.UnsupportedTopLevel[1].InnerXML, "nested")
+ assert.Empty(t, raw.Facts)
+}
+
+// TestValidateRejectsDuplicateContextIDs unmarshals an inline document into an XBRL value
+// and expects Validate to fail with exactly "duplicate context id: c1".
+func TestValidateRejectsDuplicateContextIDs(t *testing.T) {
+ // language=xml
+ doc := `
+
+
+ 0000320193
+ 2021-03-27
+
+
+ 0000320193
+ 2021-03-28
+
+ shares
+ 727
+`
+
+ var content XBRL
+ require.NoError(t, xml.Unmarshal([]byte(doc), &content))
+
+ assert.EqualError(t, content.Validate(), "duplicate context id: c1")
+}
+
+// TestValidateRejectsDuplicateUnitIDs unmarshals an inline document into an XBRL value
+// and expects Validate to fail with exactly "duplicate unit id: u1".
+func TestValidateRejectsDuplicateUnitIDs(t *testing.T) {
+ // language=xml
+ doc := `
+
+
+ 0000320193
+ 2021-03-27
+
+ shares
+ iso4217:USD
+ 727
+`
+
+ var content XBRL
+ require.NoError(t, xml.Unmarshal([]byte(doc), &content))
+
+ assert.EqualError(t, content.Validate(), "duplicate unit id: u1")
+}
+
+// TestValidateRejectsKnownUnsupportedTopLevelElements unmarshals an inline document into an
+// XBRL value, expects exactly one UnsupportedTopLevel element, and expects Validate to fail
+// with "unsupported top-level element: http://www.xbrl.org/2003/instance:tuple".
+func TestValidateRejectsKnownUnsupportedTopLevelElements(t *testing.T) {
+ // language=xml
+ doc := `
+
+`
+
+ var content XBRL
+ require.NoError(t, xml.Unmarshal([]byte(doc), &content))
+
+ require.Len(t, content.UnsupportedTopLevel, 1)
+ assert.EqualError(t, content.Validate(), "unsupported top-level element: http://www.xbrl.org/2003/instance:tuple")
+}
+
+// stringPtr returns the address of its str parameter, i.e. a pointer to a copy of the argument.
func stringPtr(str string) *string {
return &str
}
+
+// requireAttr returns the value of the first attribute in attrs whose name has the given
+// namespace (space) and local name. If none matches, it reports the failure through
+// require.Failf, including the requested name and the attributes that were searched.
+func requireAttr(t *testing.T, attrs []xml.Attr, space, local string) string {
+ t.Helper()
+
+ for _, attr := range attrs {
+ if attr.Name.Space == space && attr.Name.Local == local {
+ return attr.Value
+ }
+ }
+
+ require.Failf(t, "missing XML attribute", "space=%q local=%q attrs=%v", space, local, attrs)
+ return ""
+}
+
+// requireFact returns the first fact in facts whose XML name has the given namespace (space)
+// and local name and whose Value() equals value. If none matches, it reports the failure
+// through require.Failf.
+func requireFact(t *testing.T, facts []Fact, space, local, value string) Fact {
+ t.Helper()
+
+ for _, fact := range facts {
+ if fact.XMLName.Space == space && fact.XMLName.Local == local && fact.Value() == value {
+ return fact
+ }
+ }
+
+ require.Failf(t, "missing fact", "space=%q local=%q value=%q", space, local, value)
+ return Fact{}
+}