From a9117796d9437143f94eec9d197bd4a384cd207a Mon Sep 17 00:00:00 2001 From: Thomas Marcelis Date: Sat, 2 May 2026 15:19:50 +0200 Subject: [PATCH] Preserve raw XBRL and tighten validation --- README.md | 6 +- context.go | 115 +++++++++++++++-- context_test.go | 92 ++++++++++++++ doc.go | 17 +++ example_unmarshal_test.go | 31 +++-- fact.go | 100 +++++++++++++-- fact_test.go | 132 +++++++++++++++++++ fuzz_test.go | 21 ++++ parse.go | 44 +++++++ parse_test.go | 56 +++++++++ unit.go | 118 ++++++++++++++--- unit_test.go | 65 ++++++++++ xbrl.go | 259 ++++++++++++++++++++++++++++++++++++-- xbrl_test.go | 186 +++++++++++++++++++++++++++ 14 files changed, 1181 insertions(+), 61 deletions(-) create mode 100644 doc.go create mode 100644 fuzz_test.go create mode 100644 parse.go create mode 100644 parse_test.go diff --git a/README.md b/README.md index 9fb92ac..b690377 100644 --- a/README.md +++ b/README.md @@ -8,12 +8,14 @@ This library is based around the [XBRL 2.1 spec](https://www.xbrl.org/Specificat It implements support for parsing basic facts (not tuples of facts), contexts and units through the `xml.Unmarshaler` interface. See the package example in the godocs for how to unmarshal into the `XBRL` struct. +You can also use `Parse`, `ParseReader`, or `Decode` as small convenience helpers around the same `encoding/xml` path. -This library supports basic validation that checks for malformed facts and broken references between facts and contexts/units (see `XBRL.Validate()`), +This library supports structural validation that checks malformed contexts, units, facts, duplicate IDs, unsupported scenarios, unsupported top-level base `item` and `tuple` elements, and broken references between facts and contexts/units (see `XBRL.Validate()`), but it does _not_ implement full semantic validation of XBRL documents. There are no abstractions added on-top of the XBRL data structure, which makes this library flexible and simple, but it also means you might have to read up a bit on how XBRL works to take full advantage of it. +The parser preserves lower-level XML details such as root attributes, XML names, raw link/reference elements, and generic segment content for callers that need them. To give you a head start, here's some basics about XBRL: @@ -52,7 +54,7 @@ The above fact doesn't directly tell us in which quarter EPS was `1.41`. That's ### Contexts A [Context](https://www.xbrl.org/Specification/XBRL-2.1/REC-2003-12-31/XBRL-2.1-REC-2003-12-31+corrected-errata-2013-02-20.html#_4.7) -describes a business entity, period of time, and an optional scenario (this library doesn't currently support scenarios, so we're going to gloss over them). +describes a business entity, period of time, and an optional scenario (this library preserves scenario XML, but does not interpret scenario semantics). When a fact references a context, it gives the fact more detail to help us understand what it means. diff --git a/context.go b/context.go index 057bcb3..f5a6f41 100644 --- a/context.go +++ b/context.go @@ -1,15 +1,20 @@ package xbrl -import "encoding/xml" +import ( + "encoding/xml" + "errors" +) -// Context contains information about the Entity being described, the reporting Period, and the reporting Scenario (scenario is NOT implemented). +// Context contains information about the Entity being described, the reporting Period, and the reporting Scenario. // All of which are necessary for understanding a business Fact captured as an XBRL item. +// Scenario is preserved as raw XML, but scenario validation and interpretation are not implemented. // https://www.xbrl.org/Specification/XBRL-2.1/REC-2003-12-31/XBRL-2.1-REC-2003-12-31+corrected-errata-2013-02-20.html#_4.7 type Context struct { ID string `xml:"id,attr"` - Period Period `xml:"period"` - Entity Entity `xml:"entity"` + Period Period `xml:"period"` + Entity Entity `xml:"entity"` + Scenario *RawElement `xml:"scenario"` } // Entity documents the business entity for a Context (business, government department, individual, etc.). @@ -19,6 +24,23 @@ type Entity struct { Segments Segments `xml:"segment"` } +// Validate checks that e contains the structural fields required by XBRL. +func (e Entity) Validate() error { + if e.Identifier.Scheme == "" { + return errors.New("entity identifier missing scheme") + } + if e.Identifier.Value == "" { + return errors.New("entity identifier missing value") + } + + return nil +} + +// IsValid validates e and returns true if no error was found. +func (e Entity) IsValid() bool { + return e.Validate() == nil +} + // Identifier specifies a scheme for identifying business entities and an identifier that follows the scheme. // https://www.xbrl.org/Specification/XBRL-2.1/REC-2003-12-31/XBRL-2.1-REC-2003-12-31+corrected-errata-2013-02-20.html#_4.7.3.1 // For Example: @@ -43,6 +65,7 @@ type Segment struct { XMLName xml.Name Attributes []xml.Attr `xml:",any,attr"` Value string `xml:",chardata"` + InnerXML string `xml:",innerxml"` } // UnmarshalXML implements xml.Unmarshaller for Segments. @@ -60,22 +83,27 @@ func (s *Segments) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { return nil } +// PeriodType describes which supported shape a Period has. type PeriodType string // All the supported PeriodType values. See Period.Type() for more information. const ( + // PeriodTypeDuration is a period with startDate and endDate. PeriodTypeDuration PeriodType = "duration" - PeriodTypeInstant PeriodType = "instant" - PeriodTypeForever PeriodType = "forever" - PeriodTypeInvalid PeriodType = "invalid" + // PeriodTypeInstant is a period with instant. + PeriodTypeInstant PeriodType = "instant" + // PeriodTypeForever is a period with forever. + PeriodTypeForever PeriodType = "forever" + // PeriodTypeInvalid is a period that does not match exactly one supported shape. + PeriodTypeInvalid PeriodType = "invalid" ) // Period contains an instant or interval of time for a Context. // https://www.xbrl.org/Specification/XBRL-2.1/REC-2003-12-31/XBRL-2.1-REC-2003-12-31+corrected-errata-2013-02-20.html#_4.7.2 type Period struct { - // StartDate is non-nil and guaranteed to be before EndDate if Period.Type() returns Duration. + // StartDate is non-nil if Period.Type() returns Duration. StartDate *string `xml:"startDate"` - // EndDate is non-nil and guaranteed to be after StartDate if Period.Type() returns Duration. + // EndDate is non-nil if Period.Type() returns Duration. EndDate *string `xml:"endDate"` // Instant is non-nil if Period.Type() returns Instant @@ -90,17 +118,78 @@ type Period struct { // Type returns the type of this period to help clarify what fields in the Period struct are non-nil and valid to use. // The comments on the attributes inside the Period struct explain when they can be used depending on what this function returns. func (p Period) Type() PeriodType { + periodType := PeriodTypeInvalid + matches := 0 + if p.Forever != nil { - return PeriodTypeForever + periodType = PeriodTypeForever + matches++ } if p.Instant != nil { - return PeriodTypeInstant + periodType = PeriodTypeInstant + matches++ } if p.StartDate != nil && p.EndDate != nil { - return PeriodTypeDuration + periodType = PeriodTypeDuration + matches++ + } + + if matches != 1 { + return PeriodTypeInvalid + } + + return periodType +} + +// Validate checks that p has exactly one supported XBRL period shape. +func (p Period) Validate() error { + switch p.Type() { + case PeriodTypeDuration: + if *p.StartDate == "" { + return errors.New("duration period missing startDate") + } + if *p.EndDate == "" { + return errors.New("duration period missing endDate") + } + case PeriodTypeInstant: + if *p.Instant == "" { + return errors.New("instant period missing value") + } + case PeriodTypeForever: + return nil + default: + return errors.New("period must have exactly one of duration, instant, or forever") } - return PeriodTypeInvalid + return nil +} + +// IsValid validates p and returns true if no error was found. +func (p Period) IsValid() bool { + return p.Validate() == nil +} + +// Validate checks that c contains the structural fields this parser supports. +func (c Context) Validate() error { + if c.ID == "" { + return errors.New("context missing id") + } + if err := c.Entity.Validate(); err != nil { + return err + } + if err := c.Period.Validate(); err != nil { + return err + } + if c.Scenario != nil { + return errors.New("scenario is not supported") + } + + return nil +} + +// IsValid validates c and returns true if no error was found. +func (c Context) IsValid() bool { + return c.Validate() == nil } diff --git a/context_test.go b/context_test.go index 18fbdf1..b41fc38 100644 --- a/context_test.go +++ b/context_test.go @@ -74,13 +74,105 @@ func TestUnmarshalContext(t *testing.T) { assert.Equal(t, xml.Name{Space: "xbrldi", Local: "explicitMember"}, context.Entity.Segments[0].XMLName) assert.Equal(t, []xml.Attr{{Name: xml.Name{Local: "dimension"}, Value: "us-gaap:StatementClassOfStockAxis"}}, context.Entity.Segments[0].Attributes) assert.Equal(t, "us-gaap:CommonStockMember", context.Entity.Segments[0].Value) + assert.Equal(t, "us-gaap:CommonStockMember", context.Entity.Segments[0].InnerXML) assert.Equal(t, xml.Name{Space: "myns", Local: "cool_segment"}, context.Entity.Segments[1].XMLName) assert.Empty(t, context.Entity.Segments[1].Attributes) assert.Equal(t, "I follow my own rules", context.Entity.Segments[1].Value) + assert.Equal(t, "I follow my own rules", context.Entity.Segments[1].InnerXML) assert.Equal(t, PeriodTypeDuration, context.Period.Type()) assert.Equal(t, "2020-09-27", *context.Period.StartDate) assert.Equal(t, "2021-03-27", *context.Period.EndDate) }) + + t.Run("segment preserves nested raw XML", func(t *testing.T) { + // language=xml + contextXML := ` + + 0000320193 + + value + + + +` + + var context Context + require.NoError(t, xml.Unmarshal([]byte(contextXML), &context)) + + require.Len(t, context.Entity.Segments, 1) + assert.Equal(t, xml.Name{Space: "dim", Local: "typedMember"}, context.Entity.Segments[0].XMLName) + assert.Contains(t, context.Entity.Segments[0].InnerXML, "value") + }) +} + +func TestContextValidation(t *testing.T) { + t.Run("period must have exactly one shape", func(t *testing.T) { + period := Period{ + StartDate: stringPtr("2020-09-27"), + EndDate: stringPtr("2021-03-27"), + Instant: stringPtr("2021-03-27"), + } + + assert.Equal(t, PeriodTypeInvalid, period.Type()) + assert.False(t, period.IsValid()) + }) + + t.Run("duration requires start and end values", func(t *testing.T) { + period := Period{ + StartDate: stringPtr("2020-09-27"), + EndDate: stringPtr(""), + } + + assert.EqualError(t, period.Validate(), "duration period missing endDate") + }) + + t.Run("entity requires identifier scheme and value", func(t *testing.T) { + entity := Entity{ + Identifier: Identifier{ + Scheme: "http://www.sec.gov/CIK", + }, + } + + assert.EqualError(t, entity.Validate(), "entity identifier missing value") + }) + + t.Run("context requires id and entity identifier", func(t *testing.T) { + context := Context{ + Period: Period{Instant: stringPtr("2021-03-27")}, + Entity: Entity{ + Identifier: Identifier{ + Scheme: "http://www.sec.gov/CIK", + Value: "0000320193", + }, + }, + } + + assert.EqualError(t, context.Validate(), "context missing id") + assert.False(t, context.IsValid()) + }) + + t.Run("scenario is preserved but unsupported by validation", func(t *testing.T) { + // language=xml + contextXML := ` + + 0000320193 + + + 2021-03-27 + + + true + +` + + var context Context + require.NoError(t, xml.Unmarshal([]byte(contextXML), &context)) + + require.NotNil(t, context.Scenario) + assert.Equal(t, xml.Name{Local: "scenario"}, context.Scenario.XMLName) + assert.Contains(t, context.Scenario.InnerXML, "forecast") + assert.EqualError(t, context.Validate(), "scenario is not supported") + }) } diff --git a/doc.go b/doc.go new file mode 100644 index 0000000..66c8735 --- /dev/null +++ b/doc.go @@ -0,0 +1,17 @@ +// Package xbrl parses XBRL 2.1 instance documents into simple Go data. +// +// The package preserves XBRL concepts such as facts, contexts, periods, +// entities, segments, units, XML names, attributes, and raw reference elements. +// It does not load taxonomies, resolve linkbases, normalize financial +// statements, transform Inline XBRL, or perform accounting-rule validation. +// +// XML unmarshalling is a first-class API: +// +// var doc xbrl.XBRL +// err := xml.Unmarshal(data, &doc) +// +// Parse, ParseReader, and Decode are convenience helpers around the same +// encoding/xml path. Parsing and validation are separate operations; call +// XBRL.Validate when you need structural checks for contexts, units, facts, and +// references. +package xbrl diff --git a/example_unmarshal_test.go b/example_unmarshal_test.go index 5abdf74..4cd0ceb 100644 --- a/example_unmarshal_test.go +++ b/example_unmarshal_test.go @@ -7,7 +7,11 @@ import ( "github.com/massive-com/xbrl-parser/v2" ) -const doc = ` +const doc = ` @@ -32,12 +36,11 @@ func Example() { if err := xml.Unmarshal([]byte(doc), &processed); err != nil { panic(err) } - - fact := processed.Facts[0] - if !fact.IsValid() { - panic("fact invalid!") + if err := processed.Validate(); err != nil { + panic(err) } + fact := processed.Facts[0] factType := fact.Type() numericValue, err := fact.NumericValue() @@ -48,9 +51,23 @@ func Example() { panic(err) } - fmt.Printf("Fact: %s:%s (type: %s)\n", fact.XMLName.Space, fact.XMLName.Local, factType) + fmt.Printf("Fact: %s (namespace: %s, type: %s)\n", fact.XMLName.Local, fact.XMLName.Space, factType) fmt.Printf(" %.0f %s on %s\n", numericValue, factUnit.String(), *factContext.Period.Instant) - // Output: Fact: ci:assets (type: non_fraction) + // Output: Fact: assets (namespace: http://www.xbrl.org/us/gaap/ci/2003/usfr-ci-2003, type: non_fraction) // 727 shares on 2021-04-16 } + +func ExampleParse() { + processed, err := xbrl.Parse([]byte(doc)) + if err != nil { + panic(err) + } + if err := processed.Validate(); err != nil { + panic(err) + } + + fmt.Println(len(processed.Facts)) + + // Output: 1 +} diff --git a/fact.go b/fact.go index c65a174..361d26c 100644 --- a/fact.go +++ b/fact.go @@ -6,6 +6,7 @@ import ( "strconv" ) +// FactType describes the structural category of a parsed Fact. type FactType string const ( @@ -17,7 +18,7 @@ const ( // A non-numeric fact is guaranteed to have an XMLName, ContextRef, and ValueStr. FactTypeNonNumeric FactType = "non_numeric" - // FactTypeNonFraction is a non-nil fact describing a numeric value that can precisely expressed as a simple value. + // FactTypeNonFraction is a non-nil fact describing a numeric value that can be precisely expressed as a simple value. // A non-fraction fact is guaranteed to have an XMLName, ContextRef, UnitRef, ValueStr, and exactly one of Precision or Decimals. // // For example: 727432 @@ -119,38 +120,115 @@ func (f Fact) Type() FactType { return FactTypeNonNumeric } -// IsValid confirms that f has at least the required fields that the FactType requires. -// Note that this function is not strict about extra fields existing. +// IsValid confirms that f has the structural fields required for its FactType. func (f Fact) IsValid() bool { + return f.Validate() == nil +} + +// Validate checks that f has the structural fields required for its FactType. +// It does not perform taxonomy-aware validation. +func (f Fact) Validate() error { // All facts must have a context ref if f.ContextRef == "" { - return false + return errors.New("missing contextRef") } // Some types have particular rules beyond what Type() checks for that must be true to be considered valid. switch f.Type() { case FactTypeFraction: - // Fraction must have a non-zero Denominator - return *f.Denominator != 0 + if f.UnitRef == nil || *f.UnitRef == "" { + return errors.New("fraction fact missing unitRef") + } + if f.Numerator == nil { + return errors.New("fraction fact missing numerator") + } + if f.Denominator == nil { + return errors.New("fraction fact missing denominator") + } + if *f.Denominator == 0 { + return errors.New("fraction fact denominator is zero") + } + if f.Precision != nil || f.Decimals != nil { + return errors.New("fraction fact cannot have precision or decimals") + } case FactTypeNonFraction: - // NonFractions must have either a non-nil Precision or non-nil Decimals field - return (f.Precision == nil) != (f.Decimals == nil) + if f.UnitRef == nil || *f.UnitRef == "" { + return errors.New("non-fraction fact missing unitRef") + } + if f.ValueStr == nil { + return errors.New("non-fraction fact missing value") + } + if f.Numerator != nil || f.Denominator != nil { + return errors.New("non-fraction fact cannot have numerator or denominator") + } + if (f.Precision == nil) == (f.Decimals == nil) { + return errors.New("non-fraction fact must have exactly one of precision or decimals") + } + if f.Precision != nil && !isValidPrecision(*f.Precision) { + return errors.New("non-fraction fact has invalid precision") + } + if f.Decimals != nil && !isValidDecimals(*f.Decimals) { + return errors.New("non-fraction fact has invalid decimals") + } + if _, err := strconv.ParseFloat(*f.ValueStr, 64); err != nil { + return err + } case FactTypeNonNumeric: - return f.ValueStr != nil - default: + if f.ValueStr == nil { + return errors.New("non-numeric fact missing value") + } + if f.UnitRef != nil { + return errors.New("non-numeric fact cannot have unitRef") + } + if f.Precision != nil || f.Decimals != nil { + return errors.New("non-numeric fact cannot have precision or decimals") + } + if f.Numerator != nil || f.Denominator != nil { + return errors.New("non-numeric fact cannot have numerator or denominator") + } + } + + return nil +} + +func isValidPrecision(precision string) bool { + if precision == "INF" { + return true + } + + value, err := strconv.Atoi(precision) + return err == nil && value >= 0 +} + +func isValidDecimals(decimals string) bool { + if decimals == "INF" { return true } + + _, err := strconv.Atoi(decimals) + return err == nil } // NumericValue attempts to return the numeric value this fact represents. -// This function returns // If this fact is a fraction type, this function returns the value of numerator / denominator. // Note that fraction type facts generally cannot be precisely represented as a float64 and may have some rounding error. func (f Fact) NumericValue() (float64, error) { switch f.Type() { case FactTypeFraction: + if f.Numerator == nil { + return 0, errors.New("fraction fact missing numerator") + } + if f.Denominator == nil { + return 0, errors.New("fraction fact missing denominator") + } + if *f.Denominator == 0 { + return 0, errors.New("fraction fact denominator is zero") + } return *f.Numerator / *f.Denominator, nil case FactTypeNonFraction: + if f.ValueStr == nil { + return 0, errors.New("non-fraction fact missing value") + } return strconv.ParseFloat(*f.ValueStr, 64) default: return 0, ErrNonNumericFactType diff --git a/fact_test.go b/fact_test.go index 70f77b4..8f7baae 100644 --- a/fact_test.go +++ b/fact_test.go @@ -108,3 +108,135 @@ func TestUnmarshalFact(t *testing.T) { assert.EqualValues(t, 1.0/3.0, val) }) } + +func TestFactValidation(t *testing.T) { + unitRef := "u1" + precision := "3" + decimals := "2" + invalidPrecision := "-1" + invalidDecimals := "not-an-integer" + + tests := []struct { + name string + fact Fact + wantErr string + }{ + { + name: "fact requires context ref", + fact: Fact{ + UnitRef: &unitRef, + Precision: &precision, + ValueStr: stringPtr("727"), + }, + wantErr: "missing contextRef", + }, + { + name: "non-fraction missing value", + fact: Fact{ + ContextRef: "c1", + UnitRef: &unitRef, + Precision: &precision, + }, + wantErr: "non-fraction fact missing value", + }, + { + name: "non-fraction with precision and decimals", + fact: Fact{ + ContextRef: "c1", + UnitRef: &unitRef, + Precision: &precision, + Decimals: &decimals, + ValueStr: stringPtr("727"), + }, + wantErr: "non-fraction fact must have exactly one of precision or decimals", + }, + { + name: "non-fraction with invalid precision", + fact: Fact{ + ContextRef: "c1", + UnitRef: &unitRef, + Precision: &invalidPrecision, + ValueStr: stringPtr("727"), + }, + wantErr: "non-fraction fact has invalid precision", + }, + { + name: "non-fraction with invalid decimals", + fact: Fact{ + ContextRef: "c1", + UnitRef: &unitRef, + Decimals: &invalidDecimals, + ValueStr: stringPtr("727"), + }, + wantErr: "non-fraction fact has invalid decimals", + }, + { + name: "non-numeric with precision", + fact: Fact{ + ContextRef: "c1", + Precision: &precision, + ValueStr: stringPtr("not numeric"), + }, + wantErr: "non-numeric fact cannot have precision or decimals", + }, + { + name: "fraction with zero denominator", + fact: Fact{ + ContextRef: "c1", + UnitRef: &unitRef, + Numerator: floatPtr(1), + Denominator: floatPtr(0), + }, + wantErr: "fraction fact denominator is zero", + }, + { + name: "fraction with precision", + fact: Fact{ + ContextRef: "c1", + UnitRef: &unitRef, + Precision: &precision, + Numerator: floatPtr(1), + Denominator: floatPtr(3), + }, + wantErr: "fraction fact cannot have precision or decimals", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + err := tt.fact.Validate() + require.Error(t, err) + assert.Contains(t, err.Error(), tt.wantErr) + assert.False(t, tt.fact.IsValid()) + }) + } +} + +func TestNilFactValidation(t *testing.T) { + nilValue := true + fact := Fact{ + XMLName: xml.Name{Space: "myns", Local: "nilFact"}, + Nil: &nilValue, + ContextRef: "c1", + } + + assert.Equal(t, FactTypeNil, fact.Type()) + assert.NoError(t, fact.Validate()) +} + +func TestNumericValueMalformedFactReturnsError(t *testing.T) { + unitRef := "u1" + precision := "3" + fact := Fact{ + ContextRef: "c1", + UnitRef: &unitRef, + Precision: &precision, + } + + _, err := fact.NumericValue() + assert.Error(t, err) +} + +func floatPtr(val float64) *float64 { + return &val +} diff --git a/fuzz_test.go b/fuzz_test.go new file mode 100644 index 0000000..f7370a2 --- /dev/null +++ b/fuzz_test.go @@ -0,0 +1,21 @@ +//go:build go1.18 +// +build go1.18 + +package xbrl + +import "testing" + +func FuzzParseAndValidate(f *testing.F) { + f.Add([]byte(``)) + f.Add([]byte(`e`)) + f.Add([]byte(`727`)) + + f.Fuzz(func(t *testing.T, data []byte) { + doc, err := Parse(data) + if err != nil { + return + } + + _ = doc.Validate() + }) +} diff --git a/parse.go b/parse.go new file mode 100644 index 0000000..5d20fe6 --- /dev/null +++ b/parse.go @@ -0,0 +1,44 @@ +package xbrl + +import ( + "encoding/xml" + "errors" + "io" +) + +// Parse unmarshals an XBRL instance document from data. +// It does not call XBRL.Validate; parsing and structural validation are separate operations. +func Parse(data []byte) (XBRL, error) { + var doc XBRL + if err := xml.Unmarshal(data, &doc); err != nil { + return XBRL{}, err + } + + return doc, nil +} + +// ParseReader decodes an XBRL instance document from r using encoding/xml. +// It does not call XBRL.Validate. +func ParseReader(r io.Reader) (XBRL, error) { + if r == nil { + return XBRL{}, errors.New("nil reader") + } + + return Decode(xml.NewDecoder(r)) +} + +// Decode decodes an XBRL instance document with decoder. +// Use this helper when callers need to configure xml.Decoder, such as setting CharsetReader. +// It does not call XBRL.Validate. +func Decode(decoder *xml.Decoder) (XBRL, error) { + if decoder == nil { + return XBRL{}, errors.New("nil decoder") + } + + var doc XBRL + if err := decoder.Decode(&doc); err != nil { + return XBRL{}, err + } + + return doc, nil +} diff --git a/parse_test.go b/parse_test.go new file mode 100644 index 0000000..471e255 --- /dev/null +++ b/parse_test.go @@ -0,0 +1,56 @@ +package xbrl + +import ( + "encoding/xml" + "io" + "os" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestParseHelpers(t *testing.T) { + t.Run("Parse does not validate", func(t *testing.T) { + xbrlBytes, err := os.ReadFile("test_data/invalid_xbrl.xml") + require.NoError(t, err) + + doc, err := Parse(xbrlBytes) + require.NoError(t, err) + + assert.Error(t, doc.Validate()) + }) + + t.Run("ParseReader", func(t *testing.T) { + doc, err := ParseReader(strings.NewReader(``)) + require.NoError(t, err) + + assert.Equal(t, xml.Name{Local: "xbrl"}, doc.XMLName) + }) + + t.Run("Decode uses caller configured decoder", func(t *testing.T) { + f, err := os.Open("test_data/edgr-2004_10k.xml") + require.NoError(t, err) + defer f.Close() + + decoder := xml.NewDecoder(f) + decoder.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) { + return input, nil + } + + doc, err := Decode(decoder) + require.NoError(t, err) + + assert.Equal(t, xml.Name{Space: xbrlInstanceNamespace, Local: "xbrl"}, doc.XMLName) + assert.Len(t, doc.ContextsByID, 4) + }) + + t.Run("nil inputs", func(t *testing.T) { + _, err := ParseReader(nil) + assert.EqualError(t, err, "nil reader") + + _, err = Decode(nil) + assert.EqualError(t, err, "nil decoder") + }) +} diff --git a/unit.go b/unit.go index 2aa3187..53a28b3 100644 --- a/unit.go +++ b/unit.go @@ -1,13 +1,18 @@ package xbrl -import "strings" +import ( + "errors" + "strings" +) // Unit specifies the unit in which a numeric fact has been measured. // A Unit can be either a simple measure, product of measures, or a ratio of products of measures with a numerator and a denominator. // // A simple unit that represents shares looks like: // -// shares +// +// shares +// // // // Numeric Facts reference units by ID via the Fact's `unitRef` attribute. @@ -18,17 +23,19 @@ type Unit struct { Divide *Divide `xml:"divide"` } -// Divide represents a ratios of Units that has a numerator and a denominator. +// Divide represents a ratio of units that has a numerator and a denominator. // For example, XBRL can represent a complex unit like earnings per share (EPS) as dollars per share (USD / share): // -// -// -// iso4127:USD -// -// -// shares -// -// +// +// +// +// iso4217:USD +// +// +// shares +// +// +// // // // https://www.xbrl.org/Specification/XBRL-2.1/REC-2003-12-31/XBRL-2.1-REC-2003-12-31+corrected-errata-2013-02-20.html#_4.8.2 @@ -47,22 +54,103 @@ type Measure struct { Value string `xml:",chardata"` } +// Measures is a product of one or more Measure values. type Measures []Measure +// Validate checks that u has the structural fields required by XBRL. +func (u Unit) Validate() error { + if u.ID == "" { + return errors.New("unit missing id") + } + if (len(u.Measures) == 0) == (u.Divide == nil) { + return errors.New("unit must have either measures or divide") + } + + if u.Divide != nil { + return u.Divide.Validate() + } + + return u.Measures.Validate() +} + +// IsValid validates u and returns true if no error was found. +func (u Unit) IsValid() bool { + return u.Validate() == nil +} + +// Validate checks that d has numerator and denominator measures. +func (d Divide) Validate() error { + if len(d.Numerator) == 0 { + return errors.New("divide missing numerator measures") + } + if len(d.Denominator) == 0 { + return errors.New("divide missing denominator measures") + } + if err := d.Numerator.Validate(); err != nil { + return err + } + if err := d.Denominator.Validate(); err != nil { + return err + } + + return nil +} + +// IsValid validates d and returns true if no error was found. +func (d Divide) IsValid() bool { + return d.Validate() == nil +} + +// Validate checks that m contains a non-empty measure value. +func (m Measure) Validate() error { + if m.Value == "" { + return errors.New("measure missing value") + } + + return nil +} + +// IsValid validates m and returns true if no error was found. +func (m Measure) IsValid() bool { + return m.Validate() == nil +} + +// Validate checks that m has at least one measure and that each measure has a value. +func (m Measures) Validate() error { + if len(m) == 0 { + return errors.New("measures missing values") + } + + for _, measure := range m { + if err := measure.Validate(); err != nil { + return err + } + } + + return nil +} + +// IsValid validates m and returns true if no error was found. +func (m Measures) IsValid() bool { + return m.Validate() == nil +} + // String returns a human readable representation of the Unit. func (u Unit) String() string { - // If the Divide element is not nil, there can be no top-level Meaures. + // If the Divide element is not nil, there can be no top-level Measures. if u.Divide != nil { return u.Divide.Numerator.String() + " / " + u.Divide.Denominator.String() } - // If the divider element is nil, there must be 1+ top-level Measures. + // If the Divide element is nil, there must be 1+ top-level Measures. return u.Measures.String() } // String returns the local name of the measure if the value is formatted as 'xsd:Qname', otherwise the value itself is returned. -// Ex: `iso4127:USD` -> "USD" -// `shares` -> "shares" +// This is a display helper only. Use Measure.Value when the raw XBRL value is significant. +// Ex: `iso4217:USD` -> "USD" +// +// `shares` -> "shares" func (m Measure) String() string { if index := strings.IndexRune(m.Value, ':'); index != -1 && index < len(m.Value) { return m.Value[index+1 : len(m.Value)] diff --git a/unit_test.go b/unit_test.go index 541c0b3..ab44587 100644 --- a/unit_test.go +++ b/unit_test.go @@ -106,3 +106,68 @@ func TestUnmarshalUnit(t *testing.T) { assert.Equal(t, "USD / feet * feet", unit.String()) }) } + +func TestUnitValidation(t *testing.T) { + tests := []struct { + name string + unit Unit + wantErr string + }{ + { + name: "unit requires id", + unit: Unit{Measures: Measures{{Value: "shares"}}}, + wantErr: "unit missing id", + }, + { + name: "unit requires measure or divide", + unit: Unit{ID: "u1"}, + wantErr: "unit must have either measures or divide", + }, + { + name: "unit cannot have measures and divide", + unit: Unit{ + ID: "u1", + Measures: Measures{{Value: "shares"}}, + Divide: &Divide{ + Numerator: Measures{{Value: "iso4217:USD"}}, + Denominator: Measures{{Value: "shares"}}, + }, + }, + wantErr: "unit must have either measures or divide", + }, + { + name: "divide requires denominator measures", + unit: Unit{ + ID: "u1", + Divide: &Divide{ + Numerator: Measures{{Value: "iso4217:USD"}}, + }, + }, + wantErr: "divide missing denominator measures", + }, + { + name: "measure requires value", + unit: Unit{ + ID: "u1", + Measures: Measures{{}}, + }, + wantErr: "measure missing value", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + err := tt.unit.Validate() + require.Error(t, err) + assert.Contains(t, err.Error(), tt.wantErr) + assert.False(t, tt.unit.IsValid()) + }) + } +} + +func TestMeasureValuePreservesRawQName(t *testing.T) { + measure := Measure{Value: "iso4217:USD"} + + assert.Equal(t, "iso4217:USD", measure.Value) + assert.Equal(t, "USD", measure.String()) +} diff --git a/xbrl.go b/xbrl.go index b41a692..362be86 100644 --- a/xbrl.go +++ b/xbrl.go @@ -5,22 +5,44 @@ import ( "fmt" ) -// NotImplemented represents an expected element in the XBRL that isn't handled yet, but should not be considered a Fact. +const ( + xbrlInstanceNamespace = "http://www.xbrl.org/2003/instance" +) + +// NotImplemented represents a count of expected XBRL elements that are not handled in detail. +// New code should prefer the RawElement fields that preserve XML names, attributes, and inner XML. type NotImplemented []*struct{} +// RawElement preserves an XML element that this package does not model in detail. +type RawElement struct { + XMLName xml.Name + Attributes []xml.Attr `xml:",any,attr"` + InnerXML string `xml:",innerxml"` +} + // RawXBRL represents the XML structure of an XBRL document. // This is not a feature complete XBRL parser! -// See the fields of type NotImplemented for an idea of what's missing. +// See the fields of type RawElement and NotImplemented for an idea of what's missing. // Also note that this struct doesn't support Tuple facts (https://www.xbrl.org/Specification/XBRL-2.1/REC-2003-12-31/XBRL-2.1-REC-2003-12-31+corrected-errata-2013-02-20.html#_4.9) // // You can use this struct directly, but XBRL is structured in a more convenient way. // See the comment on XBRL for more info. type RawXBRL struct { + XMLName xml.Name + Attributes []xml.Attr + Contexts []Context `xml:"context"` Units []Unit `xml:"unit"` Facts []Fact `xml:",any"` + SchemaRefs []RawElement + LinkbaseRefs []RawElement + RoleRefs []RawElement + ArcRoleRefs []RawElement + FootnoteLinks []RawElement + UnsupportedTopLevel []RawElement + // The fields below are not properly implemented, but need to be here so they aren't lumped into the `Facts` slice. SchemaRef NotImplemented `xml:"schemaRef"` @@ -30,14 +52,160 @@ type RawXBRL struct { FootnoteLink NotImplemented `xml:"footnoteLink"` } -// XBRL contains maps for contexts and units so they can be accessed easier when looping through facts. +// UnmarshalXML implements xml.Unmarshaler and preserves the XBRL root envelope +// while still routing taxonomy-defined top-level elements into Facts. +func (r *RawXBRL) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { + *r = RawXBRL{ + XMLName: start.Name, + Attributes: copyAttrs(start.Attr), + } + + for { + token, err := d.Token() + if err != nil { + return err + } + + switch token := token.(type) { + case xml.StartElement: + if err := r.decodeChild(d, token); err != nil { + return err + } + case xml.EndElement: + if token.Name == start.Name { + return nil + } + } + } +} + +func (r *RawXBRL) decodeChild(d *xml.Decoder, start xml.StartElement) error { + switch start.Name.Local { + case "context": + var context Context + if err := d.DecodeElement(&context, &start); err != nil { + return err + } + r.Contexts = append(r.Contexts, context) + case "unit": + var unit Unit + if err := d.DecodeElement(&unit, &start); err != nil { + return err + } + r.Units = append(r.Units, unit) + case "schemaRef": + element, err := decodeRawElement(d, start) + if err != nil { + return err + } + r.SchemaRefs = append(r.SchemaRefs, element) + r.SchemaRef = append(r.SchemaRef, &struct{}{}) + case "linkbaseRef": + element, err := decodeRawElement(d, start) + if err != nil { + return err + } + r.LinkbaseRefs = append(r.LinkbaseRefs, element) + r.LinkbaseRef = append(r.LinkbaseRef, &struct{}{}) + case "roleRef": + element, err := decodeRawElement(d, start) + if err != nil { + return err + } + r.RoleRefs = append(r.RoleRefs, element) + r.RoleRef = append(r.RoleRef, &struct{}{}) + case "arcroleRef": + element, err := decodeRawElement(d, start) + if err != nil { + return err + } + r.ArcRoleRefs = append(r.ArcRoleRefs, element) + r.ArcRoleRef = append(r.ArcRoleRef, &struct{}{}) + case "footnoteLink": + element, err := decodeRawElement(d, start) + if err != nil { + return err + } + r.FootnoteLinks = append(r.FootnoteLinks, element) + r.FootnoteLink = append(r.FootnoteLink, &struct{}{}) + default: + if isKnownUnsupportedTopLevel(start.Name) { + element, err := decodeRawElement(d, start) + if err != nil { + return err + } + r.UnsupportedTopLevel = append(r.UnsupportedTopLevel, element) + return nil + } + + var fact Fact + if err := d.DecodeElement(&fact, &start); err != nil { + return err + } + r.Facts = append(r.Facts, fact) + } + + return nil +} + +func isKnownUnsupportedTopLevel(name xml.Name) bool { + return name.Space == xbrlInstanceNamespace && (name.Local == "item" || name.Local == "tuple") +} + +func decodeRawElement(d *xml.Decoder, start xml.StartElement) (RawElement, error) { + var element RawElement + if err := d.DecodeElement(&element, &start); err != nil { + return RawElement{}, err + } + + return element, nil +} + +func copyAttrs(attrs []xml.Attr) []xml.Attr { + if len(attrs) == 0 { + return nil + } + + copied := make([]xml.Attr, len(attrs)) + copy(copied, attrs) + return copied +} + +func copyRawElements(elements []RawElement) []RawElement { + if len(elements) == 0 { + return nil + } + + copied := make([]RawElement, len(elements)) + for index, element := range elements { + copied[index] = element + copied[index].Attributes = copyAttrs(element.Attributes) + } + + return copied +} + +// XBRL contains raw context and unit slices plus maps so contexts and units can be accessed easier when looping through facts. // You can either unmarshal XML directly into this struct (it has a custom unmarshaller), // or you can unmarshal XML into a RawXBRL struct and call NewProcessedXBRL(RawXBRL) to process the raw XBRL into this format. type XBRL struct { + XMLName xml.Name + Attributes []xml.Attr + + Contexts []Context + Units []Unit + ContextsByID map[string]Context UnitsByID map[string]Unit Facts []Fact + + SchemaRefs []RawElement + LinkbaseRefs []RawElement + RoleRefs []RawElement + ArcRoleRefs []RawElement + FootnoteLinks []RawElement + UnsupportedTopLevel []RawElement } // NewProcessedXBRL constructs a XBRL struct from a RawXBRL struct. @@ -54,9 +222,19 @@ func NewProcessedXBRL(raw RawXBRL) XBRL { } return XBRL{ - ContextsByID: contextsByID, - UnitsByID: unitsByID, - Facts: raw.Facts, + XMLName: raw.XMLName, + Attributes: copyAttrs(raw.Attributes), + Contexts: append([]Context(nil), raw.Contexts...), + Units: append([]Unit(nil), raw.Units...), + ContextsByID: contextsByID, + UnitsByID: unitsByID, + Facts: append([]Fact(nil), raw.Facts...), + SchemaRefs: copyRawElements(raw.SchemaRefs), + LinkbaseRefs: copyRawElements(raw.LinkbaseRefs), + RoleRefs: copyRawElements(raw.RoleRefs), + ArcRoleRefs: copyRawElements(raw.ArcRoleRefs), + FootnoteLinks: copyRawElements(raw.FootnoteLinks), + UnsupportedTopLevel: copyRawElements(raw.UnsupportedTopLevel), } } @@ -72,20 +250,35 @@ func (x *XBRL) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { return nil } -// Validate checks that all Facts are valid and reference contexts and units that also exist. -// Note that since this parser does not properly handle Tuple elements, it's possible that some malformed Facts were unmarshalled. +// Validate checks basic XBRL structure: contexts, units, facts, duplicate IDs, and fact references. +// It does not perform taxonomy-aware, accounting, linkbase, footnote, tuple, or scenario validation. func (x XBRL) Validate() error { + contextsByID, err := x.validatedContextsByID() + if err != nil { + return err + } + + unitsByID, err := x.validatedUnitsByID() + if err != nil { + return err + } + + if len(x.UnsupportedTopLevel) > 0 { + element := x.UnsupportedTopLevel[0] + return fmt.Errorf("unsupported top-level element: %s:%s", element.XMLName.Space, element.XMLName.Local) + } + for _, fact := range x.Facts { - if !fact.IsValid() { - return fmt.Errorf("invalid fact: %s:%s", fact.XMLName.Space, fact.XMLName.Local) + if err := fact.Validate(); err != nil { + return fmt.Errorf("invalid fact (%s:%s): %w", fact.XMLName.Space, fact.XMLName.Local, err) } - if _, exists := x.ContextsByID[fact.ContextRef]; !exists { + if _, exists := contextsByID[fact.ContextRef]; !exists { return fmt.Errorf("fact (%s:%s) references non-existent context: %s", fact.XMLName.Space, fact.XMLName.Local, fact.ContextRef) } if fact.UnitRef != nil { - if _, exists := x.UnitsByID[*fact.UnitRef]; !exists { + if _, exists := unitsByID[*fact.UnitRef]; !exists { return fmt.Errorf("fact (%s:%s) references non-existent unit: %s", fact.XMLName.Space, fact.XMLName.Local, *fact.UnitRef) } } @@ -94,7 +287,47 @@ func (x XBRL) Validate() error { return nil } -// IsValid validates the Facts in this struct and returns true if no error was found. +// IsValid validates this struct and returns true if no error was found. func (x XBRL) IsValid() bool { return x.Validate() == nil } + +func (x XBRL) validatedContextsByID() (map[string]Context, error) { + if len(x.Contexts) == 0 { + return x.ContextsByID, nil + } + + contextsByID := make(map[string]Context, len(x.Contexts)) + for _, context := range x.Contexts { + if err := context.Validate(); err != nil { + return nil, fmt.Errorf("invalid context (%s): %w", context.ID, err) + } + if _, exists := contextsByID[context.ID]; exists { + return nil, fmt.Errorf("duplicate context id: %s", context.ID) + } + + contextsByID[context.ID] = context + } + + return contextsByID, nil +} + +func (x XBRL) validatedUnitsByID() (map[string]Unit, error) { + if len(x.Units) == 0 { + return x.UnitsByID, nil + } + + unitsByID := make(map[string]Unit, len(x.Units)) + for _, unit := range x.Units { + if err := unit.Validate(); err != nil { + return nil, fmt.Errorf("invalid unit (%s): %w", unit.ID, err) + } + if _, exists := unitsByID[unit.ID]; exists { + return nil, fmt.Errorf("duplicate unit id: %s", unit.ID) + } + + unitsByID[unit.ID] = unit + } + + return unitsByID, nil +} diff --git a/xbrl_test.go b/xbrl_test.go index 090f580..58eda94 100644 --- a/xbrl_test.go +++ b/xbrl_test.go @@ -26,6 +26,29 @@ func TestUnmarshalXBRL(t *testing.T) { assert.Equal(t, 283, len(content.ContextsByID)) assert.Equal(t, 9, len(content.UnitsByID)) assert.Equal(t, 1070, len(content.Facts)) + require.Len(t, content.SchemaRefs, 1) + assert.Equal(t, "aapl-20210327.xsd", requireAttr(t, content.SchemaRefs[0].Attributes, "http://www.w3.org/1999/xlink", "href")) + + durationContext := content.ContextsByID["i02c0f3e92d75432fbe3c6a24022bf7b0_D20200927-20210327"] + assert.Equal(t, PeriodTypeDuration, durationContext.Period.Type()) + assert.Equal(t, "2020-09-27", *durationContext.Period.StartDate) + assert.Equal(t, "2021-03-27", *durationContext.Period.EndDate) + + segmentedContext := content.ContextsByID["iff44040cd61344d085f7a2b7a1076cb1_D20200927-20210327"] + require.Len(t, segmentedContext.Entity.Segments, 1) + assert.Equal(t, xml.Name{Space: "http://xbrl.org/2006/xbrldi", Local: "explicitMember"}, segmentedContext.Entity.Segments[0].XMLName) + assert.Equal(t, "us-gaap:CommonStockMember", segmentedContext.Entity.Segments[0].Value) + assert.Equal(t, "us-gaap:StatementClassOfStockAxis", requireAttr(t, segmentedContext.Entity.Segments[0].Attributes, "", "dimension")) + + usdPerShare := content.UnitsByID["usdPerShare"] + require.NotNil(t, usdPerShare.Divide) + assert.Equal(t, "iso4217:USD", usdPerShare.Divide.Numerator[0].Value) + assert.Equal(t, "USD / shares", usdPerShare.String()) + + eps := requireFact(t, content.Facts, "http://fasb.org/us-gaap/2020-01-31", "EarningsPerShareBasic", "1.41") + assert.Equal(t, "ia09408265617434fbc06a7e4c6b101bc_D20201227-20210327", eps.ContextRef) + require.NotNil(t, eps.UnitRef) + assert.Equal(t, "usdPerShare", *eps.UnitRef) }) t.Run("real-world xbrl from 2004", func(t *testing.T) { @@ -46,6 +69,20 @@ func TestUnmarshalXBRL(t *testing.T) { assert.Equal(t, 4, len(content.ContextsByID)) assert.Equal(t, 2, len(content.UnitsByID)) assert.Equal(t, 154, len(content.Facts)) + require.Len(t, content.SchemaRefs, 1) + assert.Equal(t, "edgr-20050228.xsd", requireAttr(t, content.SchemaRefs[0].Attributes, "http://www.w3.org/1999/xlink", "href")) + + firstFact := content.Facts[0] + assert.Equal(t, xml.Name{Space: "http://www.xbrl.org/us/fr/common/pte/2005-02-28", Local: "AccountsPayable"}, firstFact.XMLName) + assert.Equal(t, "edgr_4473_inst_YTD_20041231", firstFact.ContextRef) + require.NotNil(t, firstFact.UnitRef) + assert.Equal(t, "USD", *firstFact.UnitRef) + assert.Equal(t, "995000", firstFact.Value()) + + instantContext := content.ContextsByID["edgr_4473_inst_YTD_20041231"] + assert.Equal(t, PeriodTypeInstant, instantContext.Period.Type()) + assert.Equal(t, "2004-12-31", *instantContext.Period.Instant) + assert.Equal(t, "USD", content.UnitsByID["USD"].String()) }) t.Run("simple xbrl happy path", func(t *testing.T) { @@ -57,6 +94,13 @@ func TestUnmarshalXBRL(t *testing.T) { require.NoError(t, xml.Unmarshal(xbrlBytes, &content)) require.NoError(t, content.Validate()) + assert.Equal(t, xml.Name{Space: "http://www.xbrl.org/2003/instance", Local: "xbrl"}, content.XMLName) + assert.NotEmpty(t, content.Attributes) + require.Len(t, content.SchemaRefs, 1) + assert.Equal(t, xml.Name{Space: "http://www.xbrl.org/2003/linkbase", Local: "schemaRef"}, content.SchemaRefs[0].XMLName) + assert.Len(t, content.Contexts, 1) + assert.Len(t, content.Units, 1) + require.Len(t, content.ContextsByID, 1) expectedContext := Context{ ID: "c1", @@ -111,6 +155,148 @@ func TestUnmarshalXBRL(t *testing.T) { }) } +func TestUnmarshalRawXBRLPreservesEnvelope(t *testing.T) { + xbrlBytes, err := os.ReadFile("test_data/simple_xbrl.xml") + require.NoError(t, err) + + var raw RawXBRL + require.NoError(t, xml.Unmarshal(xbrlBytes, &raw)) + + assert.Equal(t, xml.Name{Space: "http://www.xbrl.org/2003/instance", Local: "xbrl"}, raw.XMLName) + assert.NotEmpty(t, raw.Attributes) + require.Len(t, raw.SchemaRefs, 1) + assert.Equal(t, xml.Name{Space: "http://www.xbrl.org/2003/linkbase", Local: "schemaRef"}, raw.SchemaRefs[0].XMLName) + assert.NotEmpty(t, raw.SchemaRefs[0].Attributes) + assert.Len(t, raw.SchemaRef, 1) + assert.Len(t, raw.Facts, 2) +} + +func TestUnmarshalRawXBRLPreservesReferenceAndUnsupportedElements(t *testing.T) { + // language=xml + doc := ` + + + + + + Preserved footnote text + + + nested +` + + var raw RawXBRL + require.NoError(t, xml.Unmarshal([]byte(doc), &raw)) + + require.Len(t, raw.SchemaRefs, 1) + assert.Equal(t, "example.xsd", requireAttr(t, raw.SchemaRefs[0].Attributes, "http://www.w3.org/1999/xlink", "href")) + assert.Len(t, raw.SchemaRef, 1) + + require.Len(t, raw.LinkbaseRefs, 1) + assert.Equal(t, "labels.xml", requireAttr(t, raw.LinkbaseRefs[0].Attributes, "http://www.w3.org/1999/xlink", "href")) + assert.Len(t, raw.LinkbaseRef, 1) + + require.Len(t, raw.RoleRefs, 1) + assert.Equal(t, "http://example.com/role", requireAttr(t, raw.RoleRefs[0].Attributes, "", "roleURI")) + assert.Len(t, raw.RoleRef, 1) + + require.Len(t, raw.ArcRoleRefs, 1) + assert.Equal(t, "http://example.com/arcrole", requireAttr(t, raw.ArcRoleRefs[0].Attributes, "", "arcroleURI")) + assert.Len(t, raw.ArcRoleRef, 1) + + require.Len(t, raw.FootnoteLinks, 1) + assert.Contains(t, raw.FootnoteLinks[0].InnerXML, "Preserved footnote text") + assert.Len(t, raw.FootnoteLink, 1) + + require.Len(t, raw.UnsupportedTopLevel, 2) + assert.Equal(t, xml.Name{Space: xbrlInstanceNamespace, Local: "item"}, raw.UnsupportedTopLevel[0].XMLName) + assert.Equal(t, xml.Name{Space: xbrlInstanceNamespace, Local: "tuple"}, raw.UnsupportedTopLevel[1].XMLName) + assert.Contains(t, raw.UnsupportedTopLevel[1].InnerXML, "nested") + assert.Empty(t, raw.Facts) +} + +func TestValidateRejectsDuplicateContextIDs(t *testing.T) { + // language=xml + doc := ` + + + 0000320193 + 2021-03-27 + + + 0000320193 + 2021-03-28 + + shares + 727 +` + + var content XBRL + require.NoError(t, xml.Unmarshal([]byte(doc), &content)) + + assert.EqualError(t, content.Validate(), "duplicate context id: c1") +} + +func TestValidateRejectsDuplicateUnitIDs(t *testing.T) { + // language=xml + doc := ` + + + 0000320193 + 2021-03-27 + + shares + iso4217:USD + 727 +` + + var content XBRL + require.NoError(t, xml.Unmarshal([]byte(doc), &content)) + + assert.EqualError(t, content.Validate(), "duplicate unit id: u1") +} + +func TestValidateRejectsKnownUnsupportedTopLevelElements(t *testing.T) { + // language=xml + doc := ` + +` + + var content XBRL + require.NoError(t, xml.Unmarshal([]byte(doc), &content)) + + require.Len(t, content.UnsupportedTopLevel, 1) + assert.EqualError(t, content.Validate(), "unsupported top-level element: http://www.xbrl.org/2003/instance:tuple") +} + func stringPtr(str string) *string { return &str } + +func requireAttr(t *testing.T, attrs []xml.Attr, space, local string) string { + t.Helper() + + for _, attr := range attrs { + if attr.Name.Space == space && attr.Name.Local == local { + return attr.Value + } + } + + require.Failf(t, "missing XML attribute", "space=%q local=%q attrs=%v", space, local, attrs) + return "" +} + +func requireFact(t *testing.T, facts []Fact, space, local, value string) Fact { + t.Helper() + + for _, fact := range facts { + if fact.XMLName.Space == space && fact.XMLName.Local == local && fact.Value() == value { + return fact + } + } + + require.Failf(t, "missing fact", "space=%q local=%q value=%q", space, local, value) + return Fact{} +}