From a9117796d9437143f94eec9d197bd4a384cd207a Mon Sep 17 00:00:00 2001
From: Thomas Marcelis <thomasmarcelis@gmail.com>
Date: Sat, 2 May 2026 15:19:50 +0200
Subject: [PATCH] Preserve raw XBRL and tighten validation

---
 README.md                 |   6 +-
 context.go                | 115 +++++++++++++++--
 context_test.go           |  92 ++++++++++++++
 doc.go                    |  17 +++
 example_unmarshal_test.go |  31 +++--
 fact.go                   | 100 +++++++++++++--
 fact_test.go              | 132 +++++++++++++++++++
 fuzz_test.go              |  21 ++++
 parse.go                  |  44 +++++++
 parse_test.go             |  56 +++++++++
 unit.go                   | 118 ++++++++++++++---
 unit_test.go              |  65 ++++++++++
 xbrl.go                   | 259 ++++++++++++++++++++++++++++++++++++--
 xbrl_test.go              | 186 +++++++++++++++++++++++++++
 14 files changed, 1181 insertions(+), 61 deletions(-)
 create mode 100644 doc.go
 create mode 100644 fuzz_test.go
 create mode 100644 parse.go
 create mode 100644 parse_test.go

diff --git a/README.md b/README.md
index 9fb92ac..b690377 100644
--- a/README.md
+++ b/README.md
@@ -8,12 +8,14 @@ This library is based around the [XBRL 2.1 spec](https://www.xbrl.org/Specificat
 It implements support for parsing basic facts (not tuples of facts), contexts and units through the `xml.Unmarshaler` interface.
  
 See the package example in the godocs for how to unmarshal into the `XBRL` struct.
+You can also use `Parse`, `ParseReader`, or `Decode` as small convenience helpers around the same `encoding/xml` path.
 
-This library supports basic validation that checks for malformed facts and broken references between facts and contexts/units (see `XBRL.Validate()`),
+This library supports structural validation that checks for malformed contexts, units, and facts; duplicate IDs; unsupported scenarios; unsupported top-level base `item` and `tuple` elements; and broken references between facts and contexts/units (see `XBRL.Validate()`),
 but it does _not_ implement full semantic validation of XBRL documents.
 
 There are no abstractions added on-top of the XBRL data structure, which makes this library flexible and simple,
 but it also means you might have to read up a bit on how XBRL works to take full advantage of it.
+The parser preserves lower-level XML details such as root attributes, XML names, raw link/reference elements, and generic segment content for callers that need them.
 
 To give you a head start, here's some basics about XBRL:
 
@@ -52,7 +54,7 @@ The above fact doesn't directly tell us in which quarter EPS was `1.41`. That's
 ### Contexts
 
 A [Context](https://www.xbrl.org/Specification/XBRL-2.1/REC-2003-12-31/XBRL-2.1-REC-2003-12-31+corrected-errata-2013-02-20.html#_4.7)
-describes a business entity, period of time, and an optional scenario (this library doesn't currently support scenarios, so we're going to gloss over them).  
+describes a business entity, period of time, and an optional scenario (this library preserves scenario XML, but does not interpret scenario semantics).
 
 When a fact references a context, it gives the fact more detail to help us understand what it means.
 
diff --git a/context.go b/context.go
index 057bcb3..f5a6f41 100644
--- a/context.go
+++ b/context.go
@@ -1,15 +1,20 @@
 package xbrl
 
-import "encoding/xml"
+import (
+	"encoding/xml"
+	"errors"
+)
 
-// Context contains information about the Entity being described, the reporting Period, and the reporting Scenario (scenario is NOT implemented).
+// Context contains information about the Entity being described, the reporting Period, and the reporting Scenario.
 // All of which are necessary for understanding a business Fact captured as an XBRL item.
+// Scenario is preserved as raw XML but never interpreted; Context.Validate rejects any context that has one.
 // https://www.xbrl.org/Specification/XBRL-2.1/REC-2003-12-31/XBRL-2.1-REC-2003-12-31+corrected-errata-2013-02-20.html#_4.7
 type Context struct {
 	ID string `xml:"id,attr"`
 
-	Period Period `xml:"period"`
-	Entity Entity `xml:"entity"`
+	Period   Period      `xml:"period"`
+	Entity   Entity      `xml:"entity"`
+	Scenario *RawElement `xml:"scenario"`
 }
 
 // Entity documents the business entity for a Context (business, government department, individual, etc.).
@@ -19,6 +24,23 @@ type Entity struct {
 	Segments   Segments   `xml:"segment"`
 }
 
+// Validate checks that e contains the structural fields required by XBRL.
+func (e Entity) Validate() error {
+	if e.Identifier.Scheme == "" {
+		return errors.New("entity identifier missing scheme")
+	}
+	if e.Identifier.Value == "" {
+		return errors.New("entity identifier missing value")
+	}
+
+	return nil
+}
+
+// IsValid validates e and returns true if no error was found.
+func (e Entity) IsValid() bool {
+	return e.Validate() == nil
+}
+
 // Identifier specifies a scheme for identifying business entities and an identifier that follows the scheme.
 // https://www.xbrl.org/Specification/XBRL-2.1/REC-2003-12-31/XBRL-2.1-REC-2003-12-31+corrected-errata-2013-02-20.html#_4.7.3.1
 // For Example:
@@ -43,6 +65,7 @@ type Segment struct {
 	XMLName    xml.Name
 	Attributes []xml.Attr `xml:",any,attr"`
 	Value      string     `xml:",chardata"`
+	InnerXML   string     `xml:",innerxml"`
 }
 
 // UnmarshalXML implements xml.Unmarshaller for Segments.
@@ -60,22 +83,27 @@ func (s *Segments) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
 	return nil
 }
 
+// PeriodType describes which supported shape a Period has.
 type PeriodType string
 
 // All the supported PeriodType values. See Period.Type() for more information.
 const (
+	// PeriodTypeDuration is a period with startDate and endDate.
 	PeriodTypeDuration PeriodType = "duration"
-	PeriodTypeInstant  PeriodType = "instant"
-	PeriodTypeForever  PeriodType = "forever"
-	PeriodTypeInvalid  PeriodType = "invalid"
+	// PeriodTypeInstant is a period with instant.
+	PeriodTypeInstant PeriodType = "instant"
+	// PeriodTypeForever is a period with forever.
+	PeriodTypeForever PeriodType = "forever"
+	// PeriodTypeInvalid is a period that does not match exactly one supported shape.
+	PeriodTypeInvalid PeriodType = "invalid"
 )
 
 // Period contains an instant or interval of time for a Context.
 // https://www.xbrl.org/Specification/XBRL-2.1/REC-2003-12-31/XBRL-2.1-REC-2003-12-31+corrected-errata-2013-02-20.html#_4.7.2
 type Period struct {
-	// StartDate is non-nil and guaranteed to be before EndDate if Period.Type() returns Duration.
+	// StartDate is non-nil if Period.Type() returns Duration.
 	StartDate *string `xml:"startDate"`
-	// EndDate is non-nil and guaranteed to be after StartDate if Period.Type() returns Duration.
+	// EndDate is non-nil if Period.Type() returns Duration.
 	EndDate *string `xml:"endDate"`
 
 	// Instant is non-nil if Period.Type() returns Instant
@@ -90,17 +118,78 @@ type Period struct {
 // Type returns the type of this period to help clarify what fields in the Period struct are non-nil and valid to use.
 // The comments on the attributes inside the Period struct explain when they can be used depending on what this function returns.
 func (p Period) Type() PeriodType {
+	periodType := PeriodTypeInvalid
+	matches := 0
+
 	if p.Forever != nil {
-		return PeriodTypeForever
+		periodType = PeriodTypeForever
+		matches++
 	}
 
 	if p.Instant != nil {
-		return PeriodTypeInstant
+		periodType = PeriodTypeInstant
+		matches++
 	}
 
 	if p.StartDate != nil && p.EndDate != nil {
-		return PeriodTypeDuration
+		periodType = PeriodTypeDuration
+		matches++
+	}
+	// A lone startDate or endDate is a malformed duration, even next to a valid instant or forever.
+	if matches != 1 || (p.StartDate == nil) != (p.EndDate == nil) {
+		return PeriodTypeInvalid
+	}
+
+	return periodType
+}
+
+// Validate checks that p has exactly one supported XBRL period shape.
+func (p Period) Validate() error {
+	switch p.Type() {
+	case PeriodTypeDuration:
+		if *p.StartDate == "" {
+			return errors.New("duration period missing startDate")
+		}
+		if *p.EndDate == "" {
+			return errors.New("duration period missing endDate")
+		}
+	case PeriodTypeInstant:
+		if *p.Instant == "" {
+			return errors.New("instant period missing value")
+		}
+	case PeriodTypeForever:
+		return nil
+	default:
+		return errors.New("period must have exactly one of duration, instant, or forever")
 	}
 
-	return PeriodTypeInvalid
+	return nil
+}
+
+// IsValid validates p and returns true if no error was found.
+func (p Period) IsValid() bool {
+	return p.Validate() == nil
+}
+
+// Validate checks that c contains the structural fields this parser supports.
+func (c Context) Validate() error {
+	if c.ID == "" {
+		return errors.New("context missing id")
+	}
+	if err := c.Entity.Validate(); err != nil {
+		return err
+	}
+	if err := c.Period.Validate(); err != nil {
+		return err
+	}
+	if c.Scenario != nil {
+		return errors.New("scenario is not supported")
+	}
+
+	return nil
+}
+
+// IsValid validates c and returns true if no error was found.
+func (c Context) IsValid() bool {
+	return c.Validate() == nil
 }
diff --git a/context_test.go b/context_test.go
index 18fbdf1..b41fc38 100644
--- a/context_test.go
+++ b/context_test.go
@@ -74,13 +74,105 @@ func TestUnmarshalContext(t *testing.T) {
 		assert.Equal(t, xml.Name{Space: "xbrldi", Local: "explicitMember"}, context.Entity.Segments[0].XMLName)
 		assert.Equal(t, []xml.Attr{{Name: xml.Name{Local: "dimension"}, Value: "us-gaap:StatementClassOfStockAxis"}}, context.Entity.Segments[0].Attributes)
 		assert.Equal(t, "us-gaap:CommonStockMember", context.Entity.Segments[0].Value)
+		assert.Equal(t, "us-gaap:CommonStockMember", context.Entity.Segments[0].InnerXML)
 
 		assert.Equal(t, xml.Name{Space: "myns", Local: "cool_segment"}, context.Entity.Segments[1].XMLName)
 		assert.Empty(t, context.Entity.Segments[1].Attributes)
 		assert.Equal(t, "I follow my own rules", context.Entity.Segments[1].Value)
+		assert.Equal(t, "I follow my own rules", context.Entity.Segments[1].InnerXML)
 
 		assert.Equal(t, PeriodTypeDuration, context.Period.Type())
 		assert.Equal(t, "2020-09-27", *context.Period.StartDate)
 		assert.Equal(t, "2021-03-27", *context.Period.EndDate)
 	})
+
+	t.Run("segment preserves nested raw XML", func(t *testing.T) {
+		// language=xml
+		contextXML := `<context id="nested_segment">
+    <entity>
+        <identifier scheme="http://www.sec.gov/CIK">0000320193</identifier>
+        <segment>
+            <dim:typedMember dimension="custom:Axis"><custom:domain>value</custom:domain></dim:typedMember>
+        </segment>
+    </entity>
+    <period><forever/></period>
+</context>`
+
+		var context Context
+		require.NoError(t, xml.Unmarshal([]byte(contextXML), &context))
+
+		require.Len(t, context.Entity.Segments, 1)
+		assert.Equal(t, xml.Name{Space: "dim", Local: "typedMember"}, context.Entity.Segments[0].XMLName)
+		assert.Contains(t, context.Entity.Segments[0].InnerXML, "<custom:domain>value</custom:domain>")
+	})
+}
+
+func TestContextValidation(t *testing.T) {
+	t.Run("period must have exactly one shape", func(t *testing.T) {
+		period := Period{
+			StartDate: stringPtr("2020-09-27"),
+			EndDate:   stringPtr("2021-03-27"),
+			Instant:   stringPtr("2021-03-27"),
+		}
+
+		assert.Equal(t, PeriodTypeInvalid, period.Type())
+		assert.False(t, period.IsValid())
+	})
+
+	t.Run("duration requires start and end values", func(t *testing.T) {
+		period := Period{
+			StartDate: stringPtr("2020-09-27"),
+			EndDate:   stringPtr(""),
+		}
+
+		assert.EqualError(t, period.Validate(), "duration period missing endDate")
+	})
+
+	t.Run("entity requires identifier scheme and value", func(t *testing.T) {
+		entity := Entity{
+			Identifier: Identifier{
+				Scheme: "http://www.sec.gov/CIK",
+			},
+		}
+
+		assert.EqualError(t, entity.Validate(), "entity identifier missing value")
+	})
+
+	t.Run("context requires id and entity identifier", func(t *testing.T) {
+		context := Context{
+			Period: Period{Instant: stringPtr("2021-03-27")},
+			Entity: Entity{
+				Identifier: Identifier{
+					Scheme: "http://www.sec.gov/CIK",
+					Value:  "0000320193",
+				},
+			},
+		}
+
+		assert.EqualError(t, context.Validate(), "context missing id")
+		assert.False(t, context.IsValid())
+	})
+
+	t.Run("scenario is preserved but unsupported by validation", func(t *testing.T) {
+		// language=xml
+		contextXML := `<context id="scenario_context">
+    <entity>
+        <identifier scheme="http://www.sec.gov/CIK">0000320193</identifier>
+    </entity>
+    <period>
+        <instant>2021-03-27</instant>
+    </period>
+    <scenario>
+        <myns:forecast>true</myns:forecast>
+    </scenario>
+</context>`
+
+		var context Context
+		require.NoError(t, xml.Unmarshal([]byte(contextXML), &context))
+
+		require.NotNil(t, context.Scenario)
+		assert.Equal(t, xml.Name{Local: "scenario"}, context.Scenario.XMLName)
+		assert.Contains(t, context.Scenario.InnerXML, "forecast")
+		assert.EqualError(t, context.Validate(), "scenario is not supported")
+	})
 }
diff --git a/doc.go b/doc.go
new file mode 100644
index 0000000..66c8735
--- /dev/null
+++ b/doc.go
@@ -0,0 +1,17 @@
+// Package xbrl parses XBRL 2.1 instance documents into simple Go data.
+//
+// The package preserves XBRL concepts such as facts, contexts, periods,
+// entities, segments, units, XML names, attributes, and raw reference elements.
+// It does not load taxonomies, resolve linkbases, normalize financial
+// statements, transform Inline XBRL, or perform accounting-rule validation.
+//
+// XML unmarshalling is a first-class API:
+//
+//	var doc xbrl.XBRL
+//	err := xml.Unmarshal(data, &doc)
+//
+// Parse, ParseReader, and Decode are convenience helpers around the same
+// encoding/xml path. Parsing and validation are separate operations; call
+// XBRL.Validate when you need structural checks for contexts, units, facts, and
+// references.
+package xbrl
diff --git a/example_unmarshal_test.go b/example_unmarshal_test.go
index 5abdf74..4cd0ceb 100644
--- a/example_unmarshal_test.go
+++ b/example_unmarshal_test.go
@@ -7,7 +7,11 @@ import (
 	"github.com/massive-com/xbrl-parser/v2"
 )
 
-const doc = `<xbrl>
+const doc = `<xbrl
+    xmlns="http://www.xbrl.org/2003/instance"
+    xmlns:link="http://www.xbrl.org/2003/linkbase"
+    xmlns:xlink="http://www.w3.org/1999/xlink"
+    xmlns:ci="http://www.xbrl.org/us/gaap/ci/2003/usfr-ci-2003">
     <link:schemaRef xlink:type="simple" xlink:href="http://www.xbrl.org/us/fr/ci/2000-07-31/usfr-ci-2003.xsd"/>
 
     <context id="c1">
@@ -32,12 +36,11 @@ func Example() {
 	if err := xml.Unmarshal([]byte(doc), &processed); err != nil {
 		panic(err)
 	}
-
-	fact := processed.Facts[0]
-	if !fact.IsValid() {
-		panic("fact invalid!")
+	if err := processed.Validate(); err != nil {
+		panic(err)
 	}
 
+	fact := processed.Facts[0]
 	factType := fact.Type()
 	numericValue, err := fact.NumericValue()
 
@@ -48,9 +51,23 @@ func Example() {
 		panic(err)
 	}
 
-	fmt.Printf("Fact: %s:%s (type: %s)\n", fact.XMLName.Space, fact.XMLName.Local, factType)
+	fmt.Printf("Fact: %s (namespace: %s, type: %s)\n", fact.XMLName.Local, fact.XMLName.Space, factType)
 	fmt.Printf("      %.0f %s on %s\n", numericValue, factUnit.String(), *factContext.Period.Instant)
 
-	// Output: Fact: ci:assets (type: non_fraction)
+	// Output: Fact: assets (namespace: http://www.xbrl.org/us/gaap/ci/2003/usfr-ci-2003, type: non_fraction)
 	//       727 shares on 2021-04-16
 }
+
+func ExampleParse() {
+	processed, err := xbrl.Parse([]byte(doc))
+	if err != nil {
+		panic(err)
+	}
+	if err := processed.Validate(); err != nil {
+		panic(err)
+	}
+
+	fmt.Println(len(processed.Facts))
+
+	// Output: 1
+}
diff --git a/fact.go b/fact.go
index c65a174..361d26c 100644
--- a/fact.go
+++ b/fact.go
@@ -6,6 +6,7 @@ import (
 	"strconv"
 )
 
+// FactType describes the structural category of a parsed Fact.
 type FactType string
 
 const (
@@ -17,7 +18,7 @@ const (
 	// A non-numeric fact is guaranteed to have an XMLName, ContextRef, and ValueStr.
 	FactTypeNonNumeric FactType = "non_numeric"
 
-	// FactTypeNonFraction is a non-nil fact describing a numeric value that can precisely expressed as a simple value.
+	// FactTypeNonFraction is a non-nil fact describing a numeric value that can be precisely expressed as a simple value.
 	// A non-fraction fact is guaranteed to have an XMLName, ContextRef, UnitRef, ValueStr, and exactly one of Precision or Decimals.
 	//
 	// For example: <ci:capitalLeases contextRef="c1" unitRef="u1" precision="3">727432</ci:capitalLeases>
@@ -119,38 +120,115 @@ func (f Fact) Type() FactType {
 	return FactTypeNonNumeric
 }
 
-// IsValid confirms that f has at least the required fields that the FactType requires.
-// Note that this function is not strict about extra fields existing.
+// IsValid confirms that f has the structural fields required for its FactType.
 func (f Fact) IsValid() bool {
+	return f.Validate() == nil
+}
+
+// Validate checks that f has the structural fields required for its FactType.
+// It does not perform taxonomy-aware validation.
+func (f Fact) Validate() error {
 	// All facts must have a context ref
 	if f.ContextRef == "" {
-		return false
+		return errors.New("missing contextRef")
 	}
 
 	// Some types have particular rules beyond what Type() checks for that must be true to be considered valid.
 	switch f.Type() {
 	case FactTypeFraction:
-		// Fraction must have a non-zero Denominator
-		return *f.Denominator != 0
+		if f.UnitRef == nil || *f.UnitRef == "" {
+			return errors.New("fraction fact missing unitRef")
+		}
+		if f.Numerator == nil {
+			return errors.New("fraction fact missing numerator")
+		}
+		if f.Denominator == nil {
+			return errors.New("fraction fact missing denominator")
+		}
+		if *f.Denominator == 0 {
+			return errors.New("fraction fact denominator is zero")
+		}
+		if f.Precision != nil || f.Decimals != nil {
+			return errors.New("fraction fact cannot have precision or decimals")
+		}
 	case FactTypeNonFraction:
-		// NonFractions must have either a non-nil Precision or non-nil Decimals field
-		return (f.Precision == nil) != (f.Decimals == nil)
+		if f.UnitRef == nil || *f.UnitRef == "" {
+			return errors.New("non-fraction fact missing unitRef")
+		}
+		if f.ValueStr == nil {
+			return errors.New("non-fraction fact missing value")
+		}
+		if f.Numerator != nil || f.Denominator != nil {
+			return errors.New("non-fraction fact cannot have numerator or denominator")
+		}
+		if (f.Precision == nil) == (f.Decimals == nil) {
+			return errors.New("non-fraction fact must have exactly one of precision or decimals")
+		}
+		if f.Precision != nil && !isValidPrecision(*f.Precision) {
+			return errors.New("non-fraction fact has invalid precision")
+		}
+		if f.Decimals != nil && !isValidDecimals(*f.Decimals) {
+			return errors.New("non-fraction fact has invalid decimals")
+		}
+		if _, err := strconv.ParseFloat(*f.ValueStr, 64); err != nil {
+			return err
+		}
 	case FactTypeNonNumeric:
-		return f.ValueStr != nil
-	default:
+		if f.ValueStr == nil {
+			return errors.New("non-numeric fact missing value")
+		}
+		if f.UnitRef != nil {
+			return errors.New("non-numeric fact cannot have unitRef")
+		}
+		if f.Precision != nil || f.Decimals != nil {
+			return errors.New("non-numeric fact cannot have precision or decimals")
+		}
+		if f.Numerator != nil || f.Denominator != nil {
+			return errors.New("non-numeric fact cannot have numerator or denominator")
+		}
+	}
+
+	return nil
+}
+
+func isValidPrecision(precision string) bool {
+	if precision == "INF" {
+		return true
+	}
+
+	value, err := strconv.Atoi(precision)
+	return err == nil && value >= 0
+}
+
+func isValidDecimals(decimals string) bool {
+	if decimals == "INF" {
 		return true
 	}
+
+	_, err := strconv.Atoi(decimals)
+	return err == nil
 }
 
 // NumericValue attempts to return the numeric value this fact represents.
-// This function returns
 // If this fact is a fraction type, this function returns the value of numerator / denominator.
 // Note that fraction type facts generally cannot be precisely represented as a float64 and may have some rounding error.
 func (f Fact) NumericValue() (float64, error) {
 	switch f.Type() {
 	case FactTypeFraction:
+		if f.Numerator == nil {
+			return 0, errors.New("fraction fact missing numerator")
+		}
+		if f.Denominator == nil {
+			return 0, errors.New("fraction fact missing denominator")
+		}
+		if *f.Denominator == 0 {
+			return 0, errors.New("fraction fact denominator is zero")
+		}
 		return *f.Numerator / *f.Denominator, nil
 	case FactTypeNonFraction:
+		if f.ValueStr == nil {
+			return 0, errors.New("non-fraction fact missing value")
+		}
 		return strconv.ParseFloat(*f.ValueStr, 64)
 	default:
 		return 0, ErrNonNumericFactType
diff --git a/fact_test.go b/fact_test.go
index 70f77b4..8f7baae 100644
--- a/fact_test.go
+++ b/fact_test.go
@@ -108,3 +108,135 @@ func TestUnmarshalFact(t *testing.T) {
 		assert.EqualValues(t, 1.0/3.0, val)
 	})
 }
+
+func TestFactValidation(t *testing.T) {
+	unitRef := "u1"
+	precision := "3"
+	decimals := "2"
+	invalidPrecision := "-1"
+	invalidDecimals := "not-an-integer"
+
+	tests := []struct {
+		name    string
+		fact    Fact
+		wantErr string
+	}{
+		{
+			name: "fact requires context ref",
+			fact: Fact{
+				UnitRef:   &unitRef,
+				Precision: &precision,
+				ValueStr:  stringPtr("727"),
+			},
+			wantErr: "missing contextRef",
+		},
+		{
+			name: "non-fraction missing value",
+			fact: Fact{
+				ContextRef: "c1",
+				UnitRef:    &unitRef,
+				Precision:  &precision,
+			},
+			wantErr: "non-fraction fact missing value",
+		},
+		{
+			name: "non-fraction with precision and decimals",
+			fact: Fact{
+				ContextRef: "c1",
+				UnitRef:    &unitRef,
+				Precision:  &precision,
+				Decimals:   &decimals,
+				ValueStr:   stringPtr("727"),
+			},
+			wantErr: "non-fraction fact must have exactly one of precision or decimals",
+		},
+		{
+			name: "non-fraction with invalid precision",
+			fact: Fact{
+				ContextRef: "c1",
+				UnitRef:    &unitRef,
+				Precision:  &invalidPrecision,
+				ValueStr:   stringPtr("727"),
+			},
+			wantErr: "non-fraction fact has invalid precision",
+		},
+		{
+			name: "non-fraction with invalid decimals",
+			fact: Fact{
+				ContextRef: "c1",
+				UnitRef:    &unitRef,
+				Decimals:   &invalidDecimals,
+				ValueStr:   stringPtr("727"),
+			},
+			wantErr: "non-fraction fact has invalid decimals",
+		},
+		{
+			name: "non-numeric with precision",
+			fact: Fact{
+				ContextRef: "c1",
+				Precision:  &precision,
+				ValueStr:   stringPtr("not numeric"),
+			},
+			wantErr: "non-numeric fact cannot have precision or decimals",
+		},
+		{
+			name: "fraction with zero denominator",
+			fact: Fact{
+				ContextRef:  "c1",
+				UnitRef:     &unitRef,
+				Numerator:   floatPtr(1),
+				Denominator: floatPtr(0),
+			},
+			wantErr: "fraction fact denominator is zero",
+		},
+		{
+			name: "fraction with precision",
+			fact: Fact{
+				ContextRef:  "c1",
+				UnitRef:     &unitRef,
+				Precision:   &precision,
+				Numerator:   floatPtr(1),
+				Denominator: floatPtr(3),
+			},
+			wantErr: "fraction fact cannot have precision or decimals",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			err := tt.fact.Validate()
+			require.Error(t, err)
+			assert.Contains(t, err.Error(), tt.wantErr)
+			assert.False(t, tt.fact.IsValid())
+		})
+	}
+}
+
+func TestNilFactValidation(t *testing.T) {
+	nilValue := true
+	fact := Fact{
+		XMLName:    xml.Name{Space: "myns", Local: "nilFact"},
+		Nil:        &nilValue,
+		ContextRef: "c1",
+	}
+
+	assert.Equal(t, FactTypeNil, fact.Type())
+	assert.NoError(t, fact.Validate())
+}
+
+func TestNumericValueMalformedFactReturnsError(t *testing.T) {
+	unitRef := "u1"
+	precision := "3"
+	fact := Fact{
+		ContextRef: "c1",
+		UnitRef:    &unitRef,
+		Precision:  &precision,
+	}
+
+	_, err := fact.NumericValue()
+	assert.Error(t, err)
+}
+
+func floatPtr(val float64) *float64 {
+	return &val
+}
diff --git a/fuzz_test.go b/fuzz_test.go
new file mode 100644
index 0000000..f7370a2
--- /dev/null
+++ b/fuzz_test.go
@@ -0,0 +1,21 @@
+//go:build go1.18
+// +build go1.18
+
+package xbrl
+
+import "testing"
+
+func FuzzParseAndValidate(f *testing.F) {
+	f.Add([]byte(`<xbrl/>`))
+	f.Add([]byte(`<xbrl><context id="c1"><entity><identifier scheme="s">e</identifier></entity><period><forever/></period></context></xbrl>`))
+	f.Add([]byte(`<xbrl><ci:assets contextRef="missing" unitRef="u1" precision="3">727</ci:assets></xbrl>`))
+
+	f.Fuzz(func(t *testing.T, data []byte) {
+		doc, err := Parse(data)
+		if err != nil {
+			return
+		}
+
+		_ = doc.Validate()
+	})
+}
diff --git a/parse.go b/parse.go
new file mode 100644
index 0000000..5d20fe6
--- /dev/null
+++ b/parse.go
@@ -0,0 +1,44 @@
+package xbrl
+
+import (
+	"encoding/xml"
+	"errors"
+	"io"
+)
+
+// Parse unmarshals an XBRL instance document from data.
+// It does not call XBRL.Validate; parsing and structural validation are separate operations.
+func Parse(data []byte) (XBRL, error) {
+	var doc XBRL
+	if err := xml.Unmarshal(data, &doc); err != nil {
+		return XBRL{}, err
+	}
+
+	return doc, nil
+}
+
+// ParseReader decodes an XBRL instance document from r using encoding/xml.
+// It does not call XBRL.Validate.
+func ParseReader(r io.Reader) (XBRL, error) {
+	if r == nil {
+		return XBRL{}, errors.New("nil reader")
+	}
+
+	return Decode(xml.NewDecoder(r))
+}
+
+// Decode decodes an XBRL instance document with decoder.
+// Use this helper when callers need to configure xml.Decoder, such as setting CharsetReader.
+// It does not call XBRL.Validate.
+func Decode(decoder *xml.Decoder) (XBRL, error) {
+	if decoder == nil {
+		return XBRL{}, errors.New("nil decoder")
+	}
+
+	var doc XBRL
+	if err := decoder.Decode(&doc); err != nil {
+		return XBRL{}, err
+	}
+
+	return doc, nil
+}
diff --git a/parse_test.go b/parse_test.go
new file mode 100644
index 0000000..471e255
--- /dev/null
+++ b/parse_test.go
@@ -0,0 +1,56 @@
+package xbrl
+
+import (
+	"encoding/xml"
+	"io"
+	"os"
+	"strings"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+func TestParseHelpers(t *testing.T) {
+	t.Run("Parse does not validate", func(t *testing.T) {
+		xbrlBytes, err := os.ReadFile("test_data/invalid_xbrl.xml")
+		require.NoError(t, err)
+
+		doc, err := Parse(xbrlBytes)
+		require.NoError(t, err)
+
+		assert.Error(t, doc.Validate())
+	})
+
+	t.Run("ParseReader", func(t *testing.T) {
+		doc, err := ParseReader(strings.NewReader(`<xbrl/>`))
+		require.NoError(t, err)
+
+		assert.Equal(t, xml.Name{Local: "xbrl"}, doc.XMLName)
+	})
+
+	t.Run("Decode uses caller configured decoder", func(t *testing.T) {
+		f, err := os.Open("test_data/edgr-2004_10k.xml")
+		require.NoError(t, err)
+		defer f.Close()
+
+		decoder := xml.NewDecoder(f)
+		decoder.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) {
+			return input, nil
+		}
+
+		doc, err := Decode(decoder)
+		require.NoError(t, err)
+
+		assert.Equal(t, xml.Name{Space: xbrlInstanceNamespace, Local: "xbrl"}, doc.XMLName)
+		assert.Len(t, doc.ContextsByID, 4)
+	})
+
+	t.Run("nil inputs", func(t *testing.T) {
+		_, err := ParseReader(nil)
+		assert.EqualError(t, err, "nil reader")
+
+		_, err = Decode(nil)
+		assert.EqualError(t, err, "nil decoder")
+	})
+}
diff --git a/unit.go b/unit.go
index 2aa3187..53a28b3 100644
--- a/unit.go
+++ b/unit.go
@@ -1,13 +1,18 @@
 package xbrl
 
-import "strings"
+import (
+	"errors"
+	"strings"
+)
 
 // Unit specifies the unit in which a numeric fact has been measured.
 // A Unit can be either a simple measure, product of measures, or a ratio of products of measures with a numerator and a denominator.
 //
 // A simple unit that represents shares looks like:
 // <unit>
-//     <measure>shares</measure>
+//
+//	<measure>shares</measure>
+//
 // </unit>
 //
 // Numeric Facts reference units by ID via the Fact's `unitRef` attribute.
@@ -18,17 +23,19 @@ type Unit struct {
 	Divide   *Divide  `xml:"divide"`
 }
 
-// Divide represents a ratios of Units that has a numerator and a denominator.
+// Divide represents a ratio of units that has a numerator and a denominator.
 // For example, XBRL can represent a complex unit like earnings per share (EPS) as dollars per share (USD / share):
 // <unit>
-//     <divide>
-//	       <unitNumerator>
-//             <measure>iso4127:USD</measure>
-//         </unitNumerator>
-//         <unitDenominator>
-//             <measure>shares</measure>
-//         </unitDenominator>
-//     </divide>
+//
+//	<divide>
+//	    <unitNumerator>
+//	        <measure>iso4217:USD</measure>
+//	    </unitNumerator>
+//	    <unitDenominator>
+//	        <measure>shares</measure>
+//	    </unitDenominator>
+//	</divide>
+//
 // </unit>
 //
 // https://www.xbrl.org/Specification/XBRL-2.1/REC-2003-12-31/XBRL-2.1-REC-2003-12-31+corrected-errata-2013-02-20.html#_4.8.2
@@ -47,22 +54,103 @@ type Measure struct {
 	Value string `xml:",chardata"`
 }
 
+// Measures is a product of one or more Measure values.
 type Measures []Measure
 
+// Validate checks that u has the structural fields required by XBRL.
+func (u Unit) Validate() error {
+	if u.ID == "" {
+		return errors.New("unit missing id")
+	}
+	if (len(u.Measures) == 0) == (u.Divide == nil) {
+		return errors.New("unit must have either measures or divide")
+	}
+
+	if u.Divide != nil {
+		return u.Divide.Validate()
+	}
+
+	return u.Measures.Validate()
+}
+
+// IsValid validates u and returns true if no error was found.
+func (u Unit) IsValid() bool {
+	return u.Validate() == nil
+}
+
+// Validate checks that d has numerator and denominator measures.
+func (d Divide) Validate() error {
+	if len(d.Numerator) == 0 {
+		return errors.New("divide missing numerator measures")
+	}
+	if len(d.Denominator) == 0 {
+		return errors.New("divide missing denominator measures")
+	}
+	if err := d.Numerator.Validate(); err != nil {
+		return err
+	}
+	if err := d.Denominator.Validate(); err != nil {
+		return err
+	}
+
+	return nil
+}
+
+// IsValid validates d and returns true if no error was found.
+func (d Divide) IsValid() bool {
+	return d.Validate() == nil
+}
+
+// Validate checks that m contains a non-empty measure value.
+func (m Measure) Validate() error {
+	if m.Value == "" {
+		return errors.New("measure missing value")
+	}
+
+	return nil
+}
+
+// IsValid validates m and returns true if no error was found.
+func (m Measure) IsValid() bool {
+	return m.Validate() == nil
+}
+
+// Validate checks that m has at least one measure and that each measure has a value.
+func (m Measures) Validate() error {
+	if len(m) == 0 {
+		return errors.New("measures missing values")
+	}
+
+	for _, measure := range m {
+		if err := measure.Validate(); err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
+// IsValid validates m and returns true if no error was found.
+func (m Measures) IsValid() bool {
+	return m.Validate() == nil
+}
+
 // String returns a human readable representation of the Unit.
 func (u Unit) String() string {
-	// If the Divide element is not nil, there can be no top-level Meaures.
+	// If the Divide element is not nil, there can be no top-level Measures.
 	if u.Divide != nil {
 		return u.Divide.Numerator.String() + " / " + u.Divide.Denominator.String()
 	}
 
-	// If the divider element is nil, there must be 1+ top-level Measures.
+	// If the Divide element is nil, there must be 1+ top-level Measures.
 	return u.Measures.String()
 }
 
 // String returns the local name of the measure if the value is formatted as 'xsd:Qname', otherwise the value itself is returned.
-// Ex: `<measure>iso4127:USD</measure>` -> "USD"
-//     `<measure>shares</measure>`      -> "shares"
+// This is a display helper only. Use Measure.Value when the raw XBRL value is significant.
+// For example:
+//   - `<measure>iso4217:USD</measure>` -> "USD"
+//   - `<measure>shares</measure>` -> "shares"
 func (m Measure) String() string {
 	if index := strings.IndexRune(m.Value, ':'); index != -1 && index < len(m.Value) {
 		return m.Value[index+1 : len(m.Value)]
diff --git a/unit_test.go b/unit_test.go
index 541c0b3..ab44587 100644
--- a/unit_test.go
+++ b/unit_test.go
@@ -106,3 +106,68 @@ func TestUnmarshalUnit(t *testing.T) {
 		assert.Equal(t, "USD / feet * feet", unit.String())
 	})
 }
+
+func TestUnitValidation(t *testing.T) {
+	tests := []struct {
+		name    string
+		unit    Unit
+		wantErr string // substring expected in the error returned by Unit.Validate
+	}{
+		{
+			name:    "unit requires id",
+			unit:    Unit{Measures: Measures{{Value: "shares"}}},
+			wantErr: "unit missing id",
+		},
+		{
+			name:    "unit requires measure or divide",
+			unit:    Unit{ID: "u1"},
+			wantErr: "unit must have either measures or divide",
+		},
+		{
+			name: "unit cannot have measures and divide",
+			unit: Unit{
+				ID:       "u1",
+				Measures: Measures{{Value: "shares"}},
+				Divide: &Divide{
+					Numerator:   Measures{{Value: "iso4217:USD"}},
+					Denominator: Measures{{Value: "shares"}},
+				},
+			},
+			wantErr: "unit must have either measures or divide",
+		},
+		{
+			name: "divide requires denominator measures",
+			unit: Unit{
+				ID: "u1",
+				Divide: &Divide{
+					Numerator: Measures{{Value: "iso4217:USD"}},
+				},
+			},
+			wantErr: "divide missing denominator measures",
+		},
+		{
+			name: "measure requires value",
+			unit: Unit{
+				ID:       "u1",
+				Measures: Measures{{}},
+			},
+			wantErr: "measure missing value",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			err := tt.unit.Validate()
+			require.Error(t, err)
+			assert.Contains(t, err.Error(), tt.wantErr)
+			assert.False(t, tt.unit.IsValid()) // IsValid must agree with Validate
+		})
+	}
+}
+
+func TestMeasureValuePreservesRawQName(t *testing.T) {
+	measure := Measure{Value: "iso4217:USD"}
+
+	assert.Equal(t, "iso4217:USD", measure.Value)
+	assert.Equal(t, "USD", measure.String()) // display form drops the prefix; Value above keeps it
+}
diff --git a/xbrl.go b/xbrl.go
index b41a692..362be86 100644
--- a/xbrl.go
+++ b/xbrl.go
@@ -5,22 +5,44 @@ import (
 	"fmt"
 )
 
-// NotImplemented represents an expected element in the XBRL that isn't handled yet, but should not be considered a Fact.
+const (
+	xbrlInstanceNamespace = "http://www.xbrl.org/2003/instance" // namespace of the base XBRL instance elements (xbrl, item, tuple)
+)
+
+// NotImplemented only counts occurrences of an expected XBRL element that is not handled in detail; its length is the count.
+// New code should prefer the RawElement fields that preserve XML names, attributes, and inner XML.
 type NotImplemented []*struct{}
 
+// RawElement preserves the name, attributes, and unparsed inner XML of an element that this package does not model in detail.
+type RawElement struct {
+	XMLName    xml.Name
+	Attributes []xml.Attr `xml:",any,attr"`
+	InnerXML   string     `xml:",innerxml"`
+}
+
 // RawXBRL represents the XML structure of an XBRL document.
 // This is not a feature complete XBRL parser!
-// See the fields of type NotImplemented for an idea of what's missing.
+// See the fields of type RawElement and NotImplemented for an idea of what's missing.
 // Also note that this struct doesn't support Tuple facts (https://www.xbrl.org/Specification/XBRL-2.1/REC-2003-12-31/XBRL-2.1-REC-2003-12-31+corrected-errata-2013-02-20.html#_4.9)
 //
 // You can use this struct directly, but XBRL is structured in a more convenient way.
 // See the comment on XBRL for more info.
 type RawXBRL struct {
+	XMLName    xml.Name
+	Attributes []xml.Attr
+
 	Contexts []Context `xml:"context"`
 	Units    []Unit    `xml:"unit"`
 
 	Facts []Fact `xml:",any"`
 
+	SchemaRefs          []RawElement
+	LinkbaseRefs        []RawElement
+	RoleRefs            []RawElement
+	ArcRoleRefs         []RawElement
+	FootnoteLinks       []RawElement
+	UnsupportedTopLevel []RawElement
+
 	// The fields below are not properly implemented, but need to be here so they aren't lumped into the `Facts` slice.
 
 	SchemaRef    NotImplemented `xml:"schemaRef"`
@@ -30,14 +52,160 @@ type RawXBRL struct {
 	FootnoteLink NotImplemented `xml:"footnoteLink"`
 }
 
-// XBRL contains maps for contexts and units so they can be accessed easier when looping through facts.
+// UnmarshalXML implements xml.Unmarshaler. It resets r, records the root element's name and attributes,
+// and hands every top-level child to decodeChild, which also routes taxonomy-defined elements into Facts.
+func (r *RawXBRL) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
+	*r = RawXBRL{
+		XMLName:    start.Name,
+		Attributes: copyAttrs(start.Attr),
+	}
+
+	for {
+		token, err := d.Token()
+		if err != nil {
+			return err
+		}
+
+		switch token := token.(type) {
+		case xml.StartElement:
+			if err := r.decodeChild(d, token); err != nil {
+				return err
+			}
+		case xml.EndElement:
+			if token.Name == start.Name { // decodeChild decodes each child with DecodeElement, so the end tag seen here should be the root's
+				return nil
+			}
+		}
+	}
+}
+
+func (r *RawXBRL) decodeChild(d *xml.Decoder, start xml.StartElement) error {
+	switch start.Name.Local { // dispatch on the local name only; the namespace is not compared here
+	case "context":
+		var context Context
+		if err := d.DecodeElement(&context, &start); err != nil {
+			return err
+		}
+		r.Contexts = append(r.Contexts, context)
+	case "unit":
+		var unit Unit
+		if err := d.DecodeElement(&unit, &start); err != nil {
+			return err
+		}
+		r.Units = append(r.Units, unit)
+	case "schemaRef":
+		element, err := decodeRawElement(d, start)
+		if err != nil {
+			return err
+		}
+		r.SchemaRefs = append(r.SchemaRefs, element)
+		r.SchemaRef = append(r.SchemaRef, &struct{}{}) // keep the legacy NotImplemented count in step (same pattern in the cases below)
+	case "linkbaseRef":
+		element, err := decodeRawElement(d, start)
+		if err != nil {
+			return err
+		}
+		r.LinkbaseRefs = append(r.LinkbaseRefs, element)
+		r.LinkbaseRef = append(r.LinkbaseRef, &struct{}{})
+	case "roleRef":
+		element, err := decodeRawElement(d, start)
+		if err != nil {
+			return err
+		}
+		r.RoleRefs = append(r.RoleRefs, element)
+		r.RoleRef = append(r.RoleRef, &struct{}{})
+	case "arcroleRef":
+		element, err := decodeRawElement(d, start)
+		if err != nil {
+			return err
+		}
+		r.ArcRoleRefs = append(r.ArcRoleRefs, element)
+		r.ArcRoleRef = append(r.ArcRoleRef, &struct{}{})
+	case "footnoteLink":
+		element, err := decodeRawElement(d, start)
+		if err != nil {
+			return err
+		}
+		r.FootnoteLinks = append(r.FootnoteLinks, element)
+		r.FootnoteLink = append(r.FootnoteLink, &struct{}{})
+	default:
+		if isKnownUnsupportedTopLevel(start.Name) { // base item/tuple elements are preserved raw instead of becoming facts
+			element, err := decodeRawElement(d, start)
+			if err != nil {
+				return err
+			}
+			r.UnsupportedTopLevel = append(r.UnsupportedTopLevel, element)
+			return nil
+		}
+
+		var fact Fact // every other top-level element is decoded as a fact
+		if err := d.DecodeElement(&fact, &start); err != nil {
+			return err
+		}
+		r.Facts = append(r.Facts, fact)
+	}
+
+	return nil
+}
+
+func isKnownUnsupportedTopLevel(name xml.Name) bool { // true only for item and tuple in the XBRL instance namespace
+	return name.Space == xbrlInstanceNamespace && (name.Local == "item" || name.Local == "tuple")
+}
+
+func decodeRawElement(d *xml.Decoder, start xml.StartElement) (RawElement, error) {
+	var element RawElement
+	if err := d.DecodeElement(&element, &start); err != nil {
+		return RawElement{}, err // return the zero value rather than a partially decoded element
+	}
+
+	return element, nil
+}
+
+func copyAttrs(attrs []xml.Attr) []xml.Attr {
+	if len(attrs) == 0 {
+		return nil // empty input yields nil, not an empty slice
+	}
+
+	copied := make([]xml.Attr, len(attrs))
+	copy(copied, attrs)
+	return copied
+}
+
+func copyRawElements(elements []RawElement) []RawElement {
+	if len(elements) == 0 {
+		return nil
+	}
+
+	copied := make([]RawElement, len(elements))
+	for index, element := range elements {
+		copied[index] = element
+		copied[index].Attributes = copyAttrs(element.Attributes) // give the copy its own attribute slice
+	}
+
+	return copied
+}
+
+// XBRL keeps contexts and units both as slices and as ID-keyed maps, so they can be looked up more easily when looping through facts.
 // You can either unmarshal XML directly into this struct (it has a custom unmarshaller),
 // or you can unmarshal XML into a RawXBRL struct and call NewProcessedXBRL(RawXBRL) to process the raw XBRL into this format.
 type XBRL struct {
+	XMLName    xml.Name
+	Attributes []xml.Attr
+
+	Contexts []Context
+	Units    []Unit
+
 	ContextsByID map[string]Context
 	UnitsByID    map[string]Unit
 
 	Facts []Fact
+
+	SchemaRefs          []RawElement
+	LinkbaseRefs        []RawElement
+	RoleRefs            []RawElement
+	ArcRoleRefs         []RawElement
+	FootnoteLinks       []RawElement
+	UnsupportedTopLevel []RawElement
 }
 
 // NewProcessedXBRL constructs a XBRL struct from a RawXBRL struct.
@@ -54,9 +222,19 @@ func NewProcessedXBRL(raw RawXBRL) XBRL {
 	}
 
 	return XBRL{
-		ContextsByID: contextsByID,
-		UnitsByID:    unitsByID,
-		Facts:        raw.Facts,
+		XMLName:             raw.XMLName,
+		Attributes:          copyAttrs(raw.Attributes),
+		Contexts:            append([]Context(nil), raw.Contexts...),
+		Units:               append([]Unit(nil), raw.Units...),
+		ContextsByID:        contextsByID,
+		UnitsByID:           unitsByID,
+		Facts:               append([]Fact(nil), raw.Facts...),
+		SchemaRefs:          copyRawElements(raw.SchemaRefs),
+		LinkbaseRefs:        copyRawElements(raw.LinkbaseRefs),
+		RoleRefs:            copyRawElements(raw.RoleRefs),
+		ArcRoleRefs:         copyRawElements(raw.ArcRoleRefs),
+		FootnoteLinks:       copyRawElements(raw.FootnoteLinks),
+		UnsupportedTopLevel: copyRawElements(raw.UnsupportedTopLevel),
 	}
 }
 
@@ -72,20 +250,35 @@ func (x *XBRL) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
 	return nil
 }
 
-// Validate checks that all Facts are valid and reference contexts and units that also exist.
-// Note that since this parser does not properly handle Tuple elements, it's possible that some malformed Facts were unmarshalled.
+// Validate checks basic XBRL structure: contexts, units, facts, duplicate IDs, and fact references, and it rejects unsupported top-level item/tuple elements.
+// It does not perform taxonomy-aware, accounting, linkbase, footnote, or scenario validation, and it does not validate tuple contents.
 func (x XBRL) Validate() error {
+	contextsByID, err := x.validatedContextsByID()
+	if err != nil {
+		return err
+	}
+
+	unitsByID, err := x.validatedUnitsByID()
+	if err != nil {
+		return err
+	}
+
+	if len(x.UnsupportedTopLevel) > 0 {
+		element := x.UnsupportedTopLevel[0]
+		return fmt.Errorf("unsupported top-level element: %s:%s", element.XMLName.Space, element.XMLName.Local)
+	}
+
 	for _, fact := range x.Facts {
-		if !fact.IsValid() {
-			return fmt.Errorf("invalid fact: %s:%s", fact.XMLName.Space, fact.XMLName.Local)
+		if err := fact.Validate(); err != nil {
+			return fmt.Errorf("invalid fact (%s:%s): %w", fact.XMLName.Space, fact.XMLName.Local, err)
 		}
 
-		if _, exists := x.ContextsByID[fact.ContextRef]; !exists {
+		if _, exists := contextsByID[fact.ContextRef]; !exists {
 			return fmt.Errorf("fact (%s:%s) references non-existent context: %s", fact.XMLName.Space, fact.XMLName.Local, fact.ContextRef)
 		}
 
 		if fact.UnitRef != nil {
-			if _, exists := x.UnitsByID[*fact.UnitRef]; !exists {
+			if _, exists := unitsByID[*fact.UnitRef]; !exists {
 				return fmt.Errorf("fact (%s:%s) references non-existent unit: %s", fact.XMLName.Space, fact.XMLName.Local, *fact.UnitRef)
 			}
 		}
@@ -94,7 +287,47 @@ func (x XBRL) Validate() error {
 	return nil
 }
 
-// IsValid validates the Facts in this struct and returns true if no error was found.
+// IsValid reports whether x.Validate returns a nil error.
 func (x XBRL) IsValid() bool {
 	return x.Validate() == nil
 }
+
+func (x XBRL) validatedContextsByID() (map[string]Context, error) {
+	if len(x.Contexts) == 0 {
+		return x.ContextsByID, nil // no slice to validate; fall back to the existing map as-is
+	}
+
+	contextsByID := make(map[string]Context, len(x.Contexts))
+	for _, context := range x.Contexts {
+		if err := context.Validate(); err != nil {
+			return nil, fmt.Errorf("invalid context (%s): %w", context.ID, err)
+		}
+		if _, exists := contextsByID[context.ID]; exists { // the map is rebuilt from the slice so a repeated ID is still visible
+			return nil, fmt.Errorf("duplicate context id: %s", context.ID)
+		}
+
+		contextsByID[context.ID] = context
+	}
+
+	return contextsByID, nil
+}
+
+func (x XBRL) validatedUnitsByID() (map[string]Unit, error) {
+	if len(x.Units) == 0 {
+		return x.UnitsByID, nil // no slice to validate; fall back to the existing map as-is
+	}
+
+	unitsByID := make(map[string]Unit, len(x.Units))
+	for _, unit := range x.Units {
+		if err := unit.Validate(); err != nil {
+			return nil, fmt.Errorf("invalid unit (%s): %w", unit.ID, err)
+		}
+		if _, exists := unitsByID[unit.ID]; exists { // the map is rebuilt from the slice so a repeated ID is still visible
+			return nil, fmt.Errorf("duplicate unit id: %s", unit.ID)
+		}
+
+		unitsByID[unit.ID] = unit
+	}
+
+	return unitsByID, nil
+}
diff --git a/xbrl_test.go b/xbrl_test.go
index 090f580..58eda94 100644
--- a/xbrl_test.go
+++ b/xbrl_test.go
@@ -26,6 +26,29 @@ func TestUnmarshalXBRL(t *testing.T) {
 		assert.Equal(t, 283, len(content.ContextsByID))
 		assert.Equal(t, 9, len(content.UnitsByID))
 		assert.Equal(t, 1070, len(content.Facts))
+		require.Len(t, content.SchemaRefs, 1)
+		assert.Equal(t, "aapl-20210327.xsd", requireAttr(t, content.SchemaRefs[0].Attributes, "http://www.w3.org/1999/xlink", "href"))
+
+		durationContext := content.ContextsByID["i02c0f3e92d75432fbe3c6a24022bf7b0_D20200927-20210327"]
+		assert.Equal(t, PeriodTypeDuration, durationContext.Period.Type())
+		assert.Equal(t, "2020-09-27", *durationContext.Period.StartDate)
+		assert.Equal(t, "2021-03-27", *durationContext.Period.EndDate)
+
+		segmentedContext := content.ContextsByID["iff44040cd61344d085f7a2b7a1076cb1_D20200927-20210327"]
+		require.Len(t, segmentedContext.Entity.Segments, 1)
+		assert.Equal(t, xml.Name{Space: "http://xbrl.org/2006/xbrldi", Local: "explicitMember"}, segmentedContext.Entity.Segments[0].XMLName)
+		assert.Equal(t, "us-gaap:CommonStockMember", segmentedContext.Entity.Segments[0].Value)
+		assert.Equal(t, "us-gaap:StatementClassOfStockAxis", requireAttr(t, segmentedContext.Entity.Segments[0].Attributes, "", "dimension"))
+
+		usdPerShare := content.UnitsByID["usdPerShare"]
+		require.NotNil(t, usdPerShare.Divide)
+		assert.Equal(t, "iso4217:USD", usdPerShare.Divide.Numerator[0].Value)
+		assert.Equal(t, "USD / shares", usdPerShare.String())
+
+		eps := requireFact(t, content.Facts, "http://fasb.org/us-gaap/2020-01-31", "EarningsPerShareBasic", "1.41")
+		assert.Equal(t, "ia09408265617434fbc06a7e4c6b101bc_D20201227-20210327", eps.ContextRef)
+		require.NotNil(t, eps.UnitRef)
+		assert.Equal(t, "usdPerShare", *eps.UnitRef)
 	})
 
 	t.Run("real-world xbrl from 2004", func(t *testing.T) {
@@ -46,6 +69,20 @@ func TestUnmarshalXBRL(t *testing.T) {
 		assert.Equal(t, 4, len(content.ContextsByID))
 		assert.Equal(t, 2, len(content.UnitsByID))
 		assert.Equal(t, 154, len(content.Facts))
+		require.Len(t, content.SchemaRefs, 1)
+		assert.Equal(t, "edgr-20050228.xsd", requireAttr(t, content.SchemaRefs[0].Attributes, "http://www.w3.org/1999/xlink", "href"))
+
+		firstFact := content.Facts[0]
+		assert.Equal(t, xml.Name{Space: "http://www.xbrl.org/us/fr/common/pte/2005-02-28", Local: "AccountsPayable"}, firstFact.XMLName)
+		assert.Equal(t, "edgr_4473_inst_YTD_20041231", firstFact.ContextRef)
+		require.NotNil(t, firstFact.UnitRef)
+		assert.Equal(t, "USD", *firstFact.UnitRef)
+		assert.Equal(t, "995000", firstFact.Value())
+
+		instantContext := content.ContextsByID["edgr_4473_inst_YTD_20041231"]
+		assert.Equal(t, PeriodTypeInstant, instantContext.Period.Type())
+		assert.Equal(t, "2004-12-31", *instantContext.Period.Instant)
+		assert.Equal(t, "USD", content.UnitsByID["USD"].String())
 	})
 
 	t.Run("simple xbrl happy path", func(t *testing.T) {
@@ -57,6 +94,13 @@ func TestUnmarshalXBRL(t *testing.T) {
 		require.NoError(t, xml.Unmarshal(xbrlBytes, &content))
 		require.NoError(t, content.Validate())
 
+		assert.Equal(t, xml.Name{Space: "http://www.xbrl.org/2003/instance", Local: "xbrl"}, content.XMLName)
+		assert.NotEmpty(t, content.Attributes)
+		require.Len(t, content.SchemaRefs, 1)
+		assert.Equal(t, xml.Name{Space: "http://www.xbrl.org/2003/linkbase", Local: "schemaRef"}, content.SchemaRefs[0].XMLName)
+		assert.Len(t, content.Contexts, 1)
+		assert.Len(t, content.Units, 1)
+
 		require.Len(t, content.ContextsByID, 1)
 		expectedContext := Context{
 			ID: "c1",
@@ -111,6 +155,148 @@ func TestUnmarshalXBRL(t *testing.T) {
 	})
 }
 
+func TestUnmarshalRawXBRLPreservesEnvelope(t *testing.T) {
+	xbrlBytes, err := os.ReadFile("test_data/simple_xbrl.xml")
+	require.NoError(t, err)
+
+	var raw RawXBRL
+	require.NoError(t, xml.Unmarshal(xbrlBytes, &raw))
+
+	assert.Equal(t, xml.Name{Space: "http://www.xbrl.org/2003/instance", Local: "xbrl"}, raw.XMLName)
+	assert.NotEmpty(t, raw.Attributes)
+	require.Len(t, raw.SchemaRefs, 1)
+	assert.Equal(t, xml.Name{Space: "http://www.xbrl.org/2003/linkbase", Local: "schemaRef"}, raw.SchemaRefs[0].XMLName)
+	assert.NotEmpty(t, raw.SchemaRefs[0].Attributes)
+	assert.Len(t, raw.SchemaRef, 1) // the legacy NotImplemented counter is still populated
+	assert.Len(t, raw.Facts, 2)
+}
+
+func TestUnmarshalRawXBRLPreservesReferenceAndUnsupportedElements(t *testing.T) {
+	// language=xml
+	doc := `<xbrl xmlns="http://www.xbrl.org/2003/instance"
+    xmlns:link="http://www.xbrl.org/2003/linkbase"
+    xmlns:xlink="http://www.w3.org/1999/xlink">
+    <link:schemaRef xlink:type="simple" xlink:href="example.xsd"/>
+    <link:linkbaseRef xlink:type="simple" xlink:href="labels.xml"/>
+    <link:roleRef roleURI="http://example.com/role" xlink:href="roles.xml#role"/>
+    <link:arcroleRef arcroleURI="http://example.com/arcrole" xlink:href="arcs.xml#arc"/>
+    <link:footnoteLink>
+        <link:footnote xlink:label="f1">Preserved footnote text</link:footnote>
+    </link:footnoteLink>
+    <item id="baseItem"/>
+    <tuple id="baseTuple"><child>nested</child></tuple>
+</xbrl>`
+
+	var raw RawXBRL
+	require.NoError(t, xml.Unmarshal([]byte(doc), &raw))
+
+	require.Len(t, raw.SchemaRefs, 1)
+	assert.Equal(t, "example.xsd", requireAttr(t, raw.SchemaRefs[0].Attributes, "http://www.w3.org/1999/xlink", "href"))
+	assert.Len(t, raw.SchemaRef, 1)
+
+	require.Len(t, raw.LinkbaseRefs, 1)
+	assert.Equal(t, "labels.xml", requireAttr(t, raw.LinkbaseRefs[0].Attributes, "http://www.w3.org/1999/xlink", "href"))
+	assert.Len(t, raw.LinkbaseRef, 1)
+
+	require.Len(t, raw.RoleRefs, 1)
+	assert.Equal(t, "http://example.com/role", requireAttr(t, raw.RoleRefs[0].Attributes, "", "roleURI"))
+	assert.Len(t, raw.RoleRef, 1)
+
+	require.Len(t, raw.ArcRoleRefs, 1)
+	assert.Equal(t, "http://example.com/arcrole", requireAttr(t, raw.ArcRoleRefs[0].Attributes, "", "arcroleURI"))
+	assert.Len(t, raw.ArcRoleRef, 1)
+
+	require.Len(t, raw.FootnoteLinks, 1)
+	assert.Contains(t, raw.FootnoteLinks[0].InnerXML, "Preserved footnote text")
+	assert.Len(t, raw.FootnoteLink, 1)
+
+	require.Len(t, raw.UnsupportedTopLevel, 2)
+	assert.Equal(t, xml.Name{Space: xbrlInstanceNamespace, Local: "item"}, raw.UnsupportedTopLevel[0].XMLName)
+	assert.Equal(t, xml.Name{Space: xbrlInstanceNamespace, Local: "tuple"}, raw.UnsupportedTopLevel[1].XMLName)
+	assert.Contains(t, raw.UnsupportedTopLevel[1].InnerXML, "<child>nested</child>")
+	assert.Empty(t, raw.Facts) // base item/tuple elements must not be routed into Facts
+}
+
+func TestValidateRejectsDuplicateContextIDs(t *testing.T) {
+	// language=xml
+	doc := `<xbrl>
+    <link:schemaRef/>
+    <context id="c1">
+        <entity><identifier scheme="http://www.sec.gov/CIK">0000320193</identifier></entity>
+        <period><instant>2021-03-27</instant></period>
+    </context>
+    <context id="c1">
+        <entity><identifier scheme="http://www.sec.gov/CIK">0000320193</identifier></entity>
+        <period><instant>2021-03-28</instant></period>
+    </context>
+    <unit id="u1"><measure>shares</measure></unit>
+    <ci:assets contextRef="c1" unitRef="u1" precision="3">727</ci:assets>
+</xbrl>`
+
+	var content XBRL
+	require.NoError(t, xml.Unmarshal([]byte(doc), &content)) // unmarshalling accepts the duplicate; only Validate rejects it
+
+	assert.EqualError(t, content.Validate(), "duplicate context id: c1")
+}
+
+func TestValidateRejectsDuplicateUnitIDs(t *testing.T) {
+	// language=xml
+	doc := `<xbrl>
+    <link:schemaRef/>
+    <context id="c1">
+        <entity><identifier scheme="http://www.sec.gov/CIK">0000320193</identifier></entity>
+        <period><instant>2021-03-27</instant></period>
+    </context>
+    <unit id="u1"><measure>shares</measure></unit>
+    <unit id="u1"><measure>iso4217:USD</measure></unit>
+    <ci:assets contextRef="c1" unitRef="u1" precision="3">727</ci:assets>
+</xbrl>`
+
+	var content XBRL
+	require.NoError(t, xml.Unmarshal([]byte(doc), &content)) // unmarshalling accepts the duplicate; only Validate rejects it
+
+	assert.EqualError(t, content.Validate(), "duplicate unit id: u1")
+}
+
+func TestValidateRejectsKnownUnsupportedTopLevelElements(t *testing.T) {
+	// language=xml
+	doc := `<xbrl xmlns="http://www.xbrl.org/2003/instance">
+    <tuple/>
+</xbrl>`
+
+	var content XBRL
+	require.NoError(t, xml.Unmarshal([]byte(doc), &content)) // parsing succeeds; the rejection happens in Validate
+
+	require.Len(t, content.UnsupportedTopLevel, 1)
+	assert.EqualError(t, content.Validate(), "unsupported top-level element: http://www.xbrl.org/2003/instance:tuple")
+}
+
 func stringPtr(str string) *string {
 	return &str
 }
+
+func requireAttr(t *testing.T, attrs []xml.Attr, space, local string) string {
+	t.Helper()
+
+	for _, attr := range attrs {
+		if attr.Name.Space == space && attr.Name.Local == local {
+			return attr.Value
+		}
+	}
+
+	require.Failf(t, "missing XML attribute", "space=%q local=%q attrs=%v", space, local, attrs)
+	return "" // only here to satisfy the compiler; require.Failf is expected to abort the test first
+}
+
+func requireFact(t *testing.T, facts []Fact, space, local, value string) Fact {
+	t.Helper()
+
+	for _, fact := range facts {
+		if fact.XMLName.Space == space && fact.XMLName.Local == local && fact.Value() == value {
+			return fact
+		}
+	}
+
+	require.Failf(t, "missing fact", "space=%q local=%q value=%q", space, local, value)
+	return Fact{} // only here to satisfy the compiler; require.Failf is expected to abort the test first
+}