diff --git a/data_collections_api/base_schema.py b/data_collections_api/base_schema.py
deleted file mode 100644
index 7c65c55..0000000
--- a/data_collections_api/base_schema.py
+++ /dev/null
@@ -1,78 +0,0 @@
-"""Parsing schema for metadata."""
-
-from __future__ import annotations
-
-from datetime import date
-from urllib.parse import urlparse, urlunparse
-from uuid import UUID
-
-from schema import And, Optional, Or, Regex, Schema, Use
-
-ORCID_ID_RE = r"(\d{4}-){3}\d{4}"
-
-id_schema = Or(
-    {
-        "scheme": "orcid",
-        "identifier": Regex(ORCID_ID_RE),
-    },
-    {
-        "identifier": And(Use(urlparse), lambda x: x.scheme and x.netloc, Use(urlunparse)),
-        Optional("scheme", default="doi"): "doi",
-    },
-)
-
-creator_schema = Schema(
-    {
-        Optional("affiliations"): [
-            {
-                "name": str,
-            },
-        ],
-        "person_or_org": {
-            Or("name", "family_name"): And(str, len),
-            Optional("given_name"): And(str, len),
-            Optional("identifiers"): [id_schema],
-            "type": Or("personal"),
-        },
-    },
-    ignore_extra_keys=True,
-)
-
-metadata_schema = Schema(
-    {
-        "title": And(str, len),
-        "description": And(str, len),
-        "creators": [creator_schema],
-        "rights": [
-            {
-                "id": Or("cc-by-4.0"),
-            },
-        ],
-        "resource_type": {
-            "id": Or("model"),
-        },
-        Optional("subjects", default=[]): [{"subject": str}],
-        "version": Regex(r"^v\d+(\.\d+)*"),
-        Optional("publisher"): str,
-        Optional("publication_date"): Or(date.fromisoformat, date.fromtimestamp),
-        Optional("identifiers"): [id_schema],
-    },
-)
-
-base_schema = Schema(
-    {
-        Optional("access", default={"files": "public", "record": "public"}): {
-            Optional("embargo"): {
-                "active": bool,
-                "reason": Or(str, None),
-            },
-            Optional("files", default="public"): Or("public", "private"),
-            Optional("record", default="public"): Or("public", "private"),
-            Optional("status"): Or("open", "closed"),
-        },
-        Optional("files"): {"enabled": bool},
-        "custom_fields": {"dsmd": [dict]},
-        "metadata": metadata_schema,
-        Optional("community"): UUID,
-    },
-)
diff --git a/data_collections_api/cli/data_collections_main.py b/data_collections_api/cli/data_collections_main.py
index 288c2aa..61bf82b 100644
--- a/data_collections_api/cli/data_collections_main.py
+++ b/data_collections_api/cli/data_collections_main.py
@@ -9,6 +9,7 @@
 from data_collections_api.cli.record_upload import get_arg_parser as get_upload_parser
 from data_collections_api.cli.record_upload import main as upload_main
 from data_collections_api.metadata import dump_example, validate_cli
+from data_collections_api.schemas import SCHEMAS
 
 
 def get_arg_parser() -> argparse.ArgumentParser:
@@ -47,6 +48,13 @@ def get_arg_parser() -> argparse.ArgumentParser:
         help="Parse FILE as this type (default: determine from suffix).",
         default=None,
     )
+    sp.add_argument(
+        "-S",
+        "--schema",
+        choices=SCHEMAS.keys(),
+        help="Validate against given schema (default: default).",
+        default="default",
+    )
     sp.set_defaults(func=validate_cli)
 
     # Dump
@@ -61,7 +69,7 @@ def get_arg_parser() -> argparse.ArgumentParser:
         "-f",
         "--format",
         choices=("json", "yaml"),
-        help="Parse FILE as this type (default: determine from suffix).",
+        help="Dump FILE as this type (default: determine from suffix).",
         default=None,
     )
     sp.set_defaults(func=dump_example)
diff --git a/data_collections_api/dumpers.py b/data_collections_api/dumpers.py
index 34dd1f6..0475650 100644
--- a/data_collections_api/dumpers.py
+++ b/data_collections_api/dumpers.py
@@ -6,7 +6,7 @@
 from contextlib import suppress
 import json
 from pathlib import Path
-from typing import Any, Literal, NamedTuple, TextIO
+from typing import Any, Literal, NamedTuple, TextIO, overload
 
 _YAML_TYPE = None
 
@@ -313,6 +313,22 @@ def get_str_loader(fmt: Formats):
     return get_load_dump(fmt, loader=True, string=True)
 
 
+@overload
+def guess_format(path: Path) -> Formats: ...  # numpydoc ignore=GL08
+
+
+@overload
+def guess_format(
+    path: Path, *, raise_on_invalid: Literal[True]
+) -> Formats: ...  # numpydoc ignore=GL08
+
+
+@overload
+def guess_format(
+    path: Path, *, raise_on_invalid: Literal[False]
+) -> Formats | None: ...  # numpydoc ignore=GL08
+
+
 def guess_format(path: Path, *, raise_on_invalid: bool = True) -> Formats | None:
     """
     Guess format from path suffix.
diff --git a/data_collections_api/metadata.py b/data_collections_api/metadata.py
index 944b394..de5b0e3 100644
--- a/data_collections_api/metadata.py
+++ b/data_collections_api/metadata.py
@@ -6,7 +6,6 @@
 from functools import singledispatch
 from pathlib import Path
 
-from data_collections_api.base_schema import base_schema as schema
 from data_collections_api.dumpers import (
     Formats,
     get_dumper,
@@ -14,6 +13,7 @@
     get_str_loader,
     guess_format,
 )
+from data_collections_api.schemas import Schema, get_schema
 
 EXAMPLES_FOLDER = Path(__file__).parent / "examples"
 
@@ -64,31 +64,31 @@ def validate_metadata(_val, fmt: Formats | None = None):
 
 
 @validate_metadata.register(dict)
-def _(data: dict) -> dict:
-    return schema.validate(data)
+def _(data: dict, schema: Schema | str) -> dict:
+    return get_schema(schema).validate(data)
 
 
 @validate_metadata.register(str)
-def _(data: Path | str, fmt: Formats) -> dict:
+def _(data: Path | str, schema: Schema | str, fmt: Formats) -> dict:
     try:
         data = get_str_loader(fmt)(data)
     except Exception:
         data = Path(data)
         return validate_metadata(data)
-    else:
-        return schema.validate(data)
+
+    return get_schema(schema).validate(data)
 
 
 @validate_metadata.register(Path)
-def _(path: Path, fmt: Formats | None = None) -> dict:
+def _(path: Path, schema: Schema | str, fmt: Formats | None = None) -> dict:
     fmt = fmt or guess_format(path)
     data = get_loader(fmt)(path)
-    return schema.validate(data)
+    return get_schema(schema).validate(data)
 
 
 @validate_metadata.register(argparse.Namespace)
 def _(inp: argparse.Namespace) -> dict:
-    return validate_metadata(inp.file, inp.format)
+    return validate_metadata(inp.file, inp.schema, inp.format)
 
 
 def validate_cli(inp: argparse.Namespace) -> dict:
diff --git a/data_collections_api/schemas/__init__.py b/data_collections_api/schemas/__init__.py
new file mode 100644
index 0000000..fe54fed
--- /dev/null
+++ b/data_collections_api/schemas/__init__.py
@@ -0,0 +1,52 @@
+"""Module defining different schemas available for use."""
+
+from __future__ import annotations
+
+from functools import singledispatch
+
+from schema import Schema as Schema
+
+from .base import base_schema
+
+SCHEMAS = {  # registry: schema name -> Schema object
+    "base": base_schema,
+    "default": base_schema,  # alias: same object as "base"
+}
+
+
+@singledispatch
+def get_schema(schema) -> Schema:
+    """
+    Resolve a schema object or a registered schema name to a ``Schema``.
+
+    Parameters
+    ----------
+    schema : Schema | str
+        A ``Schema`` (returned unchanged) or a key of ``SCHEMAS``.
+
+    Returns
+    -------
+    Schema
+        Desired schema.
+
+    Raises
+    ------
+    NotImplementedError
+        Passed a type with no registered handler (neither ``Schema`` nor ``str``).
+
+    Examples
+    --------
+    >>> get_schema("default") is get_schema(base_schema)
+    True
+    """
+    raise NotImplementedError(f"Cannot find schema with {type(schema).__name__}")
+
+
+@get_schema.register
+def _(schema: Schema) -> Schema:
+    return schema  # already a Schema: hand it back untouched
+
+
+@get_schema.register
+def _(schema: str) -> Schema:
+    return SCHEMAS[schema]  # unknown names surface as KeyError
diff --git a/data_collections_api/schemas/base.py b/data_collections_api/schemas/base.py
new file mode 100644
index 0000000..8556092
--- /dev/null
+++ b/data_collections_api/schemas/base.py
@@ -0,0 +1,108 @@
+"""Parsing schema for metadata."""
+
+from __future__ import annotations
+
+from datetime import date
+from urllib.parse import urlparse, urlunparse
+
+from schema import And, Literal, Optional, Or, Regex, Schema, Use
+
+ORCID_ID_RE = r"(\d{4}-){3}\d{4}"
+UUID_RE = r"\d{8}-(\d{4}-){3}\d{12}"
+
+id_schema = Or(  # an identifier is either an ORCID entry or a DOI entry
+    {  # ORCID: "scheme" must be given explicitly
+        Literal("scheme", description="ID scheme."): "orcid",
+        Literal("identifier", description="An [ORCID](https://orcid.org)."): Regex(ORCID_ID_RE),
+    },
+    {  # DOI: "scheme" may be omitted and then defaults to "doi"
+        Optional(Literal("scheme", description="ID scheme."), default="doi"): "doi",
+        Literal("identifier", description="A [DOI](https://www.doi.org)"): And(
+            Use(urlparse), lambda x: x.scheme and x.netloc, Use(urlunparse)
+        ),  # parse as URL, require scheme and host, then rebuild the string
+    },
+)
+
+creator_schema = Schema(  # shape of one entry in the "creators" list
+    {
+        Optional(Literal("affiliations", description="Member affiliations.")): [
+            {
+                Literal("name", description="Name of institution."): str,
+            },
+        ],
+        Literal("person_or_org", description="Person or organisation."): {
+            Or(  # either key is accepted here; the value must be a non-empty str
+                Literal("name", description="Full set of given names."),
+                Literal("family_name", description="Family name(s)."),
+            ): And(str, len),
+            Optional(Literal("given_name", description="Given name(s).")): And(str, len),
+            Optional(Literal("identifiers", description="ORCIDs or other IDs")): [id_schema],
+            Literal("type", description="Personal or organisation."): Or("personal"),
+        },  # NOTE: "type" currently only admits "personal"
+    },
+    ignore_extra_keys=True,  # keys not listed above are tolerated
+)
+
+metadata_schema = Schema(  # contents of the top-level "metadata" block
+    {
+        Literal("title", description="Title of resource."): And(str, len),
+        Literal("description", description="Summary of resource."): And(str, len),
+        Literal("creators", description="List of creators."): [creator_schema],
+        Literal("rights", description="Rights or license."): [
+            {
+                Literal("id", description="ID of rights or license."): Or("cc-by-4.0"),
+            },
+        ],  # only the "cc-by-4.0" licence id is accepted
+        Literal("resource_type", description="Type of resource."): {
+            Literal("id", description="Resource class."): Or("model"),
+        },  # only the "model" resource class is accepted
+        Optional(
+            Literal("subjects", description="List of keywords defining subjects resource covers."),
+            default=[],
+        ): [{Literal("subject", description="Subject keyword."): str}],
+        Literal("version", description="Current version of resource."): Regex(r"^v\d+(\.\d+)*"),
+        Optional(Literal("publisher", description="Publisher of resource.")): str,
+        Optional(Literal("publication_date", description="Date of publication of resource.")): Or(
+            date.fromisoformat, date.fromtimestamp
+        ),  # NOTE(review): used as validators, not wrapped in Use(), so presumably no conversion
+        Optional(
+            Literal("identifiers", description="Resource identifiers such as ORCID or DOI.")
+        ): [id_schema],
+    },
+)
+
+base_schema = Schema(
+    {
+        Optional(
+            Literal("access", description="Accessibility of data outside of owners."),
+            default={"files": "public", "record": "public"},
+        ): {
+            Optional(Literal("embargo", description="Details of resource embargo.")): {
+                Literal("active", description="Whether resource is under embargo."): bool,
+                Literal("reason", description="Cause for embargo."): Or(str, None),
+            },
+            Optional(
+                Literal("files", description="Accessibility to individual files."), default="public"
+            ): Or("public", "private"),
+            Optional(
+                Literal("record", description="Accessibility to record as a whole."),
+                default="public",
+            ): Or("public", "private"),
+            Optional(Literal("status", description="Current status or resource.")): Or(
+                "open", "closed"
+            ),
+        },
+        Optional(Literal("files", description="Details of files.")): {
+            Literal("enabled", description="Whether file is enabled."): bool
+        },
+        Literal("custom_fields", description="Block for custom data."): {
+            Literal("dsmd", description="Domain specific metadata (dsmd)."): [dict]
+        },
+        Literal("metadata", description="Resource metadata."): metadata_schema,
+        Optional(
+            Literal("community", description="UUID of community associated with resource.")
+        ): Regex(UUID_RE),
+    },
+    description="Base schema from which community specific schemas are built.",
+    name="base",
+)
diff --git a/docs/source/api/data_collections_api.cli.rst b/docs/source/api/data_collections_api.cli.rst
new file mode 100644
index 0000000..55eb18d
--- /dev/null
+++ b/docs/source/api/data_collections_api.cli.rst
@@ -0,0 +1,29 @@
+data\_collections\_api.cli package
+==================================
+
+Submodules
+----------
+
+data\_collections\_api.cli.data\_collections\_main module
+---------------------------------------------------------
+
+.. automodule:: data_collections_api.cli.data_collections_main
+   :members:
+   :show-inheritance:
+   :undoc-members:
+
+data\_collections\_api.cli.record\_upload module
+------------------------------------------------
+
+.. automodule:: data_collections_api.cli.record_upload
+   :members:
+   :show-inheritance:
+   :undoc-members:
+
+Module contents
+---------------
+
+.. automodule:: data_collections_api.cli
+   :members:
+   :show-inheritance:
+   :undoc-members:
diff --git a/docs/source/api/data_collections_api.rst b/docs/source/api/data_collections_api.rst
index 1572033..eae62b2 100644
--- a/docs/source/api/data_collections_api.rst
+++ b/docs/source/api/data_collections_api.rst
@@ -8,6 +8,7 @@ Subpackages
    :maxdepth: 4
 
    data_collections_api.cli
+   data_collections_api.schemas
 
 Submodules
 ----------
@@ -36,14 +37,6 @@ data\_collections\_api.metadata module
    :show-inheritance:
    :undoc-members:
 
-data\_collections\_api.schema module
-------------------------------------
-
-.. automodule:: data_collections_api.schema
-   :members:
-   :show-inheritance:
-   :undoc-members:
-
 Module contents
 ---------------
 
diff --git a/docs/source/api/data_collections_api.schemas.rst b/docs/source/api/data_collections_api.schemas.rst
new file mode 100644
index 0000000..05185db
--- /dev/null
+++ b/docs/source/api/data_collections_api.schemas.rst
@@ -0,0 +1,21 @@
+data\_collections\_api.schemas package
+======================================
+
+Submodules
+----------
+
+data\_collections\_api.schemas.base module
+------------------------------------------
+
+.. automodule:: data_collections_api.schemas.base
+   :members:
+   :show-inheritance:
+   :undoc-members:
+
+Module contents
+---------------
+
+.. automodule:: data_collections_api.schemas
+   :members:
+   :show-inheritance:
+   :undoc-members:
diff --git a/docs/source/api/modules.rst b/docs/source/api/modules.rst
index 19f1e46..4ce85db 100644
--- a/docs/source/api/modules.rst
+++ b/docs/source/api/modules.rst
@@ -1,5 +1,5 @@
-API Documentation
-=================
+data_collections_api
+====================
 
 .. toctree::
    :maxdepth: 4
diff --git a/docs/source/cli.rst b/docs/source/cli.rst
index 4f77bc0..f25f5a1 100644
--- a/docs/source/cli.rst
+++ b/docs/source/cli.rst
@@ -1,32 +1,228 @@
 CLI Usage
 =========
 
-``data_collections_api`` provides a few commandline tools for
-simplifying the process of uploading or verifying data.
+``data_collections_api`` provides a few command-line tools for simplifying the process of uploading
+or verifying data and metadata.
 
 data_collections
 ----------------
 
-``data_collections`` is the general top-level interface to the
-tools. These tools are implemented as sub-parsers within the main
-module.
+.. program:: data_collections
+.. describe:: data_collections
+
+   .. option:: operation {validate,template,dump,upload}
+
+      .. option:: validate
+
+         Validate metadata
+
+      .. option:: template
+      .. option:: dump
+
+         Dump a template file.
+
+      .. option:: upload
+
+         Upload a dataset to an invenio repository.
+
+   .. option:: -V, --version
+
+      Show program's version number and exit.
+
+``data_collections`` is the general top-level interface to the tools. These tools are implemented as
+sub-parsers within the main module.
+
+.. admonition:: Running ``data_collections``
+
+   By default, if the ``data_collections_api`` package is installed, ``data_collections`` is
+   installed as an executable script on your main ``PATH``. In general, this is the main entry
+   point.
+
+   If that is not desired, it is possible to run ``data_collections`` through the python module
+   system::
+
+     python -m data_collections_api
+
+   where the ``data_collections_api`` **module** (folder) is on the current ``sys.path`` (by being
+   installed, listed in ``PYTHONPATH``, or located in the current working directory)::
+
+     PYTHONPATH=/path/containing/data_collections_api python -m data_collections_api
+
+   Throughout the rest of this page, we will assume ``data_collections`` is used as the main
+   entrypoint.
+
+
+.. _upload:
 
 upload
 ******
 
-Construct a set of data and upload a set of files along with the metadata to an
-Invenio repository.
+.. program:: data_collections upload
+.. describe:: data_collections upload
+
+   .. option:: --api-url URL
+
+      URL for the API associated with the Invenio repository, e.g.
+      https://data-collections-staging.psdi.ac.uk/api
+
+   .. option:: --api-key str
+
+      Your API key/token for accessing the Invenio repository instance.
+
+   .. option:: --metadata-path file
+
+      File path to the yaml file containing the metadata to upload a record to an Invenio
+      repository, e.g.  path/to/files/record.yaml
+
+   .. option:: -f {json,yaml}, --metadata-format {json,yaml}
+
+      Parse metadata file as this type (default: yaml).
+
+   .. option:: --files FILES [FILES ...]
+
+      List of file paths associated with the record to be uploaded, e.g. path/to/files/data.*
+
+   .. option:: --community str
+
+      Name of an Invenio repository community to upload the record to, e.g. biosimdb,
+      data-to-knowledge, etc.
+
+
+``data_collections_api`` can take your data and metadata and automatically upload it to the Invenio
+repository. To do so, you need to have some information at hand:
+
+- The URL of the repository you wish to upload the data to. In the case of PSDI data, this will
+  often be https://data-collections.psdi.ac.uk.
+- Your API key (also called a Personal Access Token or PAT) for the repository to give permissions
+  to write and upload data.
+- A metadata file detailing the data relating to the files (see :doc:`schemas/index`).
+- The files ready to upload.
+
+With all this prepared, uploading the data is as simple as:
+
+.. code-block:: console
+
+   data_collections upload --api-url https://data-collections.psdi.ac.uk --api-key 1234567890abcdef --metadata-path /path/to/metadata_file.yaml --files FILE1 FILE2 --community my_community
+
+.. note::
+
+   Since this is a common operation, it is also available as the standalone `upload_record`_ command.
+
+.. _validate:
 
 validate
 ********
 
+.. program:: data_collections validate
+
+.. describe:: data_collections validate
+
+   .. option:: FILE
+
+      File to validate.
+
+   .. option:: -f {json,yaml}, --format {json,yaml}
+
+      Parse :option:`FILE` as this type (default: determine from suffix).
+
+   .. option:: -S SCHEMA, --schema SCHEMA
+
+      Validate against the given schema (default: ``default``, see :doc:`schemas/index`).
+
 Validate the metadata file for a dataset before uploading.
 
+``data_collections_api`` can validate your metadata file against the schema to verify the contents
+of the file match what is required to make a valid upload.
+
+.. note::
+
+   The validator does not verify most data itself; you must ensure that all entries are spelled and
+   written correctly.
+
+To validate a data file simply run:
+
+.. code-block:: console
+
+   data_collections validate [file]
+
+e.g.
+
+.. code-block:: console
+
+   data_collections validate examples/biosim_record.yaml
+
+The file can be in either ``json`` or ``yaml`` format (see :doc:`schema`).
+``data_collections validate`` will attempt to determine the appropriate format from the file
+extension, but this can be specified explicitly with the ``-f`` flag.
+
+.. code-block:: console
+
+   data_collections validate -f json examples/biosim_record.yaml
+
+.. note::
+
+   The above will raise an error since the file is not in ``json`` format.
+
 dump
 ****
 
-Dump a template metadata file ready for modification to upload.
+.. program:: data_collections template
+.. describe:: data_collections template
+.. describe:: data_collections dump
+
+   .. option:: FILE
+
+      File to dump.
+
+   .. option:: -f {json,yaml}, --format {json,yaml}
+
+      Dump :option:`FILE` as this type (default: determine from suffix).
+
+``data_collections_api`` provides a method to quick-start building metadata: ``template`` will dump
+an example metadata file for a particular community and data-type (though currently only a basic
+example is available). To do so, simply run:
+
+.. code-block:: console
+
+   data_collections dump my_metadata.yaml
+
+You can then edit and modify this template to fill in the data needed.
 
 
 upload_record
 -------------
+
+.. program:: upload_record
+.. describe:: upload_record
+
+   .. option:: --api-url URL
+
+      URL for the API associated with the Invenio repository, e.g.
+      https://data-collections-staging.psdi.ac.uk/api
+
+   .. option:: --api-key str
+
+      Your API key/token for accessing the Invenio repository instance.
+
+   .. option:: --metadata-path file
+
+      File path to the yaml file containing the metadata to upload a record to an Invenio
+      repository, e.g.  path/to/files/record.yaml
+
+   .. option:: -f {json,yaml}, --metadata-format {json,yaml}
+
+      Parse metadata file as this type (default: yaml).
+
+   .. option:: --files FILES [FILES ...]
+
+      List of file paths associated with the record to be uploaded, e.g. ``path/to/files/data.*``
+
+   .. option:: --community str
+
+      Name of an Invenio repository community to upload the record to, e.g. biosimdb,
+      data-to-knowledge, etc.
+
+
+One-stop tool to upload a record to the repository, see `upload`_.
+
+.. _pat_guide: ...
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 829180d..e2c0d2f 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -8,8 +8,16 @@
 from __future__ import annotations
 
 import time
+import sys
+from pathlib import Path
+
+DOCS_SRC = Path(__file__).parent.resolve()
+sys.path.append(str(DOCS_SRC.parents[2]))
+sys.path.append(str(DOCS_SRC))
 
 import data_collections_api
+from scripts.schema_gen import main as gen_schema
+
 
 project = "Data Collections API"
 copyright_first_year = "2024"
@@ -24,13 +32,27 @@
 extensions = [
     "numpydoc",
     "sphinx.ext.autodoc",
+    # "sphinx.ext.apidoc",
     "sphinx.ext.autosummary",
     "sphinx.ext.intersphinx",
     "sphinx.ext.mathjax",
     "sphinx.ext.viewcode",
     "sphinxcontrib.contentui",
+    "myst_parser",
+]
+
+source_suffix = {
+    ".rst": "restructuredtext",
+    ".txt": "markdown",
+    ".md": "markdown",
+}
+
+apidoc_modules = [
+    {"path": "../../data_collections_api", "destination": "api/"},
 ]
 
+gen_schema(["-Fv", f"-o={DOCS_SRC / 'schemas'}", "-O=%s.md", "all"])
+
 always_use_bars_union = True
 napoleon_include_special_with_doc = True
 napoleon_use_param = True
@@ -45,7 +67,6 @@
     "python": ("https://docs.python.org/3", None),
 }
 
-
 templates_path = ["_templates"]
 exclude_patterns = []
 
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 766a609..78bccf3 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -14,4 +14,6 @@ Project to allow simplified editing and construction of Invenio data for the PSD
    :caption: Contents:
 
    cli
-   api/modules
+   schema
+   schemas/index
+   API Documentation <api/modules>
diff --git a/docs/source/schema.rst b/docs/source/schema.rst
new file mode 100644
index 0000000..2afa57e
--- /dev/null
+++ b/docs/source/schema.rst
@@ -0,0 +1,6 @@
+Metadata Format
+===============
+
+The metadata file may be either in a `json <https://www.json.org/json-en.html>`__ or `yaml <https://yaml.org>`__ format.
+
+Each community may have their own metadata requirements, which are all encompassed in the ``custom_fields.dsmd`` field; the full list of supported schemas is available in the :doc:`schemas/index` section.
diff --git a/docs/source/schemas/base.md b/docs/source/schemas/base.md
new file mode 100644
index 0000000..1183cdc
--- /dev/null
+++ b/docs/source/schemas/base.md
@@ -0,0 +1,44 @@
+# base
+
+Base schema from which community specific schemas are built.
+
+### Type: `object`
+
+> ⚠️ Additional properties are not allowed.
+
+| Property | Type | Required | Possible values | Default | Description |
+| -------- | ---- | -------- | --------------- | ------- | ----------- |
+| custom_fields | `object` | ✅ | object |  | Block for custom data. |
+| custom_fields.dsmd | `array` | ✅ | object |  | Domain specific metadata (dsmd). |
+| metadata | `object` | ✅ | object |  | Resource metadata. |
+| metadata.title | `string` | ✅ | string |  | Title of resource. |
+| metadata.description | `string` | ✅ | string |  | Summary of resource. |
+| metadata.creators | `array` | ✅ | object |  | List of creators. |
+| metadata.creators[].affiliations | `array` |  | object |  | Member affiliations. |
+| metadata.creators[].affiliations[].name | `string` | ✅ | string |  | Name of institution. |
+| metadata.creators[].person_or_org | `object` | ✅ | object |  | Person or organisation. |
+| metadata.creators[].person_or_org.name | `string` |  | string |  | Full set of given names. |
+| metadata.creators[].person_or_org.family_name | `string` |  | string |  | Family name(s). |
+| metadata.creators[].person_or_org.given_name | `string` |  | string |  | Given name(s). |
+| metadata.creators[].person_or_org.identifiers | `array` |  | object and/or object |  | ORCIDs or other IDs |
+| metadata.creators[].person_or_org.type | `const` | ✅ | `personal` |  | Personal or organisation. |
+| metadata.rights | `array` | ✅ | object |  | Rights or license. |
+| metadata.rights[].id | `const` | ✅ | `cc-by-4.0` |  | ID of rights or license. |
+| metadata.resource_type | `object` | ✅ | object |  | Type of resource. |
+| metadata.resource_type.id | `const` | ✅ | `model` |  | Resource class. |
+| metadata.subjects | `array` |  | object | `[]` | List of keywords defining subjects resource covers. |
+| metadata.subjects[].subject | `string` | ✅ | string |  | Subject keyword. |
+| metadata.version | `string` | ✅ | [`^v\d+(\.\d+)*`](https://regex101.com/?regex=%5Ev%5Cd%2B%28%5C.%5Cd%2B%29%2A) |  | Current version of resource. |
+| metadata.publisher | `string` |  | string |  | Publisher of resource. |
+| metadata.publication_date | `None` |  | None |  | Date of publication of resource. |
+| metadata.identifiers | `array` |  | object and/or object |  | Resource identifiers such as ORCID or DOI. |
+| access | `object` |  | object | `{"files": "public", "record": "public"}` | Accessibility of data outside of owners. |
+| access.embargo | `object` |  | object |  | Details of resource embargo. |
+| access.embargo.active | `boolean` | ✅ | boolean |  | Whether resource is under embargo. |
+| access.embargo.reason | `string` or `null` | ✅ | string |  | Cause for embargo. |
+| access.files | `None` |  | `public` `private` | `"public"` | Accessibility to individual files. |
+| access.record | `None` |  | `public` `private` | `"public"` | Accessibility to record as a whole. |
+| access.status | `None` |  | `open` `closed` |  | Current status of resource. |
+| files | `object` |  | object |  | Details of files. |
+| files.enabled | `boolean` | ✅ | boolean |  | Whether file is enabled. |
+| community | `string` |  | [`\d{8}-(\d{4}-){3}\d{12}`](https://regex101.com/?regex=%5Cd%7B8%7D-%28%5Cd%7B4%7D-%29%7B3%7D%5Cd%7B12%7D) |  | UUID of community associated with resource. |
diff --git a/docs/source/schemas/index.rst b/docs/source/schemas/index.rst
new file mode 100644
index 0000000..abff4eb
--- /dev/null
+++ b/docs/source/schemas/index.rst
@@ -0,0 +1,10 @@
+Schemas
+=======
+
+This page documents the available schemas.
+
+.. toctree::
+   :maxdepth: 1
+   :caption: Schemas:
+
+   base
diff --git a/docs/source/scripts/__init__.py b/docs/source/scripts/__init__.py
new file mode 100644
index 0000000..14e5b9b
--- /dev/null
+++ b/docs/source/scripts/__init__.py
@@ -0,0 +1 @@
+"""Set of scripts for building docs."""
diff --git a/docs/source/scripts/schema_gen.py b/docs/source/scripts/schema_gen.py
new file mode 100644
index 0000000..00bceed
--- /dev/null
+++ b/docs/source/scripts/schema_gen.py
@@ -0,0 +1,265 @@
+"""Generate schema documentation."""
+
+from __future__ import annotations
+
+import argparse
+from pathlib import Path
+from shutil import rmtree
+import sys
+from textwrap import indent
+from typing import TYPE_CHECKING
+
+import jsonschema_markdown
+
+from data_collections_api.schemas import SCHEMAS, Schema, get_schema
+
+if TYPE_CHECKING:
+    from collections.abc import Sequence
+
+__author__ = "Jacob Wilkins"
+__version__ = "0.1"
+
+INDEX_MD = """\
+{filename}
+{underline}
+
+This page documents the available schemas.
+
+.. toctree::
+   :maxdepth: 1
+   :caption: Schemas:
+
+{schemas}
+
+"""
+
+
+def get_arg_parser() -> argparse.ArgumentParser:
+    """Get parser for CLI.
+
+    Returns
+    -------
+    argparse.ArgumentParser
+        Arg parser.
+    """
+    parser = argparse.ArgumentParser(
+        description="Convert a schema to a markdown document.",
+    )
+
+    parser.add_argument("-V", "--version", action="version", version=f"%(prog)s v{__version__}")
+    parser.add_argument(
+        "-v",
+        "--verbose",
+        action="store_true",
+        help="Print while generating schemas",
+    )
+    parser.add_argument(
+        "-F",
+        "--force",
+        action="store_true",
+        help="Force removal of output directory (if not CWD). (default: %(default)s)",
+    )
+    parser.add_argument(
+        "schemas",
+        nargs="*",
+        choices=SCHEMAS.keys() | {"all"},
+        help="Schemas to convert or 'all' if all are to be done. (default: %(default)r)",
+        default="all",
+    )
+
+    parser.add_argument(
+        "--clear",
+        action=argparse.BooleanOptionalAction,
+        help="Clear folder before writing. (default: %(default)s)",
+        default=True,
+    )
+    parser.add_argument(
+        "--index",
+        action=argparse.BooleanOptionalAction,
+        help="Write index file with toctree to folder. (default: %(default)s)",
+        default=True,
+    )
+
+    parser.add_argument(
+        "--header",
+        help="Title of index file. (default: %(default)r)",
+        default="Schemas",
+    )
+
+    parser.add_argument(
+        "-O",
+        "--out-name",
+        help=(
+            "Format to use for naming output, "
+            "substituting '%%s' for schema key. (default: %(default)r)"
+        ),
+        default="%s.md",
+    )
+    parser.add_argument(
+        "-o",
+        "--out-folder",
+        help="Folder to write formatted docs in. (default: %(default)r)",
+        default="schemas",
+        type=Path,
+    )
+
+    return parser
+
+
+def process_schema(
+    schema_key: Schema | str,
+    *,
+    name: str | None = None,
+) -> str:
+    """Process a schema into markdown.
+
+    Parameters
+    ----------
+    schema_key : Schema or str
+        Key for schemas.
+    name : str, optional
+        Override for name (mandatory if passing :class:`schema` directly).
+
+    Returns
+    -------
+    str
+        Markdown rendered documentation.
+
+    Raises
+    ------
+    ValueError
+        Name not passed with Schema.
+    """
+    match (schema_key, name):
+        case (_, str() as inp):
+            name = inp
+        case (str() as inp, _):
+            name = inp
+        case _:
+            raise ValueError(f"Cannot reliably determine name from {type(schema_key).__name__}")
+
+    schema = get_schema(schema_key)
+    json_schema = schema.json_schema(name)
+
+    return jsonschema_markdown.generate(
+        json_schema,
+        title=name,
+        footer=False,
+        hide_empty_columns=True,
+    )
+
+
+def get_filename(fmt: str, key: str) -> str:
+    """Format filename from CLI.
+
+    Parameters
+    ----------
+    fmt : str
+        CLI format.
+    key : str
+        Schema key.
+
+    Returns
+    -------
+    str
+        Formatted filename.
+
+    Examples
+    --------
+    >>> get_filename("%s.md", "base")
+    'base.md'
+    """
+    return fmt % key
+
+
+def clear_folder(folder: Path, *, force: bool = False, verbose: bool = False) -> None:
+    """Delete folder and create new (empty) one.
+
+    Parameters
+    ----------
+    folder : Path
+        Folder to clear.
+    force : bool
+        Do not ask whether to remove folder.
+    verbose : bool
+        Print status.
+    """
+    if not folder.exists():
+        return
+
+    if folder.samefile(Path.cwd()):
+        print("Cannot clear folder as this is current working directory.")
+        return
+
+    if (
+        not force
+        and input(f"Running this will clear {folder}, are you sure you want to continue? [y/N] ")
+        .strip()
+        .lower()
+        != "y"
+    ):
+        print("Cancelling.")
+        sys.exit()
+
+    if verbose:
+        print(f"Deleting {folder}...")
+
+    rmtree(folder, ignore_errors=True)
+    folder.mkdir()
+
+
+def main(args_in: Sequence[str] | None = None, /) -> None:
+    """Parse schemas and dump to file.
+
+    Parameters
+    ----------
+    args_in : Sequence[str], optional
+        Pass CLI params directly.
+    """
+    parser = get_arg_parser()
+    args = parser.parse_args(args_in)
+
+    # Get unique (by schema), but ordered keys matching reqs
+    schemas = {
+        schema: key
+        for key, schema in reversed(SCHEMAS.items())
+        if "all" in args.schemas or key in args.schemas
+    }
+    out_names = [get_filename(args.out_name, key) for key in schemas.values()]
+
+    if args.verbose:
+        print(f"Generating schemas for keys {', '.join(map(repr, schemas.values()))}...")
+
+    if args.clear:
+        clear_folder(args.out_folder, force=args.force, verbose=args.verbose)
+
+    for key, out_name in zip(schemas.values(), out_names, strict=True):
+        out_path = args.out_folder / out_name
+
+        if args.verbose:
+            print(f"Generating schema for {key!r} to {out_path}...")
+
+        markdown = process_schema(key)
+
+        with out_path.open("w", encoding="utf-8") as out:
+            out.write(markdown)
+
+    if args.index:
+        if args.verbose:
+            print(f"Writing index to {args.out_folder / 'index.rst'}...")
+
+        with (args.out_folder / "index.rst").open("w", encoding="utf-8") as out:
+            out.write(
+                INDEX_MD.format(
+                    filename=args.header,
+                    underline="=" * len(args.header),
+                    schemas=indent("\n".join(Path(key).stem for key in out_names), " " * 3),
+                )
+            )
+
+    if args.verbose:
+        print("Done with schemas")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/pyproject.toml b/pyproject.toml
index 8326679..6553353 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -41,6 +41,8 @@ docs = [
    "sphinxcontrib-contentui<1.0.0,>=0.2.5",
    "furo==2025.9.25",
    "numpydoc>=1.9.0",
+   "myst-parser",
+   "jsonschema-markdown",
 ]
 lint = ["pre-commit<5.0.0,>=4.2.0", "ruff==0.13.3", "numpydoc>=0.19.0"]
 test = ["pytest==8.3.4", "pytest-cov==5.0.0"]