6 changes: 6 additions & 0 deletions .gitignore
@@ -181,3 +181,9 @@ cython_debug/

# VSCode configuration
.vscode/

/.quarto/
**/*.quarto_ipynb
_site/
docs/

11 changes: 6 additions & 5 deletions .pre-commit-config-NO-DOCKER.yaml
@@ -39,11 +39,6 @@ repos:
# Run the formatter.
- id: ruff-format

- repo: https://github.com/fpgmaas/deptry.git
rev: 0.24.0
hooks:
- id: deptry

- repo: local
hooks:
- id: forbid-new-init
@@ -56,3 +51,9 @@ repos:
name: scan code with bandit
entry: make check-python-security
language: system

- id: deptry-uv
name: deptry (uv)
language: system
pass_filenames: false # deptry expects a project path, not filenames
entry: uv run deptry --per-rule-ignores "DEP003=plum,DEP004=quartodoc|numpydoc" .
21 changes: 15 additions & 6 deletions README.md
@@ -1,5 +1,7 @@
![ONS Logo](./ONS_Logo_Digital_Colour_Landscape_English_RGB.svg)

### A tool produced by the UK Office for National Statistics - Central Data Science & AI group

# ClassifAI

ClassifAI is a Python package that simplifies semantic search and Retrieval Augmented Generation (RAG) pipelines for classification tasks in the production of official statistics. It is designed to help data professionals build applications and pipelines to label new text samples to official statistical classifications, by leveraging (augmented) semantic search over a knowledgebase of previously coded examples.
@@ -69,17 +71,24 @@ The comparison on other aspects, such as per-request speed or hardware requireme

## Installation

Install the package directly from GitHub in your Python environment
You can install the package directly from GitHub in your Python environment, using your preferred package manager.
By default, only the minimum dependencies of the base version will be installed; you must specify
`classifai[all]` to install all sets of optional dependencies, or `classifai[huggingface, ...]` to install one or more specific sets of optional dependencies.
The current sets of optional dependencies are `[all, huggingface, ollama, gcp]`.

##### Pip
```bash
pip install "https://github.com/datasciencecampus/classifai/releases/download/v0.2.1/classifai-0.2.1-py3-none-any.whl"
pip install "classifai[huggingface]"
pip install "classifai[<dependency list(s)>] @ https://github.com/datasciencecampus/classifai/releases/download/v<version e.g. 0.2.1>/classifai-<version e.g. 0.2.1>-py3-none-any.whl"
```

or if you are using Astral UV

##### Astral UV
one-off add to environment:
```bash
uv pip install "classifai[<dependency list(s)>] @ https://github.com/datasciencecampus/classifai/releases/download/v<version e.g. 0.2.1>/classifai-<version e.g. 0.2.1>-py3-none-any.whl"
```
persist as an environment requirement:
```bash
uv pip install "https://github.com/datasciencecampus/classifai/releases/download/v0.2.1/classifai-0.2.1-py3-none-any.whl[huggingface]"
uv add "classifai[<dependency list(s)>] @ https://github.com/datasciencecampus/classifai/releases/download/v<version e.g. 0.2.1>/classifai-<version e.g. 0.2.1>-py3-none-any.whl"
```
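For illustration, with the concrete release version shown earlier in this README (v0.2.1) and the `huggingface` extra, the placeholder commands expand to something like:

```shell
# Illustrative only: substitute the extras and release version you need.
pip install "classifai[huggingface] @ https://github.com/datasciencecampus/classifai/releases/download/v0.2.1/classifai-0.2.1-py3-none-any.whl"

# Or, with uv, persisting the dependency into the project:
uv add "classifai[huggingface] @ https://github.com/datasciencecampus/classifai/releases/download/v0.2.1/classifai-0.2.1-py3-none-any.whl"
```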

## Example: Indexing and searching a knowledgebase
136 changes: 136 additions & 0 deletions _quarto.yml
@@ -0,0 +1,136 @@
execute:
freeze: auto
project:
type: website
render:
- index.qmd
- docs/**.qmd
- classifai/**.py
- DEMO/**.ipynb
- DEMO/**.py
- DEMO/**.qmd
- README.md
- DEMO/README.md
- classifai/vectorisers/__init__.py
- classifai/indexers/__init__.py
- classifai/servers/__init__.py


website:
title: "ClassifAI"
page-navigation: true
navbar:
background: light
search: true
left:
- file: docs/index.qmd
text: "Documentation"
right:
- icon: github
href: https://github.com/datasciencecampus/classifai

sidebar:
- id: index
  title: "Documentation"
  style: "docked"
  collapse-level: 1
contents:
- section: "Overview"
contents:
- docs/index.qmd
- section: "Vectorisers"
contents:
- section: "Vectorisers Overview"
contents:
- docs/vectorisers.qmd
- docs/vectorisers.base.VectoriserBase.qmd
- docs/vectorisers.base.VectoriserBase.transform.qmd
- section: "Specific Vectorisers"
contents:
- docs/vectorisers.huggingface.HuggingFaceVectoriser.qmd
- docs/vectorisers.ollama.OllamaVectoriser.qmd
- docs/vectorisers.gcp.GcpVectoriser.qmd
- section: "Indexers"
contents:
- docs/indexers.qmd
- docs/indexers.VectorStore.qmd
- docs/indexers.VectorStore.embed.qmd
- docs/indexers.VectorStore.search.qmd
- docs/indexers.VectorStore.reverse_search.qmd
- docs/indexers.VectorStore.from_filespace.qmd
- section: "Servers"
contents:
- docs/servers.qmd
- docs/servers.get_router.qmd
- docs/servers.get_server.qmd
- docs/servers.run_server.qmd
- docs/servers.make_endpoints.qmd
- section: "DEMO"
contents:
- file: DEMO/README.md
- file: DEMO/general_workflow_demo.ipynb
- file: DEMO/custom_vectoriser.ipynb
- file: DEMO/custom_preprocessing_and_postprocessing_hooks.ipynb

interlinks:
sources:
python:
url: https://docs.python.org/3/

format:
html:
theme: cosmo
css: styles.css
toc: true
grid:
sidebar-width: 400px
body-width: 900px
margin-width: 200px
gutter-width: 1.0rem

quartodoc:
style: pkgdown
dir: docs
renderer:
style: _renderer.py
show_signature_annotations: false
# renderer:
# style: markdown
package: classifai
parser: google
sections:
- title: Vectorisers
desc: "Utilities to project text into numerical representation in a semantic vector space"
contents:
- vectorisers
- vectorisers.base
- vectorisers.base.VectoriserBase
- vectorisers.base.VectoriserBase.transform
- vectorisers.huggingface.HuggingFaceVectoriser
- vectorisers.ollama.OllamaVectoriser
- vectorisers.gcp.GcpVectoriser
- title: Indexers
desc: "Creation of Vector Stores for efficient similarity search and retrieval"
contents:
- indexers
- indexers.main
- indexers.VectorStore
- indexers.dataclasses.VectorStoreSearchInput
- indexers.dataclasses.VectorStoreSearchOutput
- indexers.dataclasses.VectorStoreReverseSearchInput
- indexers.dataclasses.VectorStoreReverseSearchOutput
- indexers.dataclasses.VectorStoreEmbedInput
- indexers.dataclasses.VectorStoreEmbedOutput
- indexers.VectorStore.embed
- indexers.VectorStore.search
- indexers.VectorStore.reverse_search
- indexers.VectorStore.from_filespace
- title: Servers
desc: "Expose ClassifAI functionality via Fast-API endpoints"
contents:
- servers
- servers.main
- servers.get_router
- servers.get_server
- servers.run_server
- servers.make_endpoints
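With this configuration in place, the documentation site can typically be built locally in two steps (assuming the `quartodoc` and Quarto CLIs from the dev dependency group are on your PATH; exact invocations may differ by setup):

```shell
# Generate the API reference pages listed under the quartodoc: section into docs/
quartodoc build

# Render the website into _site/ (both paths are ignored per the .gitignore change above)
quarto render
```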
63 changes: 63 additions & 0 deletions _renderer.py
@@ -0,0 +1,63 @@
from __future__ import annotations

from numpydoc.docscrape import NumpyDocString
from plum import dispatch
from quartodoc import MdRenderer
from quartodoc import ast as qast


class Renderer(MdRenderer):
    style = "siuba"

    @dispatch
    def render(self, el):
        """General render method.

        Note: overloading of `render` is enabled via plum.dispatch to allow
        different rendering behaviour for some elements.
        """
        prev_obj = getattr(self, "crnt_obj", None)
        self.crnt_obj = el
        res = super().render(el)
        self.crnt_obj = prev_obj

        return res

    @dispatch
    def render(self, el: qast.DocstringSectionSeeAlso):  # noqa: F811
        """Numpy docstring style render method.

        Note: overloading of `render` is enabled via plum.dispatch to allow
        different rendering behaviour for some elements.
        """
        lines = el.value.split("\n")

        # each entry in parsed has form: ([('func1', '<directive>'), ...], <description>)
        parsed = NumpyDocString("")._parse_see_also(lines)

        result = []
        for funcs, description in parsed:
            links = [f"[{name}](`{self._name_to_target(name)}`)" for name, role in funcs]

            str_links = ", ".join(links)

            if description:
                str_description = "<br>".join(description)
                result.append(f"{str_links}: {str_description}")
            else:
                result.append(str_links)

        return "*\n".join(result)

    def _name_to_target(self, name: str):
        """Helper method to convert a function/class name to a full target path,
        used for the Numpy docstring style render method.
        """
        crnt_path = getattr(self.crnt_obj, "path", None)
        pkg = "classifai."
        # Guard against crnt_obj having no path: fall back to the package prefix.
        parent = crnt_path.rsplit(".", 1)[0] + "." if crnt_path else pkg

        if crnt_path and not (name.startswith(pkg) or name.startswith(parent)):
            return f"{parent}{name}"
        elif not name.startswith(pkg):
            return f"{pkg}{name}"

        return name
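The plum-based overloading above gives `render` different behaviour per element type. The standard library's `functools.singledispatchmethod` offers a similar (single-argument) flavour of the same idea; the toy sketch below, with hypothetical names, mirrors the pattern, including the `"*\n".join` used for the see-also branch:

```python
from functools import singledispatchmethod


class ToyRenderer:
    """Toy analogue of the dispatch-overloaded render method above.

    Hypothetical example: dispatches on the type of `el`, falling back to a
    generic branch, much as plum.dispatch picks a `render` overload per type.
    """

    @singledispatchmethod
    def render(self, el):
        # Generic branch: render anything as its string form.
        return f"generic:{el}"

    @render.register
    def _(self, el: list):
        # Specialised branch for lists: render each item, then join them,
        # as the DocstringSectionSeeAlso overload joins its entries.
        return "*\n".join(self.render(item) for item in el)


r = ToyRenderer()
print(r.render("See Also"))   # -> generic:See Also
print(r.render(["a", "b"]))   # prints 'generic:a*' then 'generic:b' on two lines
```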
3 changes: 3 additions & 0 deletions index.qmd
@@ -0,0 +1,3 @@
# Package Overview {.unnumbered}

{{< include README.md >}}
3 changes: 3 additions & 0 deletions pyproject.toml
@@ -127,6 +127,9 @@ dev = [
"mkdocstrings-python >= 1.16.7",
"mkdocs-material >= 9.6.9",
"deptry>=0.24.0",
"quarto>=0.1.0",
"quartodoc>=0.11.1",
"numpydoc>=1.10.0",
{include-group = "test"},
]
test = [
2 changes: 1 addition & 1 deletion src/classifai/_optional.py
@@ -23,7 +23,7 @@ def check_deps(reqs: list[str], extra: str | None = None) -> None:

Args:
reqs (list[str]): A list of package names to check.
extra (str, optional): The name of the extra installation group. Defaults to None.
extra (str): [optional] The name of the extra installation group. Defaults to None.

Raises:
OptionalDependencyError: If any of the required packages are not installed.
28 changes: 27 additions & 1 deletion src/classifai/indexers/__init__.py
Contributor comment: Should this docstring also capture the other functionality of the VectorStore, such as search(), reverse_search(), embed(), and from_filespace()? The key features and docstring currently only refer to what happens in the index create / constructor step.

@@ -1,4 +1,30 @@
"""Indexers package."""
# pylint: disable=C0301
"""This module provides functionality for creating a vector index from a text file.
Contributor comment: "from a csv (text) file" instead?

It defines the `VectorStore` class, which is used to model and create vector databases
from CSV text files using a vectoriser object.

This class interacts with the Vectoriser class from the vectorisers submodule:
any vector model used to generate embeddings in a VectorStore object is expected
to be an instance of one of these classes; most notably, each vectoriser object
should have a transform method.

Contributor comment: I would remove the hard reference to there being 3 Vectoriser classes. We might add more, and users might make a custom one. Possibly it could refer to the base class instead.

Key Features:
- Batch processing of input files to handle large datasets.
- Support for CSV file format (additional formats may be added in future updates).
- Integration with a custom embedder for generating vector embeddings.
- Logging for tracking progress and handling errors during processing.

Dependencies:
- polars: For handling data in tabular format and saving it as a Parquet file.
- tqdm: For displaying progress bars during batch processing.
- numpy: For vector cosine similarity calculations.
- A custom file iterator (`iter_csv`) for reading input files in batches.

Usage:
This module is intended to be used with the Vectorisers module and the
servers module from ClassifAI, to create scalable, modular, searchable
vector databases from your own text data.
"""

from .dataclasses import (
VectorStoreEmbedInput,
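The vectoriser contract this docstring relies on (any object exposing a `transform` method that maps texts to vectors) can be illustrated with a toy, self-contained sketch. The names and the trivial character-count embedding below are purely illustrative, not the ClassifAI API:

```python
class ToyVectoriser:
    """Hypothetical stand-in for a ClassifAI vectoriser: all that matters
    here is that it exposes a transform(texts) -> vectors method."""

    def transform(self, texts: list[str]) -> list[list[float]]:
        # Trivial "embedding": character-frequency counts over a-z.
        vocab = "abcdefghijklmnopqrstuvwxyz"
        return [[t.lower().count(ch) for ch in vocab] for t in texts]


def cosine(u: list[float], v: list[float]) -> float:
    # Cosine similarity between two vectors, guarding against zero norms.
    dot = sum(a * b for a, b in zip(u, v))
    nu = sum(a * a for a in u) ** 0.5
    nv = sum(b * b for b in v) ** 0.5
    return dot / (nu * nv) if nu and nv else 0.0


vec = ToyVectoriser()
docs = ["retail trade", "retail sales", "dairy farming"]
embeddings = vec.transform(docs)
query = vec.transform(["retail"])[0]

# Rank stored documents by cosine similarity to the query vector.
ranked = sorted(zip(docs, embeddings), key=lambda pair: -cosine(query, pair[1]))
print(ranked[0][0])  # -> retail trade
```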