diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 414d45c..26928b5 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -39,7 +39,7 @@ repos: - id: yamllint exclude: pre-commit-config.yaml - repo: https://github.com/astral-sh/ruff-pre-commit - rev: "v0.15.10" + rev: "v0.15.11" hooks: - id: ruff-format - id: ruff-check diff --git a/README.md b/README.md index e452194..b8d92e6 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ - + ![PyPI - Version](https://img.shields.io/pypi/v/ome-arrow) [![Build Status](https://github.com/wayscience/ome-arrow/actions/workflows/run-tests.yml/badge.svg?branch=main)](https://github.com/wayscience/ome-arrow/actions/workflows/run-tests.yml?query=branch%3Amain) @@ -198,6 +198,8 @@ Please see our [contributing documentation](https://github.com/wayscience/ome-ar OME Arrow is used or inspired by the following projects, check them out! - [`napari-ome-arrow`](https://github.com/WayScience/napari-ome-arrow): enables you to view OME Arrow and related images. -- [`nViz`](https://github.com/WayScience/nViz): focuses on ingesting and visualizing various 3D image data. - [`CytoDataFrame`](https://github.com/cytomining/CytoDataFrame): provides a DataFrame-like experience for viewing feature and microscopy image data within Jupyter notebook interfaces and creating OME Parquet files. - [`coSMicQC`](https://github.com/cytomining/coSMicQC): performs quality control on microscopy feature datasets, visualized using CytoDataFrames. +- [`pycytominer`](https://github.com/cytomining/pycytominer): supports feature profiling, normalization, and downstream analysis workflows for image-based profiling datasets. +- [`iceberg-bioimage`](https://github.com/WayScience/iceberg-bioimage): defines warehouse-oriented patterns for connecting bioimage formats and analytical tables at scale. 
+- [`CytoTable`](https://github.com/cytomining/CytoTable): converts image-based profiling outputs into analysis-ready tabular formats such as Parquet. diff --git a/docs/presentations/2026-OME-community-meeting/_extensions/quarto-ext/poster/_extension.yml b/docs/presentations/2026-OME-community-meeting/_extensions/quarto-ext/poster/_extension.yml new file mode 100644 index 0000000..764ce84 --- /dev/null +++ b/docs/presentations/2026-OME-community-meeting/_extensions/quarto-ext/poster/_extension.yml @@ -0,0 +1,10 @@ +title: Poster (modified) +author: Carlos Scheidegger, Dave Bunten +version: 1.0.0 +quarto-required: ">=1.4.415" +contributes: + formats: + typst: + template-partials: + - typst-template.typ + - typst-show.typ diff --git a/docs/presentations/2026-OME-community-meeting/_extensions/quarto-ext/poster/typst-show.typ b/docs/presentations/2026-OME-community-meeting/_extensions/quarto-ext/poster/typst-show.typ new file mode 100644 index 0000000..1361b23 --- /dev/null +++ b/docs/presentations/2026-OME-community-meeting/_extensions/quarto-ext/poster/typst-show.typ @@ -0,0 +1,74 @@ +// Typst custom formats typically consist of a 'typst-template.typ' (which is +// the source code for a typst template) and a 'typst-show.typ' which calls the +// template's function (forwarding Pandoc metadata values as required) +// +// This is an example 'typst-show.typ' file (based on the default template +// that ships with Quarto). It calls the typst function named 'poster' which +// is defined in the 'typst-template.typ' file. +// +// If you are creating or packaging a custom typst template you will likely +// want to replace this file and 'typst-template.typ' entirely. You can find +// documentation on creating typst templates here and some examples here: +// - https://typst.app/docs/tutorial/making-a-template/ +// - https://github.com/typst/templates + +#show: doc => poster( + $if(title)$ title: [$title$], $endif$ + // TODO: use Quarto's normalized metadata.
+ $if(poster-authors)$ authors: [$poster-authors$], $endif$ + $if(departments)$ departments: [$departments$], $endif$ + $if(size)$ size: "$size$", $endif$ + + // Institution logo. + $if(institution-logo)$ univ_logo: "$institution-logo$", $endif$ + + // Footer text. + // For instance, Name of Conference, Date, Location. + // or Course Name, Date, Instructor. + $if(footer-text)$ footer_text: [$footer-text$], $endif$ + + // Any URL, like a link to the conference website. + $if(footer-url)$ footer_url: [$footer-url$], $endif$ + + // Emails of the authors. + $if(footer-emails)$ footer_email_ids: [$footer-emails$], $endif$ + + // Color of the footer. + $if(footer-color)$ footer_color: "$footer-color$", $endif$ + + // DEFAULTS + // ======== + // For 3-column posters, these are generally good defaults. + // Tested on 36in x 24in, 48in x 36in, and 36in x 48in posters. + // For 2-column posters, you may need to tweak these values. + // See ./examples/example_2_column_18_24.typ for an example. + + // Any keywords or index terms that you want to highlight at the beginning. + $if(keywords)$ keywords: ($for(keywords)$"$it$"$sep$, $endfor$), $endif$ + + // Number of columns in the poster. + $if(num-columns)$ num_columns: $num-columns$, $endif$ + + // University logo's scale (in %). + $if(univ-logo-scale)$ univ_logo_scale: $univ-logo-scale$, $endif$ + + // University logo's column size (in in). + $if(univ-logo-column-size)$ univ_logo_column_size: $univ-logo-column-size$, $endif$ + + // Title and authors' column size (in in). + $if(title-column-size)$ title_column_size: $title-column-size$, $endif$ + + // Poster title's font size (in pt). + $if(title-font-size)$ title_font_size: $title-font-size$, $endif$ + + // Authors' font size (in pt). + $if(authors-font-size)$ authors_font_size: $authors-font-size$, $endif$ + + // Footer's URL and email font size (in pt). + $if(footer-url-font-size)$ footer_url_font_size: $footer-url-font-size$, $endif$ + + // Footer's text font size (in pt); forwarded bare because the template converts it with int().
+ $if(footer-text-font-size)$ footer_text_font_size: $footer-text-font-size$, $endif$ + + doc, +) diff --git a/docs/presentations/2026-OME-community-meeting/_extensions/quarto-ext/poster/typst-template.typ b/docs/presentations/2026-OME-community-meeting/_extensions/quarto-ext/poster/typst-template.typ new file mode 100644 index 0000000..ffb2786 --- /dev/null +++ b/docs/presentations/2026-OME-community-meeting/_extensions/quarto-ext/poster/typst-template.typ @@ -0,0 +1,211 @@ +#let poster( + // set variables for use throughout + // note: some are referenced from `.qmd` file + size: "'36x24' or '48x36''", + title: "Paper Title", + authors: "Author Names (separated by commas)", + departments: "Department Name", + univ_logo: "Logo Path", + footer_text: " ", + footer_url: "Footer URL", + footer_email_ids: "Email IDs (separated by commas)", + footer_color: "Hex Color Code", + keywords: (), + num_columns: "4", + univ_logo_scale: "140", + univ_logo_column_size: "10", + title_column_size: "33", + title_font_size: "42", + authors_font_size: "32", + footer_url_font_size: "40", + footer_text_font_size: "40", + body +) = { + // initialize template display formatting + set text(font: "Lato", size: 26pt) + let sizes = size.split("x") + let width = int(sizes.at(0)) * 1in + let height = int(sizes.at(1)) * 1in + univ_logo_scale = int(univ_logo_scale) * 1% + title_font_size = int(title_font_size) * 1pt + authors_font_size = int(authors_font_size) * 1pt + num_columns = int(num_columns) + univ_logo_column_size = int(univ_logo_column_size) * 1in + title_column_size = int(title_column_size) * 1in + footer_url_font_size = int(footer_url_font_size) * 1pt + footer_text_font_size = int(footer_text_font_size) * 1pt + + // create overall page output + set page( + // total dimensions + width: width, + height: height, + // margin on all sides + margin: + (top: .8in, left: .8in, right: .8in, bottom: 1.8in), + // footer section + footer: [ + #set align(center) + #set text(42pt) + #block( + fill:
rgb(footer_color), + width: 100%, + inset: 20pt, + radius: 10pt, + // adds text to footer + [ + #text(font: "Lato", size: footer_url_font_size, footer_url) + #h(1fr) + #text(size: footer_text_font_size, smallcaps(footer_text)) + #h(1fr) + #text(font: "Lato", size: footer_url_font_size, footer_email_ids) + ] + ) + ] + ) + + // set math display properties + set math.equation(numbering: "(1)") + show math.equation: set block(spacing: 0.65em) + + set enum(indent: 10pt, body-indent: 9pt) + set list(indent: 10pt, body-indent: 9pt) + + // set the heading numbering system + set heading(numbering: "I.A.1.") + show heading: it => context { + // Get numbering tuple for *this* heading at the current location. + let levels = counter(heading).at(here()) + let deepest = if levels != () { levels.last() } else { 1 } + + // defines how sub-headers display + set text(25pt, weight: 400) + + // sub-header level 0 + if it.level == 0 [ + #set text(style: "italic") + #v(32pt, weak: true) + #if it.numbering != none { + numbering("i.", deepest) + h(7pt, weak: true) + } + #it.body + + // sub-header level 1 + ] else if it.level == 1 [ + #v(10pt, weak: true) + #set align(left) + #set text({ 44pt }, weight: 600, font: "Vollkorn", fill: rgb("#2A6F63")) + #v(45pt, weak: true) + #if it.numbering != none { + numbering("I.", deepest) + h(7pt, weak: true) + } + #it.body + #v(25pt, weak: true) + #line(length: 100%, stroke: rgb(200, 200, 200)) + #v(25pt, weak: true) + + // all other headers + ] else [ + #set text({ 36pt }, weight: 600, font: "Vollkorn", fill: rgb("#3E877A"), style: "italic") + #if it.level == 2 { + numbering("⧈ a)", deepest) + [ ] + } + #it.body + #v(40pt, weak: true) + ] +} + + // header grid +align(left, + grid( + // add one more column at the start for the left-side image + rows: (auto, auto), + columns: (210pt, title_column_size, univ_logo_column_size), + column-gutter: 25pt, + row-gutter: 30pt, + + // left-side image cell + grid.cell( + pad(top: -18pt, +
image("images/ome-arrow-logo.png", width: 210pt), + ), + rowspan: 3, + align: left, + ), + + // main title + grid.cell( + [ + #show "➶": it => text(weight: 800, fill: rgb("#000000"))[#it] + #set par(linebreaks: "simple") + #text( + font: ("Vollkorn", "Apple Symbols", "Zapf Dingbats", "Arial Unicode MS"), + weight: 500, + size: title_font_size, + fill: rgb("#2A6F63"), + )[#title] + ], + align: left, + ), + + // university logo on the far right + grid.cell( + pad(top: 2pt, pad( left: 20pt, + image(univ_logo, width: 130%), + )), + rowspan: 3, + align: right, + ), + + // author display + pad(top: 5pt, text(size: 28pt, authors)), + + // department and notes display + pad(top: 5pt, text(size: 26pt, emph(departments))) + ) + ) + + // spacing between the header and body + v(40pt) + + // set main body display + show: columns.with(num_columns, gutter: 60pt) + // paragraph display properties + set par(leading: 10pt, + justify: false, + first-line-indent: 0em, + linebreaks: "optimized" + ) + + // Style inline code spans from markdown backticks. + show raw.where(block: false): set text(weight: 700, fill: rgb("#1E4F8C")) + + // Style hyperlinks with a yellow/orange tone by default. + show link: set text(fill: rgb("#A85A00")) + + // Configure figures. + show figure: it => block({ + // Display the figure body. + it.body + + // Display caption.
+ if it.has("caption") { + set align(left) + v(if it.has("gap") { it.gap } else { 24pt }, weak: true) + let fig_levels = counter(figure).at(here()) + let fig_num = if fig_levels != () { fig_levels.last() } else { 1 } + [ + #text(weight: "bold")[Figure #fig_num:] + #h(6pt) + #text(weight: "regular")[#it.caption.body] + ] + } + + }) + + // adds body content to page + body +} diff --git a/docs/presentations/2026-OME-community-meeting/abstract.md b/docs/presentations/2026-OME-community-meeting/abstract.md new file mode 100644 index 0000000..b0af132 --- /dev/null +++ b/docs/presentations/2026-OME-community-meeting/abstract.md @@ -0,0 +1,19 @@ +# 2026 OME Community Meeting - OME-Arrow + +## Authors + +Dave Bunten, Jenna Tomkinson, Michael Lippincott, Cameron Mattson, Julia B. Curd, and Gregory P. Way + +## Title + +OME-Arrow: Unifying Images, Metadata, and Morphology in an Interoperable Data Model for High-Content Imaging + +## Abstract + +Modern bioimaging workflows combine images, metadata, and derived measurements across many tools, but these components are often stored in incompatible formats and disconnected systems. This fragmentation makes it difficult to join data, reproduce analyses, and scale from small experiments to large, multi-sample studies. + +OME-Arrow is a data model and toolkit for working with bioimaging data in modern analytical environments, where data are processed in code, queried with SQL, and analyzed across tools such as Python and R. It brings images, metadata, and derived measurements into a single structure organized as linked tables, rather than leaving them split across separate files and systems. This allows imaging data to be directly joined, filtered, and analyzed using familiar operations, enabling image-derived measurements, metadata, and experimental context to be queried together in a single system.
In contrast to existing workflows, where these relationships must be manually reconstructed across files and tools, OME-Arrow makes them explicit and queryable. + +OME-Arrow builds on Open Microscopy Environment (OME) conventions and represents data using Apache Arrow, a columnar in-memory data format designed for fast analytics and efficient data sharing across programming languages. It supports ingestion from formats such as TIFF, OME-Zarr, and NumPy, and export to Arrow-native formats (e.g., Parquet, Lance, Vortex) as well as OME-TIFF and OME-Zarr. Data can be processed directly in standalone workflows using these formats, enabling local analysis, scripting, and integration with tools such as SQL engines and DuckDB. For larger-scale use cases, the same data can be organized into an Apache Iceberg-style table structure, which supports dataset versioning, schema evolution, and concurrent access across systems. These two modes use the same underlying data model, allowing workflows to scale from local analysis to warehouse environments without restructuring data. The library also provides lazy scan-style access for large datasets, supports tensor-based pathways for machine learning, and integrates with napari-ome-arrow for advanced visualization and CytoDataFrame for feature-centric analysis within Jupyter notebook environments. + +These capabilities enable end-to-end image-based profiling workflows in which raw images, single-cell features, and experimental metadata are analyzed together without intermediate data reshaping. In pediatric cancer research settings, this supports direct querying across imaging data and derived measurements, enabling researchers to relate cellular morphology to perturbations such as compounds, genetic modifications, or treatment conditions. By making these relationships explicit and queryable, OME-Arrow reduces the need for custom data integration steps and improves the consistency of downstream analyses. 
This approach is being applied to pediatric cancer datasets in collaboration with Alex’s Lemonade Stand Foundation, where integrated access to imaging and profiling data supports systematic exploration of phenotype–treatment relationships and more reproducible analytical workflows. diff --git a/docs/presentations/2026-OME-community-meeting/images/cu-anschutz-short.png b/docs/presentations/2026-OME-community-meeting/images/cu-anschutz-short.png new file mode 100644 index 0000000..4fab376 Binary files /dev/null and b/docs/presentations/2026-OME-community-meeting/images/cu-anschutz-short.png differ diff --git a/docs/presentations/2026-OME-community-meeting/images/cytomining-integration.png b/docs/presentations/2026-OME-community-meeting/images/cytomining-integration.png new file mode 100644 index 0000000..df903d5 Binary files /dev/null and b/docs/presentations/2026-OME-community-meeting/images/cytomining-integration.png differ diff --git a/docs/presentations/2026-OME-community-meeting/images/cytotable-parquet-benchmarks.png b/docs/presentations/2026-OME-community-meeting/images/cytotable-parquet-benchmarks.png new file mode 100644 index 0000000..29ca58c Binary files /dev/null and b/docs/presentations/2026-OME-community-meeting/images/cytotable-parquet-benchmarks.png differ diff --git a/docs/presentations/2026-OME-community-meeting/images/example-umap.png b/docs/presentations/2026-OME-community-meeting/images/example-umap.png new file mode 100644 index 0000000..ff5bd60 Binary files /dev/null and b/docs/presentations/2026-OME-community-meeting/images/example-umap.png differ diff --git a/docs/presentations/2026-OME-community-meeting/images/header-combined-images.png b/docs/presentations/2026-OME-community-meeting/images/header-combined-images.png new file mode 100644 index 0000000..46723d1 Binary files /dev/null and b/docs/presentations/2026-OME-community-meeting/images/header-combined-images.png differ diff --git 
a/docs/presentations/2026-OME-community-meeting/images/icebeg-bioimage-ome-zarr-integration.png b/docs/presentations/2026-OME-community-meeting/images/icebeg-bioimage-ome-zarr-integration.png new file mode 100644 index 0000000..bf61ac6 Binary files /dev/null and b/docs/presentations/2026-OME-community-meeting/images/icebeg-bioimage-ome-zarr-integration.png differ diff --git a/docs/presentations/2026-OME-community-meeting/images/ome-arrow-benchmarks.png b/docs/presentations/2026-OME-community-meeting/images/ome-arrow-benchmarks.png new file mode 100644 index 0000000..3172c00 Binary files /dev/null and b/docs/presentations/2026-OME-community-meeting/images/ome-arrow-benchmarks.png differ diff --git a/docs/presentations/2026-OME-community-meeting/images/ome-arrow-logo.png b/docs/presentations/2026-OME-community-meeting/images/ome-arrow-logo.png new file mode 100644 index 0000000..3fcd1da Binary files /dev/null and b/docs/presentations/2026-OME-community-meeting/images/ome-arrow-logo.png differ diff --git a/docs/presentations/2026-OME-community-meeting/images/ome-arrow-qr-text.png b/docs/presentations/2026-OME-community-meeting/images/ome-arrow-qr-text.png new file mode 100644 index 0000000..7a6d9da Binary files /dev/null and b/docs/presentations/2026-OME-community-meeting/images/ome-arrow-qr-text.png differ diff --git a/docs/presentations/2026-OME-community-meeting/images/ome-arrow-qr.png b/docs/presentations/2026-OME-community-meeting/images/ome-arrow-qr.png new file mode 100644 index 0000000..b1dfba1 Binary files /dev/null and b/docs/presentations/2026-OME-community-meeting/images/ome-arrow-qr.png differ diff --git a/docs/presentations/2026-OME-community-meeting/images/pccma-cytodataframe-cosmicqc-result-example.png b/docs/presentations/2026-OME-community-meeting/images/pccma-cytodataframe-cosmicqc-result-example.png new file mode 100644 index 0000000..168d78f Binary files /dev/null and 
b/docs/presentations/2026-OME-community-meeting/images/pccma-cytodataframe-cosmicqc-result-example.png differ diff --git a/docs/presentations/2026-OME-community-meeting/images/spacer.png b/docs/presentations/2026-OME-community-meeting/images/spacer.png new file mode 100644 index 0000000..0a73954 Binary files /dev/null and b/docs/presentations/2026-OME-community-meeting/images/spacer.png differ diff --git a/docs/presentations/2026-OME-community-meeting/images/what-is-ome-arrow.png b/docs/presentations/2026-OME-community-meeting/images/what-is-ome-arrow.png new file mode 100644 index 0000000..87a16f7 Binary files /dev/null and b/docs/presentations/2026-OME-community-meeting/images/what-is-ome-arrow.png differ diff --git a/docs/presentations/2026-OME-community-meeting/images/why-ome-arrow.png b/docs/presentations/2026-OME-community-meeting/images/why-ome-arrow.png new file mode 100644 index 0000000..6e23394 Binary files /dev/null and b/docs/presentations/2026-OME-community-meeting/images/why-ome-arrow.png differ diff --git a/docs/presentations/2026-OME-community-meeting/ome-arrow-2026-poster.pdf b/docs/presentations/2026-OME-community-meeting/ome-arrow-2026-poster.pdf new file mode 100644 index 0000000..eac09a9 Binary files /dev/null and b/docs/presentations/2026-OME-community-meeting/ome-arrow-2026-poster.pdf differ diff --git a/docs/presentations/2026-OME-community-meeting/poster.qmd b/docs/presentations/2026-OME-community-meeting/poster.qmd new file mode 100644 index 0000000..d8f4a49 --- /dev/null +++ b/docs/presentations/2026-OME-community-meeting/poster.qmd @@ -0,0 +1,80 @@ +--- +title: | + **OME-Arrow** ➶ Unifying Images, Metadata, and Morphology in an Interoperable Data Model for High-Content Imaging +format: + poster-typst: + size: "48x32" + poster-authors: "Dave Bunten¹, Jenna Tomkinson¹, Michael Lippincott¹, Cameron Mattson¹, Julia B. Curd¹, Gregory P. 
Way¹" + departments: "¹Department of Biomedical Informatics, University of Colorado Anschutz Medical Campus" + institution-logo: "./images/header-combined-images.png" + footer-text: "Open, interoperable, and queryable microscopy images" + footer-url: "OME-Arrow" + footer-emails: "https://github.com/wayscience/ome-arrow" + footer-color: "CBE9E1" +output-file: "ome-arrow-2026-poster.pdf" +--- + +## Why OME-Arrow? + +![Modern bioimaging workflows depend on connecting **images**, **metadata**, and +**derived measurements**. When these are split across disconnected files and systems, analysis is harder +to join, reproduce, and scale. **OME-Arrow** provides a linked, queryable data model for these components in +code-first and SQL-first analytical workflows.](images/why-ome-arrow.png) + +## An interoperable image data model + +![By representing image content as **Arrow-compatible structures** alongside +metadata and features, [OME-Arrow](https://github.com/WayScience/ome-arrow) enables: **Explicit relationships** between pixels, metadata, and derived features, **Direct joins and filtering** in SQL and DataFrame workflows, **Cross-language interoperability** through Arrow-native representations, **Standards alignment** with the broader open bioimaging ecosystem](images/what-is-ome-arrow.png) + +## Benchmarks for OME-Arrow + +![Current benchmark results suggest operation-dependent tradeoffs rather than one universal winner. In [ome-arrow-benchmarks](https://github.com/WayScience/ome-arrow-benchmarks), OME-Zarr can perform strongly for sparse random image access patterns, while Arrow-table-native layouts provide practical advantages for table-centric join/filter workloads and broader analytical interoperability. Lance shows competitive random-access behavior in OME-Arrow-oriented tests, making it a strong candidate for large image-linked table repositories. 
+](images/ome-arrow-benchmarks.png) + +![In [CytoTable-benchmarks](https://github.com/cytomining/CytoTable-benchmarks), Parquet and other Arrow-compatible tabular workflows generally scale better and run faster at larger data volumes than Zarr- and AnnData-oriented table paths, supporting the use of Arrow-native tables for large-scale profiling analytics.](images/cytotable-parquet-benchmarks.png) + +## Quick technical demonstration + +```python +# install with `pip install ome_arrow` +from ome_arrow import OMEArrow + +# Create an OME-Arrow object from an OME-Zarr +oa = OMEArrow("image.ome.zarr") + +# Export to Parquet directly +oa.export(how="ome-parquet", out="image.parquet") + +# Create and collect a "lazy" crop of an image +lazy = OMEArrow.scan("image.parquet").slice_lazy(0, 512, 0, 512).collect() + +# visualize the image with pyvista for jupyter-friendly views +oa.view(how="pyvista") +``` + +## Full Cytomining integration + +![OME-Arrow complements Cytomining tools in a modular stack: OME-Arrow provides built-in visualization (`matplotlib`, `pyvista`) for direct inspection; [**`napari-ome-arrow`**](https://github.com/WayScience/napari-ome-arrow) adds interactive viewing for OME-Arrow/OME-Parquet data; [**`CytoDataFrame`**](https://github.com/cytomining/CytoDataFrame) supports DataFrame-centered analysis of image-linked features and metadata; [**`coSMicQC`**](https://github.com/cytomining/coSMicQC) provides quality control with image context; [**`buscar`**](https://github.com/WayScience/buscar) enables heterogeneity-aware single-cell compound ranking; and [**`pycytominer`**](https://github.com/cytomining/pycytominer) supports profiling and normalization workflows.](images/cytomining-integration.png) + +## OME-Zarr and iceberg-bioimage + +![OME-Zarr is strong for cloud-native image storage and distribution. OME-Arrow is complementary (not a replacement) when images must be queried with tabular metadata and measurements.
One integration pattern uses [`iceberg-bioimage`](https://github.com/WayScience/iceberg-bioimage) as a warehouse/control-plane layer and [`duckdb_zarr`](https://github.com/photometric/duckdb-zarr) for analytical access to OME-Zarr-backed data. Here, **OME-Zarr / OME-TIFF** remain exchange formats, while **OME-Arrow / Parquet / Arrow-native tables** support joins and analytics, with **Lance** as a random-access table option.](images/icebeg-bioimage-ome-zarr-integration.png) + + +## ALSF pediatric cancer research + +![This work supports the **Pediatric Cancer Cell Morphology Atlas (PCCMA)** project, which will study pediatric cancer cell lines together with drug screens to accelerate discoveries that cure disease. The figure shows a `CytoDataFrame`-centered quality-control workflow using `coSMicQC` and Cytomining tooling on image-linked profiling data; in PCCMA analyses, this workflow helps identify and remove outliers for improved downstream analysis quality. We are using OME-Arrow-related work through the Cytomining ecosystem to help build the Atlas (for current implementation context, see [pediatric_cancer_atlas_profiling](https://github.com/WayScience/pediatric_cancer_atlas_profiling)).](images/pccma-cytodataframe-cosmicqc-result-example.png) + +## Acknowledgements + +We thank those who have inspired, contributed, or helped support OME-Arrow and +the broader open bioimaging ecosystem: + +- Open source science from the [__Open Microscopy Environment__](https://github.com/ome) +- The [__napari community__](https://napari.org/) for open, collaborative development around interactive bioimage analysis +- The communities behind [__Apache Arrow__](https://arrow.apache.org/), + [__Apache Iceberg__](https://iceberg.apache.org/), and Cytomining ecosystem projects +- The [__ALSF Childhood Cancer Data Lab__](https://www.ccdatalab.org/) for + pediatric cancer data science leadership and partnership +- Members of the [__Way Lab__](https://www.waysciencelab.com/) at the 
University of Colorado Anschutz Medical Campus +- [__Department of Biomedical Informatics__](https://medschool.cuanschutz.edu/dbmi) within the School of Medicine at the University of Colorado Anschutz Medical Campus diff --git a/docs/presentations/2026-OME-community-meeting/readme.md b/docs/presentations/2026-OME-community-meeting/readme.md new file mode 100644 index 0000000..9d25b8d --- /dev/null +++ b/docs/presentations/2026-OME-community-meeting/readme.md @@ -0,0 +1,57 @@ +# 2026-OME-community-meeting poster + +The content here is for creating a poster for the 2026 OME Community Meeting. + +## Poster Details + +The poster boards provided will be 32”Hx54”W. + +## Poster development + +We use [Quarto](https://github.com/quarto-dev/quarto-cli)'s [Typst](https://github.com/typst/typst) [integration](https://quarto.org/docs/output-formats/typst.html) through a Quarto extension for posters under [`quarto-ext/typst-templates/poster`](https://github.com/quarto-ext/typst-templates/tree/main/poster). +Related [Poe the Poet](https://poethepoet.natn.io/index.html) tasks are defined to run processes defined within `pyproject.toml` under the section `[tool.poe.tasks]`. 
+ +See the following examples for more information: + +```bash +# preview the poster during development +uv run poe poster-preview + +# build the poster PDF from source +uv run poe poster-render +``` + +## References + +- Fonts were sourced locally for rendering within Quarto and Typst: + - [Vollkorn](https://fonts.google.com/specimen/Vollkorn) + - [Lato](https://fonts.google.com/specimen/Lato) +- QR codes with images were generated and saved manually via [https://github.com/lyqht/mini-qr](https://github.com/lyqht/mini-qr) +- [ImageMagick](http://www.imagemagick.org/) was, for example, used to form the bottom logos together as one and render the poster PDF as PNG (among other tasks) using the following commands: + +```shell +# append text to qr codes +magick images/ome-arrow-qr.png -gravity South -background transparent -splice 0x15 -pointsize 40 -font Arial -weight Bold -annotate 0x15 'Scan for GitHub!' images/ome-arrow-qr-text.png + +# create a transparent spacer +magick -size 100x460 xc:transparent images/spacer.png + +magick images/ome-arrow-qr-text.png -resize x460 images/ome-arrow-qr-text.png +magick images/bssw-logo-w-background.png -resize x460 images/bssw-logo-w-background.png +magick images/sustainable-horizons-institute-logo.png -resize x460 images/sustainable-horizons-institute-logo.png +magick images/cu-anschutz-short.png -resize x460 images/cu-anschutz-short.png +# combine the images together as one using the spacer for separation +magick -background none images/ome-arrow-qr-text.png images/spacer.png images/bssw-logo-w-background.png images/spacer.png images/sustainable-horizons-institute-logo.png images/spacer.png images/cu-anschutz-short.png +append images/header-combined-images.png + +# convert the poster pdf to png and jpg with 300 dpi and a white background +magick -antialias -density 300 -background white -flatten poster.pdf poster.png +magick -antialias -density 300 -background white -flatten poster.pdf poster.jpg + +# create the title with
clip path through svg (typst doesn't support https://github.com/typst/typst/issues/5611) +magick forest_modified.png -resize 5700x400^ -gravity center -extent 5700x400 \ + \( -background none -fill white \ + -font "Vollkorn-Bold" -pointsize 340 \ + label:"The Software Gardening Almanack" \ + -gravity West -extent 5700x400 \) \ + -compose copy_opacity -composite title-text.png +``` diff --git a/docs/src/_static/logo.png b/docs/src/_static/logo.png deleted file mode 100644 index 555f1cb..0000000 Binary files a/docs/src/_static/logo.png and /dev/null differ diff --git a/docs/src/_static/ome-arrow-logo.png b/docs/src/_static/ome-arrow-logo.png new file mode 100644 index 0000000..3fcd1da Binary files /dev/null and b/docs/src/_static/ome-arrow-logo.png differ diff --git a/docs/src/_static/ome-arrow-with-text.png b/docs/src/_static/ome-arrow-with-text.png new file mode 100644 index 0000000..3a0f516 Binary files /dev/null and b/docs/src/_static/ome-arrow-with-text.png differ diff --git a/docs/src/index.md b/docs/src/index.md index 0d68586..cf5e62b 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -14,5 +14,7 @@ maxdepth: 3 --- python-api dlpack +why-ome-arrow +presentations examples/learning_to_fly_with_ome-arrow ``` diff --git a/docs/src/presentations.md b/docs/src/presentations.md new file mode 100644 index 0000000..f4e19fb --- /dev/null +++ b/docs/src/presentations.md @@ -0,0 +1,5 @@ +# Presentations + +## OME Community Meeting 2026 + +- Poster PDF: [OME-Arrow 2026 poster (WayScience/ome-arrow)](https://github.com/WayScience/ome-arrow/blob/main/docs/presentations/2026-OME-community-meeting/ome-arrow-2026-poster.pdf) diff --git a/docs/src/why-ome-arrow.md b/docs/src/why-ome-arrow.md new file mode 100644 index 0000000..17e5d25 --- /dev/null +++ b/docs/src/why-ome-arrow.md @@ -0,0 +1,56 @@ +# Why OME Arrow? + +OME Arrow is a table-oriented representation for OME-aligned image data built on Apache Arrow and commonly persisted as Parquet. 
+It complements, rather than replaces, Open Microscopy Environment (OME)-Zarr and OME-TIFF. +OME-Zarr and OME-TIFF are core image formats for storage, exchange, and visualization, while OME Arrow focuses on query-centric analytics where image payloads and tabular data need to be handled together. +Open Microscopy Environment (OME)-Zarr is already a strong default for many bioimaging workflows. +It provides cloud-oriented, multiscale image storage and standardized metadata through the Next-Generation File Formats (NGFF) / OME-Zarr specification. +OME Arrow is also cloud-oriented, especially for object-store-backed, table-native analytics workflows in Arrow/Parquet ecosystems. + +This page explains why OME Arrow still matters. + +## Where current OME formats are strong + +- [OME-TIFF](https://ome-model.readthedocs.io/en/stable/ome-tiff/index.html): + mature image + OME-XML (Extensible Markup Language) metadata packaging in TIFF (Tagged Image File Format)-based files. +- [OME-Zarr (NGFF)](https://ngff.openmicroscopy.org/0.5/): + standardized Zarr hierarchy, multiscales, labels, and HCS (high-content screening) plate/well metadata. + +These formats are excellent for image representation and interoperability. + +## Gap OME Arrow is designed to address + +In analysis-heavy workflows, image pixels often need to be handled together with tabular data (for example per-cell features, QC (quality control) metrics, and joins across many images). +OME-Zarr and OME-TIFF define image data structures well, but they are not table formats. +The [Zarr core specification](https://zarr-specs.readthedocs.io/en/latest/v3/core/index.html) is centered on typed N-dimensional arrays and groups rather than a canonical table model. +The [OME-Zarr specification (v0.5)](https://ngff.openmicroscopy.org/0.5/) adds strong guidance for image and metadata layout, but does not define a single cross-project table specification. 
+In practice, teams often introduce project-level table conventions inside Zarr hierarchies (for example, [Annotated Data (AnnData) in Zarr](https://anndata.readthedocs.io/en/stable/fileformat-prose.html)), which can produce surprising or inconsistent data structures across tools. + +OME Arrow uses [Apache Arrow](https://arrow.apache.org/overview/) primitives to represent image payloads as typed, queryable values inside table-like data systems (including Parquet). +This makes image-linked analytics easier in SQL (Structured Query Language) / DataFrame-style pipelines. + +## OME Arrow vs OME-Zarr + +OME Arrow is not a universal replacement for OME-Zarr. + +- Use OME-Zarr when your primary need is standards-based multiscale image storage and ecosystem compatibility for image-first tools. +- Use OME Arrow when your primary need is tighter integration between image data and tabular analytics workflows. +- Use both when needed: OME-Zarr for distribution/visualization paths and OME Arrow for query-centric pipelines. + +## Preliminary benchmark signal + +Preliminary results in [ome-arrow-benchmarks](https://github.com/WayScience/ome-arrow-benchmarks) show that outcomes are highly workload- and layout-dependent. +In the repository's synthetic wide-table plus image-column runs, Arrow-table-native backends can reduce full-table read time and storage size relative to some alternatives, while write performance varies by backend. +In the OME-Arrow-only benchmark that compares against directory-per-image OME-Zarr and TIFF layouts, full write/read timings and random-read timings diverge in different directions depending on operation type. +In that same OME-Arrow-only setup, Lance showed random-read timing similar to OME-Zarr (about 0.020 seconds vs about 0.019 seconds average), suggesting Lance can be a practical OME-Arrow-based option for large image repositories when table-native workflows are desired. 
+The same preliminary run also showed a larger Lance on-disk footprint than OME-Zarr, so this should be treated as an access-pattern tradeoff rather than a universal storage recommendation. +This interpretation is consistent with the Lance paper's focus on random access in columnar storage ([Pace et al., 2025](https://doi.org/10.48550/arXiv.2504.15247)). +This impacts comparisons directly: these are not pure "format A vs format B" tests, because the benchmark also reflects table model choices, directory layout choices, and access pattern choices. +For this reason, benchmark results should be treated as preliminary guidance for scenario fit, not universal rankings. + +## Why this matters for big-picture data repositories in image-based profiling + +[iceberg-bioimage](https://github.com/WayScience/iceberg-bioimage) is one concrete example: it positions [Apache Iceberg](https://iceberg.apache.org/) as a control plane (cataloging, schemas, joins, snapshots), while Zarr and OME-TIFF remain data-plane formats. +Its README also lists OME Arrow as an optional integration for Arrow-native tabular image payloads and lazy image access. + +That is the key fit: OME Arrow helps bridge bioimage formats and modern table engines without requiring every workflow to abandon OME-Zarr. diff --git a/pyproject.toml b/pyproject.toml index f25fa87..aebe9c8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -151,3 +151,13 @@ pytest tasks.jupyter.shell = """ jupyter lab """ +tasks.poster-preview.shell = """ +if command -v quarto >/dev/null 2>&1; then + quarto preview docs/presentations/2026-OME-community-meeting/poster.qmd --no-browser +elif [ -x /Applications/quarto/bin/quarto ]; then + /Applications/quarto/bin/quarto preview docs/presentations/2026-OME-community-meeting/poster.qmd --no-browser +else + echo "quarto not found. Install Quarto or add it to PATH." + exit 1 +fi +"""