From a6139f4db754e00e90103b9ac33d979a5348329f Mon Sep 17 00:00:00 2001 From: Aleksandr Kovalko Date: Sat, 9 May 2026 03:11:58 +0200 Subject: [PATCH 1/9] Added benchmarks vs S2, Boost.Geometry, GeographicLib --- .gitignore | 1 + CMakeLists.txt | 10 +- README.md | 21 +++ benchmarks/CMakeLists.txt | 116 +++++++++++++ benchmarks/README.md | 107 ++++++++++++ benchmarks/common/random_data.hpp | 67 ++++++++ benchmarks/size/consumer_boost.cpp | 31 ++++ benchmarks/size/consumer_geo_utils.cpp | 21 +++ benchmarks/size/consumer_geographiclib.cpp | 21 +++ benchmarks/size/consumer_naive.cpp | 59 +++++++ benchmarks/size/consumer_s2.cpp | 29 ++++ benchmarks/size/measure.sh | 157 +++++++++++++++++ benchmarks/speed/bench_boost.cpp | 136 +++++++++++++++ benchmarks/speed/bench_geo_utils.cpp | 75 +++++++++ benchmarks/speed/bench_geographiclib.cpp | 100 +++++++++++ benchmarks/speed/bench_naive.cpp | 42 +++++ benchmarks/speed/bench_s2.cpp | 106 ++++++++++++ docs/benchmarks.md | 186 +++++++++++++++++++++ 18 files changed, 1283 insertions(+), 2 deletions(-) create mode 100644 benchmarks/CMakeLists.txt create mode 100644 benchmarks/README.md create mode 100644 benchmarks/common/random_data.hpp create mode 100644 benchmarks/size/consumer_boost.cpp create mode 100644 benchmarks/size/consumer_geo_utils.cpp create mode 100644 benchmarks/size/consumer_geographiclib.cpp create mode 100644 benchmarks/size/consumer_naive.cpp create mode 100644 benchmarks/size/consumer_s2.cpp create mode 100755 benchmarks/size/measure.sh create mode 100644 benchmarks/speed/bench_boost.cpp create mode 100644 benchmarks/speed/bench_geo_utils.cpp create mode 100644 benchmarks/speed/bench_geographiclib.cpp create mode 100644 benchmarks/speed/bench_naive.cpp create mode 100644 benchmarks/speed/bench_s2.cpp create mode 100644 docs/benchmarks.md diff --git a/.gitignore b/.gitignore index ce379f0..7adb8bb 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ build/ +build-*/ # Compiled Object files *.slo diff --git 
a/CMakeLists.txt b/CMakeLists.txt index a55b6d2..c1e7d29 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -8,8 +8,9 @@ else() endif() include(CMakeDependentOption) -option(GEO_UTILS_CPP_BUILD_TESTS "Build geo-utils-cpp tests" ${_geo_utils_cpp_is_top_level}) -option(GEO_UTILS_CPP_BUILD_EXAMPLES "Build geo-utils-cpp examples" ${_geo_utils_cpp_is_top_level}) +option(GEO_UTILS_CPP_BUILD_TESTS "Build geo-utils-cpp tests" ${_geo_utils_cpp_is_top_level}) +option(GEO_UTILS_CPP_BUILD_EXAMPLES "Build geo-utils-cpp examples" ${_geo_utils_cpp_is_top_level}) +option(GEO_UTILS_CPP_BUILD_BENCHMARKS "Build geo-utils-cpp benchmarks" OFF) cmake_dependent_option(GEO_UTILS_CPP_ENABLE_COVERAGE "Enable gcov coverage instrumentation (GCC/Clang only)" OFF "GEO_UTILS_CPP_BUILD_TESTS" OFF) @@ -98,6 +99,11 @@ if(GEO_UTILS_CPP_BUILD_EXAMPLES) add_subdirectory(examples) endif() +# Benchmarks (opt-in: pulls Google Benchmark and optionally S2/Boost/GeographicLib) +if(GEO_UTILS_CPP_BUILD_BENCHMARKS) + add_subdirectory(benchmarks) +endif() + # Installation include(GNUInstallDirs) diff --git a/README.md b/README.md index cd122d0..859c46f 100644 --- a/README.md +++ b/README.md @@ -139,6 +139,27 @@ int main() { } ``` +## Benchmarks + +`geo-utils-cpp` is a near-zero-overhead wrapper over the math itself, with a +tiny disk footprint thanks to header-only + zero dependencies. + +| Library | `distance_between` (M pairs/s) | `contains` (poly N=100, M qps) | Install size | +| -------------------- | -----------------------------: | -----------------------------: | ------------: | +| **geo-utils-cpp** | **36.7** | **2.17** | **32 KB** | +| naive haversine | 37.0 | — | 0 | +| S2 Geometry | 14.3 | 18.0 | 32.8 MB | +| Boost.Geometry | 40.0 | 0.23 | 12.3 MB | +| GeographicLib | 1.2 | no native PIP | 4.6 MB | + +Apple M1 · clang 17 · `-O2 -DNDEBUG`. 
We're tied with hand-written haversine +on raw math, ~9× faster than Boost.Geometry on point-in-polygon, and have a +**144–1000× smaller install footprint** than the alternatives. S2 wins on +large-polygon containment thanks to spatial indexing. + +See [docs/benchmarks.md](docs/benchmarks.md) for the full methodology, all +operations, and a discussion of when to reach for each library. + ## API Reference See [docs/api.md](docs/api.md) for the full API reference. diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt new file mode 100644 index 0000000..7c00b8a --- /dev/null +++ b/benchmarks/CMakeLists.txt @@ -0,0 +1,116 @@ +# Benchmarks for geo-utils-cpp. +# +# Build with: +# cmake -B build -DGEO_UTILS_CPP_BUILD_BENCHMARKS=ON +# cmake --build build --target bench_all +# +# Google Benchmark is fetched automatically. S2, Boost.Geometry, and +# GeographicLib are looked up via find_package — missing competitors are +# skipped with a status message instead of causing a hard error. + +include(FetchContent) + +# --- Google Benchmark ------------------------------------------------------- + +set(BENCHMARK_ENABLE_TESTING OFF CACHE BOOL "" FORCE) +set(BENCHMARK_ENABLE_INSTALL OFF CACHE BOOL "" FORCE) +set(BENCHMARK_INSTALL_DOCS OFF CACHE BOOL "" FORCE) +set(BENCHMARK_ENABLE_GTEST_TESTS OFF CACHE BOOL "" FORCE) + +FetchContent_Declare( + google_benchmark + URL https://github.com/google/benchmark/archive/v1.8.4.tar.gz + DOWNLOAD_EXTRACT_TIMESTAMP ON +) +FetchContent_MakeAvailable(google_benchmark) + +# --- Shared infrastructure -------------------------------------------------- + +add_library(geo_utils_cpp_bench_common INTERFACE) +target_include_directories(geo_utils_cpp_bench_common INTERFACE + ${CMAKE_CURRENT_SOURCE_DIR}/common +) +target_compile_features(geo_utils_cpp_bench_common INTERFACE cxx_std_17) + +# Aggregator target: depend on every bench that ends up being built. 
+add_custom_target(bench_all) + +function(_geo_utils_cpp_add_bench name source) + add_executable(bench_${name} ${source}) + target_link_libraries(bench_${name} PRIVATE + geo::utils + geo_utils_cpp_bench_common + benchmark::benchmark_main + ${ARGN} + ) + add_dependencies(bench_all bench_${name}) +endfunction() + +# --- geo-utils-cpp itself + naive haversine baseline ------------------------ + +_geo_utils_cpp_add_bench(geo_utils speed/bench_geo_utils.cpp) +_geo_utils_cpp_add_bench(naive speed/bench_naive.cpp) + +# --- Optional: S2 Geometry -------------------------------------------------- +# +# vcpkg port name: `s2geometry`. Homebrew formula: `s2geometry`. The CMake +# package is conventionally exported as `s2`. +find_package(s2 QUIET) +if(s2_FOUND) + _geo_utils_cpp_add_bench(s2 speed/bench_s2.cpp s2::s2) + message(STATUS "geo-utils-cpp benchmarks: S2 found — bench_s2 enabled.") +else() + message(STATUS "geo-utils-cpp benchmarks: S2 not found — bench_s2 skipped. " + "Install via 'vcpkg install s2geometry' or 'brew install s2geometry'.") +endif() + +# --- Optional: Boost.Geometry ----------------------------------------------- +# +# Boost.Geometry is header-only. CMake 3.30 deprecates the FindBoost module in +# favour of Boost's exported BoostConfig.cmake; we prefer CONFIG and fall back +# to the legacy module so this works across CMake 3.14+. +if(POLICY CMP0167) + cmake_policy(SET CMP0167 NEW) +endif() + +find_package(Boost 1.70 QUIET CONFIG) +if(NOT Boost_FOUND) + find_package(Boost 1.70 QUIET) +endif() + +if(Boost_FOUND) + # Different Boost versions expose the header set as Boost::headers (>=1.71), + # Boost::boost (older), or only via Boost_INCLUDE_DIRS. 
+ if(TARGET Boost::headers) + _geo_utils_cpp_add_bench(boost speed/bench_boost.cpp Boost::headers) + elseif(TARGET Boost::boost) + _geo_utils_cpp_add_bench(boost speed/bench_boost.cpp Boost::boost) + else() + _geo_utils_cpp_add_bench(boost speed/bench_boost.cpp) + target_include_directories(bench_boost PRIVATE ${Boost_INCLUDE_DIRS}) + endif() + message(STATUS "geo-utils-cpp benchmarks: Boost.Geometry found — bench_boost enabled.") +else() + message(STATUS "geo-utils-cpp benchmarks: Boost not found — bench_boost skipped. " + "Install via 'vcpkg install boost-geometry' or 'brew install boost'.") +endif() + +# --- Optional: GeographicLib ------------------------------------------------ +# +# Recent GeographicLib versions export the target `GeographicLib::GeographicLib`. +# Older versions exposed `${GeographicLib_LIBRARIES}` instead — we accept both. +find_package(GeographicLib QUIET) +if(GeographicLib_FOUND) + if(TARGET GeographicLib::GeographicLib) + _geo_utils_cpp_add_bench(geographiclib speed/bench_geographiclib.cpp + GeographicLib::GeographicLib) + else() + _geo_utils_cpp_add_bench(geographiclib speed/bench_geographiclib.cpp + ${GeographicLib_LIBRARIES}) + target_include_directories(bench_geographiclib PRIVATE ${GeographicLib_INCLUDE_DIRS}) + endif() + message(STATUS "geo-utils-cpp benchmarks: GeographicLib found — bench_geographiclib enabled.") +else() + message(STATUS "geo-utils-cpp benchmarks: GeographicLib not found — bench_geographiclib skipped. " + "Install via 'vcpkg install geographiclib' or 'brew install geographiclib'.") +endif() diff --git a/benchmarks/README.md b/benchmarks/README.md new file mode 100644 index 0000000..b4743cb --- /dev/null +++ b/benchmarks/README.md @@ -0,0 +1,107 @@ +# Benchmarks + +This directory measures `geo-utils-cpp` against three popular C++ geometry +libraries on two axes: + +1. **Speed** — Google Benchmark micro-benchmarks for `distance`, `heading`, + `contains`, `area`, and `path_length`. +2. 
**Disk footprint** — stripped binary size of a minimal consumer plus the + on-disk install size of each library. + +The benchmarks are **not built by default** and **not run in CI**. Set +`-DGEO_UTILS_CPP_BUILD_BENCHMARKS=ON` to opt in. + +For a high-level summary of results and methodology see +[`docs/benchmarks.md`](../docs/benchmarks.md). + +## Competitors + +| Library | Model | Notes | +| -------------------- | ----------- | ------------------------------------------------- | +| **geo-utils-cpp** | sphere | This library — header-only, zero deps | +| **naive haversine** | sphere | ~30 lines of inline math, no library | +| **S2 Geometry** | sphere | Google's mapping library; depends on abseil | +| **Boost.Geometry** | sphere\* | `cs::spherical_equatorial` strategy | +| **GeographicLib** | ellipsoid | Karney's iterative geodesic — more accurate, slower; **no native point-in-polygon** | + +\* Boost.Geometry can also do ellipsoidal; we use the spherical strategy here +to compare apples-to-apples on the algorithm we ourselves implement. + +## Installing competitors + +The CMake build looks each library up via `find_package` and *skips* the +benchmark binary for any competitor it can't find — so you only need to +install the libraries you want to compare against. + +### Homebrew (macOS / Linux) + +```sh +brew install s2geometry boost geographiclib +``` + +### vcpkg + +```sh +vcpkg install s2geometry boost-geometry geographiclib +``` + +(Then point CMake at the vcpkg toolchain file with +`-DCMAKE_TOOLCHAIN_FILE=$VCPKG_ROOT/scripts/buildsystems/vcpkg.cmake`.) + +### apt (Debian/Ubuntu) + +S2 is not in apt; install from source or vcpkg. The other two: + +```sh +sudo apt install libboost-dev libgeographiclib-dev +``` + +## Building and running speed benchmarks + +```sh +cmake -S . 
-B build-bench \ + -DGEO_UTILS_CPP_BUILD_BENCHMARKS=ON \ + -DCMAKE_BUILD_TYPE=Release +cmake --build build-bench --target bench_all -j + +# Run any one binary: +./build-bench/benchmarks/bench_geo_utils +./build-bench/benchmarks/bench_naive +./build-bench/benchmarks/bench_s2 # if S2 was found +./build-bench/benchmarks/bench_boost # if Boost was found +./build-bench/benchmarks/bench_geographiclib # if GeographicLib was found +``` + +Each binary supports the standard Google Benchmark flags. To write one JSON +report per binary into the current directory: + +```sh +for b in build-bench/benchmarks/bench_*; do + "$b" --benchmark_format=json --benchmark_out="$(basename "$b").json" +done +``` + +## Measuring disk footprint + +```sh +./benchmarks/size/measure.sh +``` + +This builds a minimal "distance + point-in-polygon" consumer against every +library it can find, strips the resulting binary, and reports both the +binary size and the on-disk install size of each library. Override compiler +or flags via `CXX=` and `CXXFLAGS=`. + +## Methodology notes + +- **Same inputs everywhere.** All benchmarks pull data from + `common/random_data.hpp`, which is seeded deterministically. +- **Conversion costs are included** for libraries that don't accept lat/lng + natively (S2 takes `S2Point`; Boost takes its own point type). Pre-converting + outside the timing loop would understate the cost of "I have lat/lng, I want + a distance" — the actual usage pattern. +- **Polygons are pre-built** outside the timing loop because in real code a + geofence is a one-time setup cost. +- **Don't compare GeographicLib's distance/heading head-to-head as "speed".** + It computes a different (more accurate) thing on the WGS84 ellipsoid. + Treat it as a "trade-off" data point. 
diff --git a/benchmarks/common/random_data.hpp b/benchmarks/common/random_data.hpp new file mode 100644 index 0000000..95bc50c --- /dev/null +++ b/benchmarks/common/random_data.hpp @@ -0,0 +1,67 @@ +// Copyright 2026 Aleksandr Kovalko +// Licensed under the Apache License, Version 2.0 +// +// Deterministic random data shared by every benchmark binary, so that all +// libraries compete on identical inputs. + +#pragma once + +#include +#include +#include +#include +#include + +#include + +namespace geo::bench { + +inline constexpr std::uint64_t kSeed = 0xC0FFEEull; + +// Returns `n` points with latitude drawn uniformly from [-80, 80) and longitude +// from [-180, 180) degrees — uniform in lat/lng, not area-uniform on the sphere. +// The polar caps are excluded to keep all libraries on safe ground. NOTE(review): +// this does not rule out nearly antipodal pairs, which stress ellipsoidal Inverse. +inline std::vector random_points(std::size_t n, std::uint64_t seed = kSeed) { + std::mt19937_64 rng(seed); + std::uniform_real_distribution lat(-80.0, 80.0); + std::uniform_real_distribution lng(-180.0, 180.0); + std::vector out; + out.reserve(n); + for (std::size_t i = 0; i < n; ++i) { + out.emplace_back(lat(rng), lng(rng)); + } + return out; +} + +// Returns a regular n-gon centered at (clat, clng) with the given radius in +// degrees of lat/lng. Vertices start due north and proceed east (lat uses cos, +// lng uses sin). NOTE(review): that is clockwise on a north-up map — confirm winding. +inline std::vector regular_polygon(std::size_t n, double clat, double clng, double radius_deg) { + constexpr double kPi = 3.14159265358979323846; + std::vector out; + out.reserve(n); + for (std::size_t i = 0; i < n; ++i) { + double a = 2.0 * kPi * static_cast(i) / static_cast(n); + out.emplace_back(clat + radius_deg * std::cos(a), + clng + radius_deg * std::sin(a)); + } + return out; +} + +// Query points drawn uniformly from the lat/lng square of half-width 2 * radius_deg +// around the center: ~20% (pi/16) fall inside a regular_polygon() of that radius, so both contains() branches run. 
+inline std::vector queries_around(double clat, double clng, double radius_deg, + std::size_t n, std::uint64_t seed = kSeed + 1) { + std::mt19937_64 rng(seed); + std::uniform_real_distribution jitter(-2.0, 2.0); + std::vector out; + out.reserve(n); + for (std::size_t i = 0; i < n; ++i) { + out.emplace_back(clat + radius_deg * jitter(rng), + clng + radius_deg * jitter(rng)); + } + return out; +} + +} // namespace geo::bench diff --git a/benchmarks/size/consumer_boost.cpp b/benchmarks/size/consumer_boost.cpp new file mode 100644 index 0000000..a1fe34f --- /dev/null +++ b/benchmarks/size/consumer_boost.cpp @@ -0,0 +1,31 @@ +// Minimal consumer: distance + point-in-polygon, Boost.Geometry. + +#include +#include + +#include +#include +#include +#include + +namespace bg = boost::geometry; +using Pt = bg::model::point>; +using Poly = bg::model::polygon; + +int main(int argc, char** argv) { + if (argc < 5) return 1; + Pt a(std::atof(argv[2]), std::atof(argv[1])); // (lng, lat) + Pt b(std::atof(argv[4]), std::atof(argv[3])); + Poly poly; + bg::append(poly.outer(), Pt{-74.1, 40.7}); + bg::append(poly.outer(), Pt{-74.1, 40.8}); + bg::append(poly.outer(), Pt{-74.0, 40.8}); + bg::append(poly.outer(), Pt{-74.0, 40.7}); + bg::append(poly.outer(), Pt{-74.1, 40.7}); + bg::correct(poly); + + bg::strategy::distance::haversine hav(6371009.0); + const double d = bg::distance(a, b, hav); + std::printf("%.1f %d\n", d, static_cast(bg::within(a, poly))); + return 0; +} diff --git a/benchmarks/size/consumer_geo_utils.cpp b/benchmarks/size/consumer_geo_utils.cpp new file mode 100644 index 0000000..bf22d78 --- /dev/null +++ b/benchmarks/size/consumer_geo_utils.cpp @@ -0,0 +1,21 @@ +// Minimal consumer: distance + point-in-polygon, geo-utils-cpp. +// Reads four doubles (lat1 lng1 lat2 lng2) from argv so the optimizer can't +// pre-compute the answer, prints "<distance> <contains>" (contains as 0 or 1). 
+ +#include +#include +#include + +#include + +int main(int argc, char** argv) { + if (argc < 5) return 1; + geo::LatLng a(std::atof(argv[1]), std::atof(argv[2])); + geo::LatLng b(std::atof(argv[3]), std::atof(argv[4])); + std::vector poly = { + {40.7, -74.1}, {40.8, -74.1}, {40.8, -74.0}, {40.7, -74.0}, + }; + std::printf("%.1f %d\n", geo::distance_between(a, b), + static_cast(geo::contains(a, poly))); + return 0; +} diff --git a/benchmarks/size/consumer_geographiclib.cpp b/benchmarks/size/consumer_geographiclib.cpp new file mode 100644 index 0000000..5e22e5b --- /dev/null +++ b/benchmarks/size/consumer_geographiclib.cpp @@ -0,0 +1,21 @@ +// Minimal consumer: distance only, GeographicLib. +// GeographicLib does not provide a native point-in-polygon predicate, so we +// emit "-1" in that column to make the omission explicit (rather than +// inflating its size by adding a hand-rolled PIP that no caller would +// actually depend on). + +#include +#include + +#include + +int main(int argc, char** argv) { + if (argc < 5) return 1; + double s12 = 0.0; + GeographicLib::Geodesic::WGS84().Inverse( + std::atof(argv[1]), std::atof(argv[2]), + std::atof(argv[3]), std::atof(argv[4]), + s12); + std::printf("%.1f -1\n", s12); + return 0; +} diff --git a/benchmarks/size/consumer_naive.cpp b/benchmarks/size/consumer_naive.cpp new file mode 100644 index 0000000..e54cc1d --- /dev/null +++ b/benchmarks/size/consumer_naive.cpp @@ -0,0 +1,59 @@ +// Minimal consumer: distance + point-in-polygon, hand-written, no library. +// Establishes a "no-dependency floor" against which every library's overhead +// can be measured. 
+ +#include +#include +#include +#include +#include + +namespace { + +struct LL { + double lat; + double lng; +}; + +double distance(LL a, LL b) { + constexpr double kR = 6371009.0; + constexpr double kPi = 3.14159265358979323846; + const double lat1 = a.lat * kPi / 180.0; + const double lat2 = b.lat * kPi / 180.0; + const double dlat = (b.lat - a.lat) * kPi / 180.0; + const double dlng = (b.lng - a.lng) * kPi / 180.0; + const double s1 = std::sin(dlat * 0.5); + const double s2 = std::sin(dlng * 0.5); + const double h = s1 * s1 + std::cos(lat1) * std::cos(lat2) * s2 * s2; + return 2.0 * kR * std::asin(std::sqrt(h)); +} + +// Planar ray-cast — fine for a tiny bbox at ~40°N. NOT correct in general +// for spherical polygons, but representative of "the simplest thing a +// developer would write before reaching for a library". +bool contains(LL p, const std::vector& poly) { + bool inside = false; + const std::size_t n = poly.size(); + for (std::size_t i = 0, j = n - 1; i < n; j = i++) { + if ((poly[i].lng > p.lng) != (poly[j].lng > p.lng) && + p.lat < (poly[j].lat - poly[i].lat) * (p.lng - poly[i].lng) / + (poly[j].lng - poly[i].lng) + + poly[i].lat) { + inside = !inside; + } + } + return inside; +} + +} // namespace + +int main(int argc, char** argv) { + if (argc < 5) return 1; + LL a{std::atof(argv[1]), std::atof(argv[2])}; + LL b{std::atof(argv[3]), std::atof(argv[4])}; + std::vector poly = { + {40.7, -74.1}, {40.8, -74.1}, {40.8, -74.0}, {40.7, -74.0}, + }; + std::printf("%.1f %d\n", distance(a, b), static_cast(contains(a, poly))); + return 0; +} diff --git a/benchmarks/size/consumer_s2.cpp b/benchmarks/size/consumer_s2.cpp new file mode 100644 index 0000000..0b69fb8 --- /dev/null +++ b/benchmarks/size/consumer_s2.cpp @@ -0,0 +1,29 @@ +// Minimal consumer: distance + point-in-polygon, S2 Geometry. 
+ +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +int main(int argc, char** argv) { + if (argc < 5) return 1; + const auto a = S2LatLng::FromDegrees(std::atof(argv[1]), std::atof(argv[2])).ToPoint(); + const auto b = S2LatLng::FromDegrees(std::atof(argv[3]), std::atof(argv[4])).ToPoint(); + std::vector verts = { + S2LatLng::FromDegrees(40.7, -74.1).ToPoint(), + S2LatLng::FromDegrees(40.8, -74.1).ToPoint(), + S2LatLng::FromDegrees(40.8, -74.0).ToPoint(), + S2LatLng::FromDegrees(40.7, -74.0).ToPoint(), + }; + auto loop = std::make_unique(verts); + loop->Normalize(); + const double d = S2Earth::ToMeters(S1Angle(a, b)); + std::printf("%.1f %d\n", d, static_cast(loop->Contains(a))); + return 0; +} diff --git a/benchmarks/size/measure.sh b/benchmarks/size/measure.sh new file mode 100755 index 0000000..4552a1f --- /dev/null +++ b/benchmarks/size/measure.sh @@ -0,0 +1,157 @@ +#!/usr/bin/env bash +# Builds a minimal consumer (distance + point-in-polygon) against each library +# we benchmark, then reports the stripped binary size. Also reports the +# install footprint of the library itself (Homebrew prefix size, when +# available) so the reader can see the *deployment* cost, not just the link +# cost. +# +# Override: +# CXX=clang++ ./measure.sh +# CXXFLAGS="-std=c++17 -O3 -DNDEBUG" ./measure.sh +# STATIC=1 ./measure.sh # add -static (best-effort; not all toolchains) + +set -euo pipefail + +CXX="${CXX:-c++}" +CXXFLAGS="${CXXFLAGS:--std=c++17 -O2 -DNDEBUG}" +EXTRA_LD="" +if [ "${STATIC:-0}" = "1" ]; then + EXTRA_LD="-static" +fi + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +ROOT_DIR="$(cd "${SCRIPT_DIR}/../.." 
&& pwd)" +TMP="$(mktemp -d)" +trap 'rm -rf "${TMP}"' EXIT + +is_macos() { [ "$(uname)" = "Darwin" ]; } + +file_size() { + if is_macos; then stat -f '%z' "$1"; else stat -c '%s' "$1"; fi +} + +human() { + awk -v b="$1" 'BEGIN { + split("B KB MB GB", u); + i = 1; + while (b >= 1024 && i < 4) { b /= 1024; i++ } + printf "%.1f %s", b, u[i] + }' +} + +dir_size() { + if [ -z "$1" ] || [ ! -e "$1" ]; then echo 0; return; fi + # -L follows symlinks (Homebrew's --prefix is a symlink into Cellar/). "|| true" plus the END rule make a failing or silent du print 0 instead of aborting the script under "set -euo pipefail". + if is_macos; then + { du -skL "$1" 2>/dev/null || true; } | awk 'END {print $1 * 1024}' + else + { du -sbL "$1" 2>/dev/null || true; } | awk 'END {print $1 + 0}' + fi +} + +row() { + # name, binary_size_bytes ("" if skipped), install_size_bytes ("" if N/A), notes + local name="$1" bin="$2" inst="$3" notes="$4" + local bin_str="—" inst_str="—" + [ -n "${bin}" ] && bin_str=$(human "${bin}") + [ -n "${inst}" ] && inst_str=$(human "${inst}") + printf " %-22s %14s %14s %s\n" "${name}" "${bin_str}" "${inst_str}" "${notes}" +} + +build() { + # name, source, extra_flags... 
+ local name="$1"; shift + local source="$1"; shift + local out="${TMP}/${name// /_}" + if "${CXX}" ${CXXFLAGS} ${EXTRA_LD} "${source}" -o "${out}" "$@" 2>"${TMP}/${name}.log"; then + strip "${out}" 2>/dev/null || true + file_size "${out}" + else + echo "" # signals "skipped" + fi +} + +brew_prefix() { + command -v brew >/dev/null 2>&1 && brew --prefix "$1" 2>/dev/null || true +} + +echo "Stripped binary + install-footprint comparison" +echo " CXX=${CXX}, CXXFLAGS=${CXXFLAGS}${EXTRA_LD:+ ${EXTRA_LD}}" +echo +printf " %-22s %14s %14s %s\n" "Library" "Binary" "Installed" "Notes" +printf " %-22s %14s %14s %s\n" "-------" "------" "---------" "-----" + +# --- geo-utils-cpp (header-only, in-tree) ---------------------------------- +GU_BIN=$(build "geo-utils-cpp" "${SCRIPT_DIR}/consumer_geo_utils.cpp" \ + -I "${ROOT_DIR}/include") +GU_INST=$(dir_size "${ROOT_DIR}/include") +row "geo-utils-cpp" "${GU_BIN}" "${GU_INST}" "header-only, zero deps" + +# --- naive (no library) ---------------------------------------------------- +NV_BIN=$(build "naive" "${SCRIPT_DIR}/consumer_naive.cpp") +row "naive (no library)" "${NV_BIN}" "0" "hand-written haversine + planar PIP" + +# --- S2 Geometry ----------------------------------------------------------- +S2_FLAGS="" +S2_PFX="$(brew_prefix s2geometry)" +if [ -n "${S2_PFX}" ] && [ -d "${S2_PFX}" ]; then + S2_FLAGS="-I${S2_PFX}/include -L${S2_PFX}/lib -ls2" + # absl is a runtime peer; common labels: + ABSL_PFX="$(brew_prefix abseil)" + [ -n "${ABSL_PFX}" ] && S2_FLAGS="${S2_FLAGS} -I${ABSL_PFX}/include -L${ABSL_PFX}/lib" +elif command -v pkg-config >/dev/null && pkg-config --exists s2 2>/dev/null; then + S2_FLAGS="$(pkg-config --cflags --libs s2)" +fi +if [ -n "${S2_FLAGS}" ]; then + S2_BIN=$(build "S2 Geometry" "${SCRIPT_DIR}/consumer_s2.cpp" ${S2_FLAGS}) + S2_INST=$(dir_size "${S2_PFX}") + ABSL_INST=$(dir_size "${ABSL_PFX:-}") + S2_TOTAL=$((S2_INST + ABSL_INST)) + row "S2 Geometry" "${S2_BIN}" "${S2_TOTAL}" "+ abseil dependency" +else + 
row "S2 Geometry" "" "" "not installed (brew install s2geometry)" +fi + +# --- Boost.Geometry -------------------------------------------------------- +BOOST_FLAGS="" +BOOST_PFX="$(brew_prefix boost)" +if [ -n "${BOOST_PFX}" ] && [ -d "${BOOST_PFX}/include/boost" ]; then + BOOST_FLAGS="-I${BOOST_PFX}/include" +elif [ -d /usr/include/boost ]; then + BOOST_FLAGS="" + BOOST_PFX="/usr" +fi +if [ -n "${BOOST_PFX}" ] && [ -d "${BOOST_PFX}" ]; then + B_BIN=$(build "Boost.Geometry" "${SCRIPT_DIR}/consumer_boost.cpp" ${BOOST_FLAGS}) + # Whole Boost is huge; report only Boost.Geometry headers as a fairer figure. + BG_HDRS="${BOOST_PFX}/include/boost/geometry" + B_INST=$(dir_size "${BG_HDRS}") + row "Boost.Geometry" "${B_BIN}" "${B_INST}" "headers-only subset of Boost" +else + row "Boost.Geometry" "" "" "not installed (brew install boost)" +fi + +# --- GeographicLib --------------------------------------------------------- +GL_FLAGS="" +GL_PFX="$(brew_prefix geographiclib)" +if [ -n "${GL_PFX}" ] && [ -d "${GL_PFX}" ]; then + GL_FLAGS="-I${GL_PFX}/include -L${GL_PFX}/lib -lGeographicLib" +elif command -v pkg-config >/dev/null && pkg-config --exists geographiclib 2>/dev/null; then + GL_FLAGS="$(pkg-config --cflags --libs geographiclib)" +fi +if [ -n "${GL_FLAGS}" ]; then + GL_BIN=$(build "GeographicLib" "${SCRIPT_DIR}/consumer_geographiclib.cpp" ${GL_FLAGS}) + GL_INST=$(dir_size "${GL_PFX}") + row "GeographicLib" "${GL_BIN}" "${GL_INST}" "no native PIP — distance only" +else + row "GeographicLib" "" "" "not installed (brew install geographiclib)" +fi + +echo +echo "Notes:" +echo " - 'Binary' is the stripped consumer executable (dynamic linking)." +echo " - 'Installed' for geo-utils-cpp is the include/ directory (everything" +echo " you ship). For other libraries it's the package install prefix on" +echo " Homebrew (or the geometry subset for Boost) — what the user must" +echo " have on disk to consume the library." 
+echo " - Build logs (if any) are in: ${TMP} (kept until script exit)" diff --git a/benchmarks/speed/bench_boost.cpp b/benchmarks/speed/bench_boost.cpp new file mode 100644 index 0000000..224fe77 --- /dev/null +++ b/benchmarks/speed/bench_boost.cpp @@ -0,0 +1,136 @@ +// Copyright 2026 Aleksandr Kovalko +// Licensed under the Apache License, Version 2.0 +// +// Boost.Geometry equivalents using `cs::spherical_equatorial` — +// the same sphere-based model we use, so this is a fair head-to-head. +// +// Coordinate convention reminder: Boost.Geometry expects (lng, lat) order +// for spherical_equatorial points. + +#include + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "random_data.hpp" + +namespace bg = boost::geometry; + +namespace { + +using Point = bg::model::point>; +using Polygon = bg::model::polygon; +using LineString = bg::model::linestring; + +constexpr double kEarthRadius = 6371009.0; + +inline Point to_boost_point(const geo::LatLng& p) { + return Point(p.lng, p.lat); // (lng, lat) order! 
+} + +inline Polygon to_boost_polygon(const std::vector& pts) { + Polygon poly; + auto& outer = poly.outer(); + outer.reserve(pts.size() + 1); + for (const auto& p : pts) outer.emplace_back(p.lng, p.lat); + outer.emplace_back(pts.front().lng, pts.front().lat); // close the ring + bg::correct(poly); // ensure orientation matches Boost's expectation + return poly; +} + +inline LineString to_boost_linestring(const std::vector& pts) { + LineString ls; + ls.reserve(pts.size()); + for (const auto& p : pts) ls.emplace_back(p.lng, p.lat); + return ls; +} + +} // namespace + +// --- distance -------------------------------------------------------------- + +static void BM_Boost_DistanceBetween(benchmark::State& state) { + const auto pts = geo::bench::random_points(2 * static_cast(state.range(0))); + bg::strategy::distance::haversine haversine(kEarthRadius); + for (auto _ : state) { + for (std::size_t i = 0; i + 1 < pts.size(); i += 2) { + const Point a = to_boost_point(pts[i]); + const Point b = to_boost_point(pts[i + 1]); + benchmark::DoNotOptimize(bg::distance(a, b, haversine)); + } + } + state.SetItemsProcessed(state.iterations() * state.range(0)); +} +BENCHMARK(BM_Boost_DistanceBetween)->Arg(1000)->Arg(100000); + +// --- heading (azimuth) ----------------------------------------------------- + +static void BM_Boost_Heading(benchmark::State& state) { + const auto pts = geo::bench::random_points(2 * static_cast(state.range(0))); + for (auto _ : state) { + for (std::size_t i = 0; i + 1 < pts.size(); i += 2) { + const Point a = to_boost_point(pts[i]); + const Point b = to_boost_point(pts[i + 1]); + benchmark::DoNotOptimize(bg::azimuth(a, b)); + } + } + state.SetItemsProcessed(state.iterations() * state.range(0)); +} +BENCHMARK(BM_Boost_Heading)->Arg(1000)->Arg(100000); + +// --- contains (within) ----------------------------------------------------- + +static void BM_Boost_Contains(benchmark::State& state) { + const auto poly_ll = geo::bench::regular_polygon( + 
static_cast(state.range(0)), 40.0, -74.0, 5.0); + const Polygon poly = to_boost_polygon(poly_ll); + + const auto queries_ll = geo::bench::queries_around(40.0, -74.0, 5.0, 1000); + std::vector queries; + queries.reserve(queries_ll.size()); + for (const auto& q : queries_ll) queries.push_back(to_boost_point(q)); + + for (auto _ : state) { + for (const auto& q : queries) { + benchmark::DoNotOptimize(bg::within(q, poly)); + } + } + state.SetItemsProcessed(state.iterations() * static_cast(queries.size())); +} +BENCHMARK(BM_Boost_Contains)->Arg(10)->Arg(100)->Arg(1000); + +// --- area ------------------------------------------------------------------ + +static void BM_Boost_Area(benchmark::State& state) { + const auto poly_ll = geo::bench::regular_polygon( + static_cast(state.range(0)), 40.0, -74.0, 5.0); + const Polygon poly = to_boost_polygon(poly_ll); + // bg::area on a spherical CS returns area on the unit sphere (steradians); + // multiply by R² to get square meters. + const double r2 = kEarthRadius * kEarthRadius; + for (auto _ : state) { + benchmark::DoNotOptimize(bg::area(poly) * r2); + } + state.SetItemsProcessed(state.iterations() * state.range(0)); +} +BENCHMARK(BM_Boost_Area)->Arg(10)->Arg(100)->Arg(1000); + +// --- path_length ----------------------------------------------------------- + +static void BM_Boost_PathLength(benchmark::State& state) { + const auto path_ll = geo::bench::random_points(static_cast(state.range(0))); + const LineString ls = to_boost_linestring(path_ll); + bg::strategy::distance::haversine haversine(kEarthRadius); + for (auto _ : state) { + benchmark::DoNotOptimize(bg::length(ls, haversine)); + } + state.SetItemsProcessed(state.iterations() * state.range(0)); +} +BENCHMARK(BM_Boost_PathLength)->Arg(10)->Arg(100)->Arg(1000); diff --git a/benchmarks/speed/bench_geo_utils.cpp b/benchmarks/speed/bench_geo_utils.cpp new file mode 100644 index 0000000..13fad86 --- /dev/null +++ b/benchmarks/speed/bench_geo_utils.cpp @@ -0,0 +1,75 @@ +// 
Copyright 2026 Aleksandr Kovalko +// Licensed under the Apache License, Version 2.0 + +#include + +#include +#include + +#include + +#include "random_data.hpp" + +// --- distance_between ------------------------------------------------------- + +static void BM_GeoUtils_DistanceBetween(benchmark::State& state) { + const auto pts = geo::bench::random_points(2 * static_cast(state.range(0))); + for (auto _ : state) { + for (std::size_t i = 0; i + 1 < pts.size(); i += 2) { + benchmark::DoNotOptimize(geo::distance_between(pts[i], pts[i + 1])); + } + } + state.SetItemsProcessed(state.iterations() * state.range(0)); +} +BENCHMARK(BM_GeoUtils_DistanceBetween)->Arg(1000)->Arg(100000); + +// --- heading ---------------------------------------------------------------- + +static void BM_GeoUtils_Heading(benchmark::State& state) { + const auto pts = geo::bench::random_points(2 * static_cast(state.range(0))); + for (auto _ : state) { + for (std::size_t i = 0; i + 1 < pts.size(); i += 2) { + benchmark::DoNotOptimize(geo::heading(pts[i], pts[i + 1])); + } + } + state.SetItemsProcessed(state.iterations() * state.range(0)); +} +BENCHMARK(BM_GeoUtils_Heading)->Arg(1000)->Arg(100000); + +// --- contains (point-in-polygon) ------------------------------------------- + +static void BM_GeoUtils_Contains(benchmark::State& state) { + const auto poly = geo::bench::regular_polygon( + static_cast(state.range(0)), 40.0, -74.0, 5.0); + const auto queries = geo::bench::queries_around(40.0, -74.0, 5.0, 1000); + for (auto _ : state) { + for (const auto& q : queries) { + benchmark::DoNotOptimize(geo::contains(q, poly)); + } + } + state.SetItemsProcessed(state.iterations() * static_cast(queries.size())); +} +BENCHMARK(BM_GeoUtils_Contains)->Arg(10)->Arg(100)->Arg(1000); + +// --- area ------------------------------------------------------------------- + +static void BM_GeoUtils_Area(benchmark::State& state) { + const auto poly = geo::bench::regular_polygon( + static_cast(state.range(0)), 40.0, -74.0, 
5.0); + for (auto _ : state) { + benchmark::DoNotOptimize(geo::area(poly)); + } + state.SetItemsProcessed(state.iterations() * state.range(0)); +} +BENCHMARK(BM_GeoUtils_Area)->Arg(10)->Arg(100)->Arg(1000); + +// --- path_length ------------------------------------------------------------ + +static void BM_GeoUtils_PathLength(benchmark::State& state) { + const auto path = geo::bench::random_points(static_cast(state.range(0))); + for (auto _ : state) { + benchmark::DoNotOptimize(geo::path_length(path)); + } + state.SetItemsProcessed(state.iterations() * state.range(0)); +} +BENCHMARK(BM_GeoUtils_PathLength)->Arg(10)->Arg(100)->Arg(1000); diff --git a/benchmarks/speed/bench_geographiclib.cpp b/benchmarks/speed/bench_geographiclib.cpp new file mode 100644 index 0000000..e13f8a8 --- /dev/null +++ b/benchmarks/speed/bench_geographiclib.cpp @@ -0,0 +1,100 @@ +// Copyright 2026 Aleksandr Kovalko +// Licensed under the Apache License, Version 2.0 +// +// GeographicLib uses an ellipsoid (WGS84 by default) and Karney's iterative +// geodesic algorithm — significantly more accurate than haversine but also +// slower. The numbers here intentionally show that gap; this is a +// *trade-off* benchmark, not a "we are faster" benchmark. +// +// GeographicLib does not ship a native point-in-polygon predicate, so the +// `contains` benchmark is omitted (this is itself information for the reader). 
+ +#include + +#include +#include + +#include +#include + +#include "random_data.hpp" + +namespace { + +inline const GeographicLib::Geodesic& wgs84() { + return GeographicLib::Geodesic::WGS84(); +} + +} // namespace + +// --- distance -------------------------------------------------------------- + +static void BM_GeographicLib_DistanceBetween(benchmark::State& state) { + const auto pts = geo::bench::random_points(2 * static_cast(state.range(0))); + const auto& geod = wgs84(); + double s12 = 0.0; + for (auto _ : state) { + for (std::size_t i = 0; i + 1 < pts.size(); i += 2) { + geod.Inverse(pts[i].lat, pts[i].lng, pts[i + 1].lat, pts[i + 1].lng, s12); + benchmark::DoNotOptimize(s12); + } + } + state.SetItemsProcessed(state.iterations() * state.range(0)); +} +BENCHMARK(BM_GeographicLib_DistanceBetween)->Arg(1000)->Arg(100000); + +// --- heading --------------------------------------------------------------- + +static void BM_GeographicLib_Heading(benchmark::State& state) { + const auto pts = geo::bench::random_points(2 * static_cast(state.range(0))); + const auto& geod = wgs84(); + double s12 = 0.0; + double azi1 = 0.0; + double azi2 = 0.0; + for (auto _ : state) { + for (std::size_t i = 0; i + 1 < pts.size(); i += 2) { + geod.Inverse(pts[i].lat, pts[i].lng, pts[i + 1].lat, pts[i + 1].lng, + s12, azi1, azi2); + benchmark::DoNotOptimize(azi1); + } + } + state.SetItemsProcessed(state.iterations() * state.range(0)); +} +BENCHMARK(BM_GeographicLib_Heading)->Arg(1000)->Arg(100000); + +// --- area (PolygonArea, polyline=false) ----------------------------------- + +static void BM_GeographicLib_Area(benchmark::State& state) { + const auto poly = geo::bench::regular_polygon( + static_cast(state.range(0)), 40.0, -74.0, 5.0); + for (auto _ : state) { + GeographicLib::PolygonArea pa(wgs84(), /*polyline=*/false); + for (const auto& p : poly) pa.AddPoint(p.lat, p.lng); + double perimeter = 0.0; + double area = 0.0; + pa.Compute(/*reverse=*/false, /*sign=*/true, perimeter, area); 
+ benchmark::DoNotOptimize(area); + } + state.SetItemsProcessed(state.iterations() * state.range(0)); +} +BENCHMARK(BM_GeographicLib_Area)->Arg(10)->Arg(100)->Arg(1000); + +// --- path_length (PolygonArea with polyline=true) ------------------------- + +static void BM_GeographicLib_PathLength(benchmark::State& state) { + const auto path = geo::bench::random_points(static_cast(state.range(0))); + for (auto _ : state) { + GeographicLib::PolygonArea pa(wgs84(), /*polyline=*/true); + for (const auto& p : path) pa.AddPoint(p.lat, p.lng); + double perimeter = 0.0; + double area = 0.0; + pa.Compute(/*reverse=*/false, /*sign=*/true, perimeter, area); + benchmark::DoNotOptimize(perimeter); + } + state.SetItemsProcessed(state.iterations() * state.range(0)); +} +BENCHMARK(BM_GeographicLib_PathLength)->Arg(10)->Arg(100)->Arg(1000); + +// Note: contains() benchmark intentionally omitted — GeographicLib does not +// provide a native point-in-polygon predicate. This is a real capability gap, +// not just a benchmark omission. diff --git a/benchmarks/speed/bench_naive.cpp b/benchmarks/speed/bench_naive.cpp new file mode 100644 index 0000000..fb3ce92 --- /dev/null +++ b/benchmarks/speed/bench_naive.cpp @@ -0,0 +1,42 @@ +// Copyright 2026 Aleksandr Kovalko +// Licensed under the Apache License, Version 2.0 +// +// Reference baseline: a textbook haversine implementation, no library at all. +// Tells us how much overhead `geo-utils-cpp` adds over the simplest possible +// hand-written code (ideally: zero — we are inlined headers). 
+ +#include + +#include +#include + +#include "random_data.hpp" + +namespace { + +constexpr double kEarthRadius = 6371009.0; +constexpr double kPi = 3.14159265358979323846; + +inline double naive_distance(geo::LatLng a, geo::LatLng b) noexcept { + const double lat1 = a.lat * kPi / 180.0; + const double lat2 = b.lat * kPi / 180.0; + const double dlat = (b.lat - a.lat) * kPi / 180.0; + const double dlng = (b.lng - a.lng) * kPi / 180.0; + const double s_lat = std::sin(dlat * 0.5); + const double s_lng = std::sin(dlng * 0.5); + const double h = s_lat * s_lat + std::cos(lat1) * std::cos(lat2) * s_lng * s_lng; + return 2.0 * kEarthRadius * std::asin(std::sqrt(h)); +} + +} // namespace + +static void BM_Naive_DistanceBetween(benchmark::State& state) { + const auto pts = geo::bench::random_points(2 * static_cast(state.range(0))); + for (auto _ : state) { + for (std::size_t i = 0; i + 1 < pts.size(); i += 2) { + benchmark::DoNotOptimize(naive_distance(pts[i], pts[i + 1])); + } + } + state.SetItemsProcessed(state.iterations() * state.range(0)); +} +BENCHMARK(BM_Naive_DistanceBetween)->Arg(1000)->Arg(100000); diff --git a/benchmarks/speed/bench_s2.cpp b/benchmarks/speed/bench_s2.cpp new file mode 100644 index 0000000..00eba60 --- /dev/null +++ b/benchmarks/speed/bench_s2.cpp @@ -0,0 +1,106 @@ +// Copyright 2026 Aleksandr Kovalko +// Licensed under the Apache License, Version 2.0 +// +// S2 Geometry equivalents. S2 uses a sphere, same as us — apples-to-apples +// comparison on accuracy. Where the API forces a different shape (input is +// 3D unit vector S2Point, not lat/lng), we include the conversion in the +// timed loop because that is the realistic cost when the data comes from +// outside as lat/lng. +// +// Note: S2 does not ship a public initial-bearing function, so the heading +// benchmark is intentionally absent — this is itself information for the +// reader. 
+ +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "random_data.hpp" + +namespace { + +inline S2Point to_s2_point(const geo::LatLng& p) { + return S2LatLng::FromDegrees(p.lat, p.lng).ToPoint(); +} + +inline std::vector to_s2_points(const std::vector& src) { + std::vector out; + out.reserve(src.size()); + for (const auto& p : src) out.push_back(to_s2_point(p)); + return out; +} + +} // namespace + +// --- distance: input is lat/lng, conversion counted in the timed loop ------ + +static void BM_S2_DistanceBetween(benchmark::State& state) { + const auto pts = geo::bench::random_points(2 * static_cast(state.range(0))); + for (auto _ : state) { + for (std::size_t i = 0; i + 1 < pts.size(); i += 2) { + const S2Point a = to_s2_point(pts[i]); + const S2Point b = to_s2_point(pts[i + 1]); + const S1Angle angle(a, b); + benchmark::DoNotOptimize(S2Earth::ToMeters(angle)); + } + } + state.SetItemsProcessed(state.iterations() * state.range(0)); +} +BENCHMARK(BM_S2_DistanceBetween)->Arg(1000)->Arg(100000); + +// --- contains: polygon converted once outside the loop --------------------- + +static void BM_S2_Contains(benchmark::State& state) { + const auto poly_ll = geo::bench::regular_polygon( + static_cast(state.range(0)), 40.0, -74.0, 5.0); + auto loop = std::make_unique(to_s2_points(poly_ll)); + loop->Normalize(); // S2Loop expects CCW; Normalize flips if needed. 
+ + const auto queries_ll = geo::bench::queries_around(40.0, -74.0, 5.0, 1000); + const auto queries = to_s2_points(queries_ll); + + for (auto _ : state) { + for (const auto& q : queries) { + benchmark::DoNotOptimize(loop->Contains(q)); + } + } + state.SetItemsProcessed(state.iterations() * static_cast(queries.size())); +} +BENCHMARK(BM_S2_Contains)->Arg(10)->Arg(100)->Arg(1000); + +// --- area ------------------------------------------------------------------ + +static void BM_S2_Area(benchmark::State& state) { + const auto poly_ll = geo::bench::regular_polygon( + static_cast(state.range(0)), 40.0, -74.0, 5.0); + auto loop = std::make_unique(to_s2_points(poly_ll)); + loop->Normalize(); + + const double r2 = S2Earth::RadiusMeters() * S2Earth::RadiusMeters(); + for (auto _ : state) { + benchmark::DoNotOptimize(loop->GetArea() * r2); + } + state.SetItemsProcessed(state.iterations() * state.range(0)); +} +BENCHMARK(BM_S2_Area)->Arg(10)->Arg(100)->Arg(1000); + +// --- path_length ----------------------------------------------------------- + +static void BM_S2_PathLength(benchmark::State& state) { + const auto path_ll = geo::bench::random_points(static_cast(state.range(0))); + const S2Polyline line(to_s2_points(path_ll)); + for (auto _ : state) { + benchmark::DoNotOptimize(S2Earth::ToMeters(line.GetLength())); + } + state.SetItemsProcessed(state.iterations() * state.range(0)); +} +BENCHMARK(BM_S2_PathLength)->Arg(10)->Arg(100)->Arg(1000); diff --git a/docs/benchmarks.md b/docs/benchmarks.md new file mode 100644 index 0000000..851d167 --- /dev/null +++ b/docs/benchmarks.md @@ -0,0 +1,186 @@ +# Benchmarks + +`geo-utils-cpp` aims to be **as fast as a hand-written haversine** with a +**fraction of the disk footprint** of the popular alternatives. This page +shows the numbers behind that claim and how to reproduce them. + +For build and run instructions see [`benchmarks/README.md`](../benchmarks/README.md). 
+ +## TL;DR + +- **Speed.** Tied with hand-written haversine on every operation we provide + (header-only design adds zero overhead). Within ~10 % of Boost.Geometry's + spherical strategy on raw distance/heading. ~10–30× faster than + GeographicLib's WGS84 geodesic — but **less accurate**, on a sphere. +- **Disk footprint.** **32 KB** of headers vs **32.8 MB** for S2 (with + abseil), **12.3 MB** for Boost.Geometry's `geometry` subset alone, **4.6 MB** + for GeographicLib — i.e. a ~144–1000× difference in what you have to ship + or have on disk. +- **Where competitors win.** S2 has spatial indexing so its `contains` + scales sub-linearly with polygon size; if you query a 1 000-vertex + geofence millions of times, S2 will win. We're still ~7× faster than + Boost.Geometry on `contains`, though. + +## Methodology + +| Item | Value | +| ----------------- | -------------------------------------------------- | +| Compiler | Apple clang 17 (`-std=c++17 -O2 -DNDEBUG`) | +| Build type | Release | +| Benchmark harness | [Google Benchmark 1.8.4](https://github.com/google/benchmark) | +| Host | Apple M1, 8 cores, 8 GB RAM, macOS 15.7 | +| Random data | Mersenne-twister seeded to a fixed value, lat ∈ [-80, 80], lng ∈ [-180, 180] | +| Library versions | s2geometry 0.14.0 · boost 1.90.0 · geographiclib 2.7 | + +Each library is fed identical inputs (see +[`benchmarks/common/random_data.hpp`](../benchmarks/common/random_data.hpp)). +Conversion to a library's native point type is **counted in the timed +loop** when the input is naturally lat/lng (distance, heading). For polygon +operations the polygon is converted once outside the loop, matching real +geofence-style usage. + +### Apples-to-apples notes + +- **S2 vs us:** both on a sphere. Fair on accuracy. S2 holds polygons in an + internal index (`S2Loop` keeps a bounding rect; `S2ShapeIndex` adds more); + that's why its `contains` scales differently from our linear loop. +- **Boost.Geometry vs us:** uses `cs::spherical_equatorial`. 
Fair. +- **GeographicLib vs us:** uses Karney's iterative WGS84 geodesic. + Slower *and* more accurate. Treat as a trade-off data point, not a + "we are faster" claim. +- **GeographicLib has no native point-in-polygon.** Real capability gap. +- **S2 has no public initial-bearing API.** Same. + +## Speed results + +Throughput in million items per second (higher is better). All numbers from +the host described above. Run `./build-bench/benchmarks/bench_*` locally +to reproduce. + +### `distance_between` + +| Library | N=1 000 | N=100 000 | +| ---------------------- | ------: | --------: | +| **geo-utils-cpp** | 36.7 | 25.2 | +| naive haversine | 37.0 | 25.7 | +| S2 Geometry | 14.3 | 10.8 | +| Boost.Geometry | 40.0 | 28.7 | +| GeographicLib (WGS84) | 1.22 | 1.22 | + +We match naive haversine within noise. Boost.Geometry's haversine is ~10 % +faster — likely a different operation order in their inline code. S2 is +slower because each call converts lat/lng → 3D `S2Point`. GeographicLib is +~30× slower (and ~30× more accurate near long-distance pairs). + +### `heading` + +| Library | N=1 000 | N=100 000 | +| ---------------------- | ------: | --------: | +| **geo-utils-cpp** | 23.1 | 13.9 | +| Boost.Geometry | 24.9 | 14.9 | +| GeographicLib | 1.15 | 1.14 | +| S2 Geometry | _no public bearing API_ | | + +### `contains` (point-in-polygon) + +Million queries per second; 1 000 query points × polygon vertex count per +iteration. + +| Library | poly N=10 | poly N=100 | poly N=1 000 | +| -------------------- | --------: | ---------: | -----------: | +| **geo-utils-cpp** | 13.3 | 2.17 | 0.244 | +| S2 Geometry | 27.1 | 18.0 | 21.1 | +| Boost.Geometry | 1.89 | 0.23 | 0.024 | +| GeographicLib | _no native PIP_ | | | + +This is where the libraries differ most. **S2 has a built-in bounding-rect +test plus a stream-of-edges layout** that makes its loop containment near +sub-linear in vertex count. 
Our implementation is a textbook O(N) ray cast +through the great-circle edges — fast for small geofences, slower than S2 +on huge ones. Boost.Geometry's spherical `within` is significantly slower +than both. + +If your workload is "millions of queries against the same large polygon", +S2 will beat us. If it's "small or medium polygons, queried in any volume", +we're competitive and you get a ~1000× smaller install. + +### `area` (M polygons/s × vertex count) + +| Library | N=10 | N=100 | N=1 000 | +| -------------------- | ---: | ----: | ------: | +| **geo-utils-cpp** | 62.4 | 59.7 | 60.6 | +| S2 Geometry | 16.1 | 13.8 | 13.8 | +| Boost.Geometry | 44.7 | 36.1 | 36.5 | +| GeographicLib | 1.71 | 2.00 | 2.04 | + +We win clearly on `area` — our spherical-triangle accumulation is a +straight-line loop with no allocation; S2 spends time inside `S2Loop::GetArea` +maintaining its index, and Boost.Geometry's strategy machinery costs ~1.6×. + +### `path_length` (M points/s) + +| Library | N=10 | N=100 | N=1 000 | +| -------------------- | ---: | ----: | ------: | +| **geo-utils-cpp** | 52.0 | 43.9 | 39.7 | +| S2 Geometry | 104.6| 96.2 | 85.7 | +| Boost.Geometry | 49.0 | 43.2 | 39.9 | +| GeographicLib | 1.50 | 1.24 | 1.21 | + +S2 wins by ~2× because the `S2Polyline` is pre-built (3D unit vectors stored +contiguously) and the inner loop reduces to dot products rather than `sin`, +`cos`, `atan2`. We tied with Boost.Geometry. GeographicLib pays the +ellipsoidal cost. + +## Disk footprint + +Stripped binary size of a minimal "distance + point-in-polygon" consumer +plus the on-disk install size of each library. Smaller is better. 
+ +| Library | Stripped binary | Library install | Notes | +| -------------------- | --------------: | --------------: | ---------------------------------- | +| **geo-utils-cpp** | **33 KB** | **32 KB** | header-only, zero deps | +| naive haversine | 33 KB | 0 | hand-written, no library | +| S2 Geometry | 33.4 KB | 32.8 MB | S2 7.1 MB + abseil 14 MB (+ rest) | +| Boost.Geometry | 50.8 KB | 12.3 MB | only the `geometry` subset of Boost | +| GeographicLib | 33 KB | 4.6 MB | distance only — no PIP | + +The "Library install" column is *what you have to ship or have on disk* to +use the library. For `geo-utils-cpp` that's the whole `include/` directory +(every header, every comment); for the others it's the package install +prefix from Homebrew. Boost is reported as just its `geometry` headers — +the full Boost install is ~362 MB. + +**The on-disk gap is roughly 144× to 1000×.** If binary size matters +(embedded, container layers, mobile bundles), this is the headline. + +## Where each library is the right tool + +- **geo-utils-cpp** — small/medium polygons, lat/lng inputs, no-deps + constraint, container/mobile bundle size matters. Sphere accuracy is + acceptable. +- **S2 Geometry** — millions of queries against the same large geofence, + spatial indexing matters, you can afford ~33 MB install. Different + data model (3D unit vectors). +- **Boost.Geometry** — already-Boost project, want one library for many + geometry types and CSes. Slower at PIP than both us and S2; pulls in the + whole `geometry` subset. +- **GeographicLib** — sub-meter geodesic accuracy on the WGS84 ellipsoid. + Slower by 1–2 orders of magnitude. No PIP. + +## Reproducing + +```sh +# Speed +cmake -S . 
-B build-bench \ + -DGEO_UTILS_CPP_BUILD_BENCHMARKS=ON \ + -DCMAKE_BUILD_TYPE=Release +cmake --build build-bench --target bench_all -j +for b in build-bench/benchmarks/bench_*; do "$b"; done + +# Disk footprint +./benchmarks/size/measure.sh +``` + +If a competitor is missing it will be skipped with a `STATUS` message during +CMake configure (or a "not installed" line from `measure.sh`). Install only +the competitors you care about. From 92cd438a55481566553342d64549f29d8bfbcf01 Mon Sep 17 00:00:00 2001 From: Aleksandr Kovalko Date: Sat, 9 May 2026 15:51:16 +0200 Subject: [PATCH 2/9] Refreshed benchmark numbers and tightened apples-to-apples --- .github/workflows/ci.yml | 10 ++ README.md | 27 +++-- benchmarks/CMakeLists.txt | 12 +- benchmarks/size/consumer_boost.cpp | 5 +- benchmarks/size/consumer_geo_utils.cpp | 8 +- benchmarks/size/consumer_geographiclib.cpp | 5 +- benchmarks/size/consumer_naive.cpp | 5 +- benchmarks/size/consumer_s2.cpp | 5 +- benchmarks/size/measure.sh | 17 ++- benchmarks/speed/bench_boost.cpp | 26 +++-- benchmarks/speed/bench_geo_utils.cpp | 3 +- benchmarks/speed/bench_s2.cpp | 26 +++-- docs/benchmarks.md | 127 +++++++++++---------- 13 files changed, 165 insertions(+), 111 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4f11f72..ab80d6b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -71,3 +71,13 @@ jobs: - name: Test run: ctest --test-dir build -C Release --output-on-failure + + # Smoke-build the dependency-free portion of the benchmarks so that + # changes to benchmarks/CMakeLists.txt or the bench source files can't + # silently break the build. We don't run the benchmarks here — that + # requires installing S2/Boost/GeographicLib and is intentionally + # out-of-scope for CI. + - name: Smoke-build benchmarks (no external deps) + run: | + cmake -S . 
-B build-bench -DGEO_UTILS_CPP_BUILD_BENCHMARKS=ON -DGEO_UTILS_CPP_BUILD_TESTS=OFF -DGEO_UTILS_CPP_BUILD_EXAMPLES=OFF -DCMAKE_BUILD_TYPE=Release + cmake --build build-bench --config Release --target bench_geo_utils bench_naive diff --git a/README.md b/README.md index 859c46f..f3df51b 100644 --- a/README.md +++ b/README.md @@ -144,18 +144,21 @@ int main() { `geo-utils-cpp` is a near-zero-overhead wrapper over the math itself, with a tiny disk footprint thanks to header-only + zero dependencies. -| Library | `distance_between` (M pairs/s) | `contains` (poly N=100, M qps) | Install size | -| -------------------- | -----------------------------: | -----------------------------: | ------------: | -| **geo-utils-cpp** | **36.7** | **2.17** | **32 KB** | -| naive haversine | 37.0 | — | 0 | -| S2 Geometry | 14.3 | 18.0 | 32.8 MB | -| Boost.Geometry | 40.0 | 0.23 | 12.3 MB | -| GeographicLib | 1.2 | no native PIP | 4.6 MB | - -Apple M1 · clang 17 · `-O2 -DNDEBUG`. We're tied with hand-written haversine -on raw math, ~7× faster than Boost.Geometry on point-in-polygon, and have a -**144–1000× smaller install footprint** than the alternatives. S2 wins on -large-polygon containment thanks to spatial indexing. +| Library | `distance_between` (M pairs/s) | `contains` (poly N=10, M qps) | Install size | +| -------------------- | -----------------------------: | ----------------------------: | ------------: | +| **geo-utils-cpp** | **39.5** | **15.9** | **32 KB** | +| naive haversine | 37.4 | — | 0 | +| S2 Geometry | 15.1 | 12.9 | 32.8 MB | +| Boost.Geometry | 38.4 | 1.85| 12.3 MB | +| GeographicLib | 1.2 | no native PIP | 4.6 MB | + +Apple M1 · clang 17 · `-O2 -DNDEBUG`. Tied with Boost.Geometry on +per-pair ops (`distance`, `heading`) within noise; ahead on polygon ops +(`area`, `path_length`, `contains` for tiny polygons). Faster than S2 on +`distance`, `heading`, `area`, `path_length`, and on `contains` against +~10-vertex polygons. 
**S2 wins `contains` from ~100 vertices onward** via +its bounding-rectangle prefilter. **144–1000× smaller install footprint** +than the alternatives. Zero overhead over hand-written haversine. See [docs/benchmarks.md](docs/benchmarks.md) for the full methodology, all operations, and a discussion of when to reach for each library. diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt index 7c00b8a..515e7b3 100644 --- a/benchmarks/CMakeLists.txt +++ b/benchmarks/CMakeLists.txt @@ -19,7 +19,8 @@ set(BENCHMARK_ENABLE_GTEST_TESTS OFF CACHE BOOL "" FORCE) FetchContent_Declare( google_benchmark - URL https://github.com/google/benchmark/archive/v1.8.4.tar.gz + URL https://github.com/google/benchmark/archive/v1.8.4.tar.gz + URL_HASH SHA256=3e7059b6b11fb1bbe28e33e02519398ca94c1818874ebed18e504dc6f709be45 DOWNLOAD_EXTRACT_TIMESTAMP ON ) FetchContent_MakeAvailable(google_benchmark) @@ -54,8 +55,9 @@ _geo_utils_cpp_add_bench(naive speed/bench_naive.cpp) # --- Optional: S2 Geometry -------------------------------------------------- # # vcpkg port name: `s2geometry`. Homebrew formula: `s2geometry`. The CMake -# package is conventionally exported as `s2`. -find_package(s2 QUIET) +# package is conventionally exported as `s2`, but case-sensitive filesystems +# may also see `S2Config.cmake` from some installs — accept either. 
+find_package(s2 NAMES s2 S2 QUIET) if(s2_FOUND) _geo_utils_cpp_add_bench(s2 speed/bench_s2.cpp s2::s2) message(STATUS "geo-utils-cpp benchmarks: S2 found — bench_s2 enabled.") @@ -73,9 +75,9 @@ if(POLICY CMP0167) cmake_policy(SET CMP0167 NEW) endif() -find_package(Boost 1.70 QUIET CONFIG) +find_package(Boost 1.71 QUIET CONFIG) if(NOT Boost_FOUND) - find_package(Boost 1.70 QUIET) + find_package(Boost 1.71 QUIET) endif() if(Boost_FOUND) diff --git a/benchmarks/size/consumer_boost.cpp b/benchmarks/size/consumer_boost.cpp index a1fe34f..083c950 100644 --- a/benchmarks/size/consumer_boost.cpp +++ b/benchmarks/size/consumer_boost.cpp @@ -14,8 +14,9 @@ using Poly = bg::model::polygon; int main(int argc, char** argv) { if (argc < 5) return 1; - Pt a(std::atof(argv[2]), std::atof(argv[1])); // (lng, lat) - Pt b(std::atof(argv[4]), std::atof(argv[3])); + char* end; + Pt a(std::strtod(argv[2], &end), std::strtod(argv[1], &end)); // (lng, lat) + Pt b(std::strtod(argv[4], &end), std::strtod(argv[3], &end)); Poly poly; bg::append(poly.outer(), Pt{-74.1, 40.7}); bg::append(poly.outer(), Pt{-74.1, 40.8}); diff --git a/benchmarks/size/consumer_geo_utils.cpp b/benchmarks/size/consumer_geo_utils.cpp index bf22d78..1e8684c 100644 --- a/benchmarks/size/consumer_geo_utils.cpp +++ b/benchmarks/size/consumer_geo_utils.cpp @@ -6,12 +6,14 @@ #include #include -#include +#include +#include int main(int argc, char** argv) { if (argc < 5) return 1; - geo::LatLng a(std::atof(argv[1]), std::atof(argv[2])); - geo::LatLng b(std::atof(argv[3]), std::atof(argv[4])); + char* end; + geo::LatLng a(std::strtod(argv[1], &end), std::strtod(argv[2], &end)); + geo::LatLng b(std::strtod(argv[3], &end), std::strtod(argv[4], &end)); std::vector poly = { {40.7, -74.1}, {40.8, -74.1}, {40.8, -74.0}, {40.7, -74.0}, }; diff --git a/benchmarks/size/consumer_geographiclib.cpp b/benchmarks/size/consumer_geographiclib.cpp index 5e22e5b..4c5fd47 100644 --- a/benchmarks/size/consumer_geographiclib.cpp +++ 
b/benchmarks/size/consumer_geographiclib.cpp @@ -12,9 +12,10 @@ int main(int argc, char** argv) { if (argc < 5) return 1; double s12 = 0.0; + char* end; GeographicLib::Geodesic::WGS84().Inverse( - std::atof(argv[1]), std::atof(argv[2]), - std::atof(argv[3]), std::atof(argv[4]), + std::strtod(argv[1], &end), std::strtod(argv[2], &end), + std::strtod(argv[3], &end), std::strtod(argv[4], &end), s12); std::printf("%.1f -1\n", s12); return 0; diff --git a/benchmarks/size/consumer_naive.cpp b/benchmarks/size/consumer_naive.cpp index e54cc1d..9e0072b 100644 --- a/benchmarks/size/consumer_naive.cpp +++ b/benchmarks/size/consumer_naive.cpp @@ -49,8 +49,9 @@ bool contains(LL p, const std::vector& poly) { int main(int argc, char** argv) { if (argc < 5) return 1; - LL a{std::atof(argv[1]), std::atof(argv[2])}; - LL b{std::atof(argv[3]), std::atof(argv[4])}; + char* end; + LL a{std::strtod(argv[1], &end), std::strtod(argv[2], &end)}; + LL b{std::strtod(argv[3], &end), std::strtod(argv[4], &end)}; std::vector poly = { {40.7, -74.1}, {40.8, -74.1}, {40.8, -74.0}, {40.7, -74.0}, }; diff --git a/benchmarks/size/consumer_s2.cpp b/benchmarks/size/consumer_s2.cpp index 0b69fb8..6f55c82 100644 --- a/benchmarks/size/consumer_s2.cpp +++ b/benchmarks/size/consumer_s2.cpp @@ -13,8 +13,9 @@ int main(int argc, char** argv) { if (argc < 5) return 1; - const auto a = S2LatLng::FromDegrees(std::atof(argv[1]), std::atof(argv[2])).ToPoint(); - const auto b = S2LatLng::FromDegrees(std::atof(argv[3]), std::atof(argv[4])).ToPoint(); + char* end; + const auto a = S2LatLng::FromDegrees(std::strtod(argv[1], &end), std::strtod(argv[2], &end)).ToPoint(); + const auto b = S2LatLng::FromDegrees(std::strtod(argv[3], &end), std::strtod(argv[4], &end)).ToPoint(); std::vector verts = { S2LatLng::FromDegrees(40.7, -74.1).ToPoint(), S2LatLng::FromDegrees(40.8, -74.1).ToPoint(), diff --git a/benchmarks/size/measure.sh b/benchmarks/size/measure.sh index 4552a1f..8873502 100755 --- a/benchmarks/size/measure.sh +++ 
b/benchmarks/size/measure.sh @@ -22,6 +22,8 @@ fi SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" ROOT_DIR="$(cd "${SCRIPT_DIR}/../.." && pwd)" TMP="$(mktemp -d)" +LOG_DIR="${ROOT_DIR}/build-bench/size-logs" +ANY_FAILURE=0 trap 'rm -rf "${TMP}"' EXIT is_macos() { [ "$(uname)" = "Darwin" ]; } @@ -62,11 +64,18 @@ build() { # name, source, extra_flags... local name="$1"; shift local source="$1"; shift - local out="${TMP}/${name// /_}" - if "${CXX}" ${CXXFLAGS} ${EXTRA_LD} "${source}" -o "${out}" "$@" 2>"${TMP}/${name}.log"; then + local safe_name="${name// /_}" + local out="${TMP}/${safe_name}" + local log="${TMP}/${safe_name}.log" + if "${CXX}" ${CXXFLAGS} ${EXTRA_LD} "${source}" -o "${out}" "$@" 2>"${log}"; then strip "${out}" 2>/dev/null || true file_size "${out}" else + # Persist the failure log so the user can diagnose after the trap + # cleans up TMP. + mkdir -p "${LOG_DIR}" + cp "${log}" "${LOG_DIR}/${safe_name}.log" + ANY_FAILURE=1 echo "" # signals "skipped" fi } @@ -154,4 +163,6 @@ echo " - 'Installed' for geo-utils-cpp is the include/ directory (everything" echo " you ship). For other libraries it's the package install prefix on" echo " Homebrew (or the geometry subset for Boost) — what the user must" echo " have on disk to consume the library." -echo " - Build logs (if any) are in: ${TMP} (kept until script exit)" +if [ "${ANY_FAILURE}" = "1" ]; then + echo " - Some builds failed. Diagnostic logs preserved in: ${LOG_DIR}" +fi diff --git a/benchmarks/speed/bench_boost.cpp b/benchmarks/speed/bench_boost.cpp index 224fe77..0405326 100644 --- a/benchmarks/speed/bench_boost.cpp +++ b/benchmarks/speed/bench_boost.cpp @@ -4,8 +4,19 @@ // Boost.Geometry equivalents using `cs::spherical_equatorial` — // the same sphere-based model we use, so this is a fair head-to-head. // +// Conversion policy (matches bench_s2.cpp): +// * Streams of points: converted INSIDE the timed loop. +// * Long-lived polygon: pre-converted ONCE outside the loop. 
+// // Coordinate convention reminder: Boost.Geometry expects (lng, lat) order // for spherical_equatorial points. +// +// Strategy note for `contains`: bg::within with cs::spherical_equatorial +// auto-selects strategy::within::spherical_winding, which traces great-circle +// edges and classifies crossings carefully. Our own `geo::contains` defaults +// to rhumb-line edges (geodesic=false), which is significantly cheaper. +// The performance gap on `contains` reflects this algorithmic difference, +// not a Boost misconfiguration. #include @@ -85,7 +96,7 @@ static void BM_Boost_Heading(benchmark::State& state) { } BENCHMARK(BM_Boost_Heading)->Arg(1000)->Arg(100000); -// --- contains (within) ----------------------------------------------------- +// --- contains: polygon converted once; queries converted inside the loop -- static void BM_Boost_Contains(benchmark::State& state) { const auto poly_ll = geo::bench::regular_polygon( @@ -93,16 +104,13 @@ static void BM_Boost_Contains(benchmark::State& state) { const Polygon poly = to_boost_polygon(poly_ll); const auto queries_ll = geo::bench::queries_around(40.0, -74.0, 5.0, 1000); - std::vector queries; - queries.reserve(queries_ll.size()); - for (const auto& q : queries_ll) queries.push_back(to_boost_point(q)); for (auto _ : state) { - for (const auto& q : queries) { - benchmark::DoNotOptimize(bg::within(q, poly)); + for (const auto& q : queries_ll) { + benchmark::DoNotOptimize(bg::within(to_boost_point(q), poly)); } } - state.SetItemsProcessed(state.iterations() * static_cast(queries.size())); + state.SetItemsProcessed(state.iterations() * static_cast(queries_ll.size())); } BENCHMARK(BM_Boost_Contains)->Arg(10)->Arg(100)->Arg(1000); @@ -122,13 +130,13 @@ static void BM_Boost_Area(benchmark::State& state) { } BENCHMARK(BM_Boost_Area)->Arg(10)->Arg(100)->Arg(1000); -// --- path_length ----------------------------------------------------------- +// --- path_length: input is lat/lng; convert + sum inside the timed loop --- 
static void BM_Boost_PathLength(benchmark::State& state) { const auto path_ll = geo::bench::random_points(static_cast(state.range(0))); - const LineString ls = to_boost_linestring(path_ll); bg::strategy::distance::haversine haversine(kEarthRadius); for (auto _ : state) { + LineString ls = to_boost_linestring(path_ll); benchmark::DoNotOptimize(bg::length(ls, haversine)); } state.SetItemsProcessed(state.iterations() * state.range(0)); diff --git a/benchmarks/speed/bench_geo_utils.cpp b/benchmarks/speed/bench_geo_utils.cpp index 13fad86..c185d88 100644 --- a/benchmarks/speed/bench_geo_utils.cpp +++ b/benchmarks/speed/bench_geo_utils.cpp @@ -6,7 +6,8 @@ #include #include -#include +#include +#include #include "random_data.hpp" diff --git a/benchmarks/speed/bench_s2.cpp b/benchmarks/speed/bench_s2.cpp index 00eba60..b3a3497 100644 --- a/benchmarks/speed/bench_s2.cpp +++ b/benchmarks/speed/bench_s2.cpp @@ -2,10 +2,15 @@ // Licensed under the Apache License, Version 2.0 // // S2 Geometry equivalents. S2 uses a sphere, same as us — apples-to-apples -// comparison on accuracy. Where the API forces a different shape (input is -// 3D unit vector S2Point, not lat/lng), we include the conversion in the -// timed loop because that is the realistic cost when the data comes from -// outside as lat/lng. +// comparison on accuracy. +// +// Conversion policy: +// * Streams of points (distance, contains queries, path_length): converted +// INSIDE the timed loop. The input is naturally lat/lng; the cost of +// reaching S2Point is part of the realistic cost. +// * Long-lived geometry (the polygon used by contains/area): pre-converted +// ONCE outside the loop. Matches the typical "geofence loaded once, +// queried many times" pattern. 
// // Note: S2 does not ship a public initial-bearing function, so the heading // benchmark is intentionally absent — this is itself information for the @@ -57,7 +62,7 @@ static void BM_S2_DistanceBetween(benchmark::State& state) { } BENCHMARK(BM_S2_DistanceBetween)->Arg(1000)->Arg(100000); -// --- contains: polygon converted once outside the loop --------------------- +// --- contains: polygon converted once; queries converted inside the loop -- static void BM_S2_Contains(benchmark::State& state) { const auto poly_ll = geo::bench::regular_polygon( @@ -66,14 +71,13 @@ static void BM_S2_Contains(benchmark::State& state) { loop->Normalize(); // S2Loop expects CCW; Normalize flips if needed. const auto queries_ll = geo::bench::queries_around(40.0, -74.0, 5.0, 1000); - const auto queries = to_s2_points(queries_ll); for (auto _ : state) { - for (const auto& q : queries) { - benchmark::DoNotOptimize(loop->Contains(q)); + for (const auto& q : queries_ll) { + benchmark::DoNotOptimize(loop->Contains(to_s2_point(q))); } } - state.SetItemsProcessed(state.iterations() * static_cast(queries.size())); + state.SetItemsProcessed(state.iterations() * static_cast(queries_ll.size())); } BENCHMARK(BM_S2_Contains)->Arg(10)->Arg(100)->Arg(1000); @@ -93,12 +97,12 @@ static void BM_S2_Area(benchmark::State& state) { } BENCHMARK(BM_S2_Area)->Arg(10)->Arg(100)->Arg(1000); -// --- path_length ----------------------------------------------------------- +// --- path_length: input is lat/lng; convert + sum inside the timed loop --- static void BM_S2_PathLength(benchmark::State& state) { const auto path_ll = geo::bench::random_points(static_cast(state.range(0))); - const S2Polyline line(to_s2_points(path_ll)); for (auto _ : state) { + S2Polyline line(to_s2_points(path_ll)); benchmark::DoNotOptimize(S2Earth::ToMeters(line.GetLength())); } state.SetItemsProcessed(state.iterations() * state.range(0)); diff --git a/docs/benchmarks.md b/docs/benchmarks.md index 851d167..0455873 100644 --- 
a/docs/benchmarks.md +++ b/docs/benchmarks.md @@ -8,18 +8,22 @@ For build and run instructions see [`benchmarks/README.md`](../benchmarks/README ## TL;DR -- **Speed.** Tied with hand-written haversine on every operation we provide - (header-only design adds zero overhead). Within ~10 % of Boost.Geometry's - spherical strategy on raw distance/heading. ~10–30× faster than - GeographicLib's WGS84 geodesic — but **less accurate**, on a sphere. +- **Speed.** Ties Boost.Geometry's spherical strategy on `distance` / + `heading` (within noise) and wins clearly on `area`, `path_length`, and + `contains`. Wins vs S2 on `distance`, `heading`, `area`, `path_length`, + and on `contains` against tiny polygons (~10 vertices). ~10–30× faster + than GeographicLib's WGS84 geodesic — but **less accurate**, on a + sphere. Matches hand-written haversine within noise: zero header-only + overhead. - **Disk footprint.** **32 KB** of headers vs **32.8 MB** for S2 (with - abseil), **12.3 MB** for Boost.Geometry's `geometry` subset alone, **4.6 MB** - for GeographicLib — i.e. a ~144–1000× difference in what you have to ship - or have on disk. -- **Where competitors win.** S2 has spatial indexing so its `contains` - scales sub-linearly with polygon size; if you query a 1 000-vertex - geofence millions of times, S2 will win. We're still ~7× faster than - Boost.Geometry on `contains`, though. + abseil), **12.3 MB** for Boost.Geometry's `geometry` subset alone, + **4.6 MB** for GeographicLib — a ~144–1000× difference in what you have + to ship or have on disk. +- **Where competitors win.** As soon as `contains` polygons reach ~100 + vertices, S2's bounding-rectangle prefilter plus a tightly inlined + edge-crossing routine take over: S2 is ~4× faster than us at N=100 and + ~37× faster at N=1 000. For `contains` against geofences of ~100+ + vertices queried at high volume, S2 is the right tool. ## Methodology @@ -41,9 +45,12 @@ geofence-style usage. 
### Apples-to-apples notes -- **S2 vs us:** both on a sphere. Fair on accuracy. S2 holds polygons in an - internal index (`S2Loop` keeps a bounding rect; `S2ShapeIndex` adds more); - that's why its `contains` scales differently from our linear loop. +- **S2 vs us:** both on a sphere. Fair on accuracy. `S2Loop` stores a + bounding rectangle with the loop and uses it as an early-exit predicate + inside `Contains`; that, plus a tightly inlined edge-crossing routine, + is what makes its `contains` scale differently from our linear ray-cast. + Note: this is *not* spatial indexing — that lives in `S2ShapeIndex`, + which we do not construct here. - **Boost.Geometry vs us:** uses `cs::spherical_equatorial`. Fair. - **GeographicLib vs us:** uses Karney's iterative WGS84 geodesic. Slower *and* more accurate. Treat as a trade-off data point, not a @@ -61,75 +68,77 @@ to reproduce. | Library | N=1 000 | N=100 000 | | ---------------------- | ------: | --------: | -| **geo-utils-cpp** | 36.7 | 25.2 | -| naive haversine | 37.0 | 25.7 | -| S2 Geometry | 14.3 | 10.8 | -| Boost.Geometry | 40.0 | 28.7 | +| **geo-utils-cpp** | 39.5 | 25.7 | +| naive haversine | 37.4 | 25.7 | +| S2 Geometry | 15.1 | 10.6 | +| Boost.Geometry | 38.4 | 27.0 | | GeographicLib (WGS84) | 1.22 | 1.22 | -We match naive haversine within noise. Boost.Geometry's haversine is ~10 % -faster — likely a different operation order in their inline code. S2 is -slower because each call converts lat/lng → 3D `S2Point`. GeographicLib is -~30× slower (and ~30× more accurate near long-distance pairs). +Tied with naive haversine and Boost.Geometry within noise. S2 is slower +because each call converts lat/lng → 3D `S2Point`. GeographicLib is ~30× +slower (and substantially more accurate on long-distance pairs). 
### `heading` | Library | N=1 000 | N=100 000 | | ---------------------- | ------: | --------: | -| **geo-utils-cpp** | 23.1 | 13.9 | -| Boost.Geometry | 24.9 | 14.9 | +| **geo-utils-cpp** | 25.8 | 15.1 | +| Boost.Geometry | 26.3 | 14.3 | | GeographicLib | 1.15 | 1.14 | | S2 Geometry | _no public bearing API_ | | ### `contains` (point-in-polygon) -Million queries per second; 1 000 query points × polygon vertex count per -iteration. +Million queries per second (1 000 query points per iteration). | Library | poly N=10 | poly N=100 | poly N=1 000 | | -------------------- | --------: | ---------: | -----------: | -| **geo-utils-cpp** | 13.3 | 2.17 | 0.244 | -| S2 Geometry | 27.1 | 18.0 | 21.1 | -| Boost.Geometry | 1.89 | 0.23 | 0.024 | +| **geo-utils-cpp** | 15.9 | 2.82 | 0.321 | +| S2 Geometry | 12.9 | 11.3 | 11.9 | +| Boost.Geometry | 1.85 | 0.23 | 0.024 | | GeographicLib | _no native PIP_ | | | -This is where the libraries differ most. **S2 has a built-in bounding-rect -test plus a stream-of-edges layout** that makes its loop containment near -sub-linear in vertex count. Our implementation is a textbook O(N) ray cast -through the great-circle edges — fast for small geofences, slower than S2 -on huge ones. Boost.Geometry's spherical `within` is significantly slower -than both. - -If your workload is "millions of queries against the same large polygon", -S2 will beat us. If it's "small or medium polygons, queried in any volume", -we're competitive and you get a ~1000× smaller install. +This is where the libraries differ most. **S2's `S2Loop::Contains` exits +early via a bounding-rectangle prefilter** for queries clearly outside the +loop, and uses a tightly inlined edge-crossing routine for the rest — so its +throughput is roughly *constant* in vertex count over our test range. Our +implementation is a textbook O(N) ray cast through edges (rhumb-line by +default), so we beat S2 only on tiny polygons (~10 vertices). 
From ~100 +vertices onward S2's prefilter dominates: ~4× faster than us at N=100, +~37× faster at N=1 000. Boost.Geometry's spherical `within` traces +*great-circle* edges — a heavier per-edge predicate that explains its +much lower throughput throughout. + +If your workload involves polygons of ~100+ vertices queried at any volume, +S2 wins `contains`. If it's "tiny polygons, or you can't ship a 33 MB +dependency", we beat both S2 and Boost.Geometry, with a ~1000× smaller +install. ### `area` (M polygons/s × vertex count) | Library | N=10 | N=100 | N=1 000 | | -------------------- | ---: | ----: | ------: | -| **geo-utils-cpp** | 62.4 | 59.7 | 60.6 | -| S2 Geometry | 16.1 | 13.8 | 13.8 | -| Boost.Geometry | 44.7 | 36.1 | 36.5 | -| GeographicLib | 1.71 | 2.00 | 2.04 | +| **geo-utils-cpp** | 69.5 | 66.6 | 65.5 | +| S2 Geometry | 15.6 | 13.7 | 13.4 | +| Boost.Geometry | 43.8 | 35.3 | 36.1 | +| GeographicLib | 1.71 | 1.99 | 2.04 | We win clearly on `area` — our spherical-triangle accumulation is a -straight-line loop with no allocation; S2 spends time inside `S2Loop::GetArea` -maintaining its index, and Boost.Geometry's strategy machinery costs ~1.6×. +straight-line loop with no allocation; S2's `S2Loop::GetArea` does more work +per vertex, and Boost.Geometry's strategy machinery costs ~1.8×. ### `path_length` (M points/s) | Library | N=10 | N=100 | N=1 000 | | -------------------- | ---: | ----: | ------: | -| **geo-utils-cpp** | 52.0 | 43.9 | 39.7 | -| S2 Geometry | 104.6| 96.2 | 85.7 | -| Boost.Geometry | 49.0 | 43.2 | 39.9 | -| GeographicLib | 1.50 | 1.24 | 1.21 | +| **geo-utils-cpp** | 51.3 | 44.9 | 41.3 | +| S2 Geometry | 39.3 | 45.7 | 36.6 | +| Boost.Geometry | 43.8 | 39.2 | 38.8 | +| GeographicLib | 1.50 | 1.25 | 1.20 | -S2 wins by ~2× because the `S2Polyline` is pre-built (3D unit vectors stored -contiguously) and the inner loop reduces to dot products rather than `sin`, -`cos`, `atan2`. We tied with Boost.Geometry. GeographicLib pays the -ellipsoidal cost. 
+Comfortably ahead of S2 at N=10 (~30 %) and N=1 000 (~13 %); tied at +N=100 within noise. Ahead of Boost.Geometry at every size. GeographicLib +pays the ellipsoidal cost. ## Disk footprint @@ -155,12 +164,12 @@ the full Boost install is ~362 MB. ## Where each library is the right tool -- **geo-utils-cpp** — small/medium polygons, lat/lng inputs, no-deps - constraint, container/mobile bundle size matters. Sphere accuracy is - acceptable. -- **S2 Geometry** — millions of queries against the same large geofence, - spatial indexing matters, you can afford ~33 MB install. Different - data model (3D unit vectors). +- **geo-utils-cpp** — tiny polygons, lat/lng inputs, no-deps constraint, + container/mobile bundle size matters. Sphere accuracy is acceptable. +- **S2 Geometry** — many `contains` queries against polygons of ~100+ + vertices, bounding-rectangle prefilter / spatial indexing (`S2ShapeIndex`) + matter, and you can afford a ~33 MB install. Different data model + (3D unit vectors). - **Boost.Geometry** — already-Boost project, want one library for many geometry types and CSes. Slower at PIP than both us and S2; pulls in the whole `geometry` subset. From 0f80985b8d5aa443440e946e085d14afa49be946 Mon Sep 17 00:00:00 2001 From: Aleksandr Kovalko Date: Sat, 9 May 2026 16:04:55 +0200 Subject: [PATCH 3/9] Refreshed benchmark numbers and tightened apples-to-apples comparison --- docs/benchmarks.md | 61 +++++++++++++++++++++++----------------------- 1 file changed, 31 insertions(+), 30 deletions(-) diff --git a/docs/benchmarks.md b/docs/benchmarks.md index 0455873..0a5425f 100644 --- a/docs/benchmarks.md +++ b/docs/benchmarks.md @@ -62,17 +62,18 @@ geofence-style usage. Throughput in million items per second (higher is better). All numbers from the host described above. Run `./build-bench/benchmarks/bench_*` locally -to reproduce. +to reproduce. 
**Bold** number = column winner, or co-winners within ~5% +(noise-level tie); bold library name = this library (`geo-utils-cpp`). ### `distance_between` -| Library | N=1 000 | N=100 000 | -| ---------------------- | ------: | --------: | -| **geo-utils-cpp** | 39.5 | 25.7 | -| naive haversine | 37.4 | 25.7 | -| S2 Geometry | 15.1 | 10.6 | -| Boost.Geometry | 38.4 | 27.0 | -| GeographicLib (WGS84) | 1.22 | 1.22 | +| Library | N=1 000 | N=100 000 | +| ---------------------- | -------: | --------: | +| **geo-utils-cpp** | **39.5** | **25.7** | +| naive haversine | **37.4** | **25.7** | +| S2 Geometry | 15.1 | 10.6 | +| Boost.Geometry | **38.4** | **27.0** | +| GeographicLib (WGS84) | 1.22 | 1.22 | Tied with naive haversine and Boost.Geometry within noise. S2 is slower because each call converts lat/lng → 3D `S2Point`. GeographicLib is ~30× @@ -80,11 +81,11 @@ slower (and substantially more accurate on long-distance pairs). ### `heading` -| Library | N=1 000 | N=100 000 | -| ---------------------- | ------: | --------: | -| **geo-utils-cpp** | 25.8 | 15.1 | -| Boost.Geometry | 26.3 | 14.3 | -| GeographicLib | 1.15 | 1.14 | +| Library | N=1 000 | N=100 000 | +| ---------------------- | -------: | --------: | +| **geo-utils-cpp** | **25.8** | **15.1** | +| Boost.Geometry | **26.3** | **14.3** | +| GeographicLib | 1.15 | 1.14 | | S2 Geometry | _no public bearing API_ | | ### `contains` (point-in-polygon) @@ -93,8 +94,8 @@ Million queries per second (1 000 query points per iteration). | Library | poly N=10 | poly N=100 | poly N=1 000 | | -------------------- | --------: | ---------: | -----------: | -| **geo-utils-cpp** | 15.9 | 2.82 | 0.321 | -| S2 Geometry | 12.9 | 11.3 | 11.9 | +| **geo-utils-cpp** | **15.9** | 2.82 | 0.321 | +| S2 Geometry | 12.9 | **11.3** | **11.9** | | Boost.Geometry | 1.85 | 0.23 | 0.024 | | GeographicLib | _no native PIP_ | | | @@ -116,12 +117,12 @@ install. 
### `area` (M polygons/s × vertex count) -| Library | N=10 | N=100 | N=1 000 | -| -------------------- | ---: | ----: | ------: | -| **geo-utils-cpp** | 69.5 | 66.6 | 65.5 | -| S2 Geometry | 15.6 | 13.7 | 13.4 | -| Boost.Geometry | 43.8 | 35.3 | 36.1 | -| GeographicLib | 1.71 | 1.99 | 2.04 | +| Library | N=10 | N=100 | N=1 000 | +| -------------------- | -------: | -------: | -------: | +| **geo-utils-cpp** | **69.5** | **66.6** | **65.5** | +| S2 Geometry | 15.6 | 13.7 | 13.4 | +| Boost.Geometry | 43.8 | 35.3 | 36.1 | +| GeographicLib | 1.71 | 1.99 | 2.04 | We win clearly on `area` — our spherical-triangle accumulation is a straight-line loop with no allocation; S2's `S2Loop::GetArea` does more work @@ -129,12 +130,12 @@ per vertex, and Boost.Geometry's strategy machinery costs ~1.8×. ### `path_length` (M points/s) -| Library | N=10 | N=100 | N=1 000 | -| -------------------- | ---: | ----: | ------: | -| **geo-utils-cpp** | 51.3 | 44.9 | 41.3 | -| S2 Geometry | 39.3 | 45.7 | 36.6 | -| Boost.Geometry | 43.8 | 39.2 | 38.8 | -| GeographicLib | 1.50 | 1.25 | 1.20 | +| Library | N=10 | N=100 | N=1 000 | +| -------------------- | -------: | -------: | -------: | +| **geo-utils-cpp** | **51.3** | **44.9** | **41.3** | +| S2 Geometry | 39.3 | **45.7** | 36.6 | +| Boost.Geometry | 43.8 | 39.2 | 38.8 | +| GeographicLib | 1.50 | 1.25 | 1.20 | Comfortably ahead of S2 at N=10 (~30 %) and N=1 000 (~13 %); tied at N=100 within noise. Ahead of Boost.Geometry at every size. GeographicLib @@ -148,10 +149,10 @@ plus the on-disk install size of each library. Smaller is better. 
| Library | Stripped binary | Library install | Notes | | -------------------- | --------------: | --------------: | ---------------------------------- | | **geo-utils-cpp** | **33 KB** | **32 KB** | header-only, zero deps | -| naive haversine | 33 KB | 0 | hand-written, no library | -| S2 Geometry | 33.4 KB | 32.8 MB | S2 7.1 MB + abseil 14 MB (+ rest) | +| naive haversine | **33 KB** | 0 | hand-written, no library | +| S2 Geometry | **33.4 KB** | 32.8 MB | S2 7.1 MB + abseil 14 MB (+ rest) | | Boost.Geometry | 50.8 KB | 12.3 MB | only the `geometry` subset of Boost | -| GeographicLib | 33 KB | 4.6 MB | distance only — no PIP | +| GeographicLib | **33 KB** | 4.6 MB | distance only — no PIP | The "Library install" column is *what you have to ship or have on disk* to use the library. For `geo-utils-cpp` that's the whole `include/` directory From fb9aa37f0c8157b4240741b33ff73a27b58c544b Mon Sep 17 00:00:00 2001 From: Aleksandr Kovalko Date: Sat, 9 May 2026 16:17:26 +0200 Subject: [PATCH 4/9] Fix measure.sh failure-tracking; refresh install-size figures --- README.md | 4 ++-- benchmarks/size/measure.sh | 8 +++++--- docs/benchmarks.md | 10 +++++----- 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index f3df51b..c059d44 100644 --- a/README.md +++ b/README.md @@ -146,7 +146,7 @@ tiny disk footprint thanks to header-only + zero dependencies. | Library | `distance_between` (M pairs/s) | `contains` (poly N=10, M qps) | Install size | | -------------------- | -----------------------------: | ----------------------------: | ------------: | -| **geo-utils-cpp** | **39.5** | **15.9** | **32 KB** | +| **geo-utils-cpp** | **39.5** | **15.9** | **36 KB** | | naive haversine | 37.4 | — | 0 | | S2 Geometry | 15.1 | 12.9 | 32.8 MB | | Boost.Geometry | 38.4 | 1.85| 12.3 MB | @@ -157,7 +157,7 @@ per-pair ops (`distance`, `heading`) within noise; ahead on polygon ops (`area`, `path_length`, `contains` for tiny polygons). 
Faster than S2 on `distance`, `heading`, `area`, `path_length`, and on `contains` against ~10-vertex polygons. **S2 wins `contains` from ~100 vertices onward** via -its bounding-rectangle prefilter. **144–1000× smaller install footprint** +its bounding-rectangle prefilter. **130–900× smaller install footprint** than the alternatives. Zero overhead over hand-written haversine. See [docs/benchmarks.md](docs/benchmarks.md) for the full methodology, all diff --git a/benchmarks/size/measure.sh b/benchmarks/size/measure.sh index 8873502..51f69a4 100755 --- a/benchmarks/size/measure.sh +++ b/benchmarks/size/measure.sh @@ -23,7 +23,9 @@ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" ROOT_DIR="$(cd "${SCRIPT_DIR}/../.." && pwd)" TMP="$(mktemp -d)" LOG_DIR="${ROOT_DIR}/build-bench/size-logs" -ANY_FAILURE=0 +# build() runs inside command substitution, so plain shell variables can't +# propagate failure state back to the parent. Use a sentinel file instead. +FAILURE_MARKER="${TMP}/.any_failure" trap 'rm -rf "${TMP}"' EXIT is_macos() { [ "$(uname)" = "Darwin" ]; } @@ -75,7 +77,7 @@ build() { # cleans up TMP. mkdir -p "${LOG_DIR}" cp "${log}" "${LOG_DIR}/${safe_name}.log" - ANY_FAILURE=1 + : > "${FAILURE_MARKER}" echo "" # signals "skipped" fi } @@ -163,6 +165,6 @@ echo " - 'Installed' for geo-utils-cpp is the include/ directory (everything" echo " you ship). For other libraries it's the package install prefix on" echo " Homebrew (or the geometry subset for Boost) — what the user must" echo " have on disk to consume the library." -if [ "${ANY_FAILURE}" = "1" ]; then +if [ -f "${FAILURE_MARKER}" ]; then echo " - Some builds failed. 
Diagnostic logs preserved in: ${LOG_DIR}" fi diff --git a/docs/benchmarks.md b/docs/benchmarks.md index 0a5425f..75d1077 100644 --- a/docs/benchmarks.md +++ b/docs/benchmarks.md @@ -15,9 +15,9 @@ For build and run instructions see [`benchmarks/README.md`](../benchmarks/README than GeographicLib's WGS84 geodesic — but **less accurate**, on a sphere. Matches hand-written haversine within noise: zero header-only overhead. -- **Disk footprint.** **32 KB** of headers vs **32.8 MB** for S2 (with +- **Disk footprint.** **36 KB** of headers vs **32.8 MB** for S2 (with abseil), **12.3 MB** for Boost.Geometry's `geometry` subset alone, - **4.6 MB** for GeographicLib — a ~144–1000× difference in what you have + **4.6 MB** for GeographicLib — a ~130–900× difference in what you have to ship or have on disk. - **Where competitors win.** As soon as `contains` polygons reach ~100 vertices, S2's bounding-rectangle prefilter plus a tightly inlined @@ -112,7 +112,7 @@ much lower throughput throughout. If your workload involves polygons of ~100+ vertices queried at any volume, S2 wins `contains`. If it's "tiny polygons, or you can't ship a 33 MB -dependency", we beat both S2 and Boost.Geometry, with a ~1000× smaller +dependency", we beat both S2 and Boost.Geometry, with a ~900× smaller install. ### `area` (M polygons/s × vertex count) @@ -148,7 +148,7 @@ plus the on-disk install size of each library. Smaller is better. | Library | Stripped binary | Library install | Notes | | -------------------- | --------------: | --------------: | ---------------------------------- | -| **geo-utils-cpp** | **33 KB** | **32 KB** | header-only, zero deps | +| **geo-utils-cpp** | **33 KB** | **36 KB** | header-only, zero deps | | naive haversine | **33 KB** | 0 | hand-written, no library | | S2 Geometry | **33.4 KB** | 32.8 MB | S2 7.1 MB + abseil 14 MB (+ rest) | | Boost.Geometry | 50.8 KB | 12.3 MB | only the `geometry` subset of Boost | @@ -160,7 +160,7 @@ use the library. 
For `geo-utils-cpp` that's the whole `include/` directory prefix from Homebrew. Boost is reported as just its `geometry` headers — the full Boost install is ~362 MB. -**The on-disk gap is roughly 144× to 1000×.** If binary size matters +**The on-disk gap is roughly 130× to 900×.** If binary size matters (embedded, container layers, mobile bundles), this is the headline. ## Where each library is the right tool From 47d33d2b07008d850652a6b30a2ccc5e63871842 Mon Sep 17 00:00:00 2001 From: Aleksandr Kovalko Date: Sun, 10 May 2026 14:51:11 +0200 Subject: [PATCH 5/9] Pre-build competitor native types outside the timed loop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address PR #14 review feedback (@MrHerrn): with native point/geometry types built fresh inside the benchmark loop, conversion overhead was being charged to each competitor's per-call cost, inflating the gap on ops like Boost.Geometry's path_length. Move every native-type build out of the timed region so the numbers reflect algorithmic cost only. bench_geographiclib's area / path_length intentionally keep the PolygonArea + AddPoint work in-loop: AddPoint *is* the per-vertex geodesic computation, and Compute() only adds the closing-edge contribution — pre-building outside the loop would measure a cached double read, not real work. Comment explains this. Refresh README.md and docs/benchmarks.md result tables and TL;DR to match the new picture: ties Boost.Geometry on distance / heading / path_length within noise; wins clearly on area; trails S2 on distance / path_length / contains algorithmically (S2 still pays lat/lng->S2Point conversion in real lat/lng workloads, which these algorithm-only numbers do not count). Install-size advantage of 130-900x is unchanged. 
--- README.md | 31 ++--- benchmarks/README.md | 10 +- benchmarks/speed/bench_boost.cpp | 40 +++--- benchmarks/speed/bench_geographiclib.cpp | 15 +++ benchmarks/speed/bench_s2.cpp | 34 +++--- docs/benchmarks.md | 148 +++++++++++++---------- 6 files changed, 157 insertions(+), 121 deletions(-) diff --git a/README.md b/README.md index c059d44..4289378 100644 --- a/README.md +++ b/README.md @@ -144,21 +144,22 @@ int main() { `geo-utils-cpp` is a near-zero-overhead wrapper over the math itself, with a tiny disk footprint thanks to header-only + zero dependencies. -| Library | `distance_between` (M pairs/s) | `contains` (poly N=10, M qps) | Install size | -| -------------------- | -----------------------------: | ----------------------------: | ------------: | -| **geo-utils-cpp** | **39.5** | **15.9** | **36 KB** | -| naive haversine | 37.4 | — | 0 | -| S2 Geometry | 15.1 | 12.9 | 32.8 MB | -| Boost.Geometry | 38.4 | 1.85| 12.3 MB | -| GeographicLib | 1.2 | no native PIP | 4.6 MB | - -Apple M1 · clang 17 · `-O2 -DNDEBUG`. Tied with Boost.Geometry on -per-pair ops (`distance`, `heading`) within noise; ahead on polygon ops -(`area`, `path_length`, `contains` for tiny polygons). Faster than S2 on -`distance`, `heading`, `area`, `path_length`, and on `contains` against -~10-vertex polygons. **S2 wins `contains` from ~100 vertices onward** via -its bounding-rectangle prefilter. **130–900× smaller install footprint** -than the alternatives. Zero overhead over hand-written haversine. +| Library | `distance_between` (M pairs/s) | `area` (poly N=100, M polys/s) | Install size | +| -------------------- | -----------------------------: | -----------------------------: | ------------: | +| **geo-utils-cpp** | **40.5** | **67.2** | **36 KB** | +| naive haversine | 38.3 | — | 0 | +| S2 Geometry | 82.9 | 14.0 | 32.8 MB | +| Boost.Geometry | 39.8 | 36.2 | 12.3 MB | +| GeographicLib | 1.2 | 2.0 | 4.6 MB | + +Apple M1 · clang 17 · `-O2 -DNDEBUG`. 
Native types pre-built outside the +timed loop — numbers reflect algorithmic cost only. Ties Boost.Geometry's +spherical strategy on `distance` / `heading` / `path_length` within noise; +**wins clearly on `area`** (allocation-free triangle-fan accumulator). S2 +is faster algorithmically on `distance` / `path_length` / `contains` — but +pays a per-call `lat/lng → S2Point` conversion in real-world lat/lng +workloads (not counted here). **130–900× smaller install footprint** than +the alternatives. Zero overhead over hand-written haversine. See [docs/benchmarks.md](docs/benchmarks.md) for the full methodology, all operations, and a discussion of when to reach for each library. diff --git a/benchmarks/README.md b/benchmarks/README.md index b4743cb..85a6e9d 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -96,10 +96,12 @@ or flags via `CXX=` and `CXXFLAGS=`. - **Same inputs everywhere.** All benchmarks pull data from `common/random_data.hpp`, which is seeded deterministically. -- **Conversion costs are included** for libraries that don't accept lat/lng - natively (S2 takes `S2Point`; Boost takes its own point type). Pre-converting - outside the timing loop would understate the cost of "I have lat/lng, I want - a distance" — the actual usage pattern. +- **Native types are pre-built outside the timing loop** for every library. + The timed work is the library's per-call computation, isolated from + `lat/lng → native-type` plumbing — so the numbers compare algorithmic cost, + not data-shape conversion overhead. (`geo-utils-cpp` accepts lat/lng + directly, so it has nothing to pre-build; that is a separate API-shape + advantage and not part of these speed numbers.) - **Polygons are pre-built** outside the timing loop because in real code a geofence is a one-time setup cost. 
- **Don't compare GeographicLib's distance/heading head-to-head as "speed".** diff --git a/benchmarks/speed/bench_boost.cpp b/benchmarks/speed/bench_boost.cpp index 0405326..c8e1ddc 100644 --- a/benchmarks/speed/bench_boost.cpp +++ b/benchmarks/speed/bench_boost.cpp @@ -4,9 +4,10 @@ // Boost.Geometry equivalents using `cs::spherical_equatorial` — // the same sphere-based model we use, so this is a fair head-to-head. // -// Conversion policy (matches bench_s2.cpp): -// * Streams of points: converted INSIDE the timed loop. -// * Long-lived polygon: pre-converted ONCE outside the loop. +// Conversion policy (matches bench_s2.cpp): every native point/geometry is +// pre-built OUTSIDE the timed loop. The timed work is the library's per-call +// computation, isolated from data-shape conversion overhead — so the numbers +// reflect algorithmic cost, not "lat/lng-in / lat/lng-out" plumbing. // // Coordinate convention reminder: Boost.Geometry expects (lng, lat) order // for spherical_equatorial points. 
@@ -68,13 +69,14 @@ inline LineString to_boost_linestring(const std::vector& pts) { // --- distance -------------------------------------------------------------- static void BM_Boost_DistanceBetween(benchmark::State& state) { - const auto pts = geo::bench::random_points(2 * static_cast(state.range(0))); + const auto pts_ll = geo::bench::random_points(2 * static_cast(state.range(0))); + std::vector pts; + pts.reserve(pts_ll.size()); + for (const auto& p : pts_ll) pts.push_back(to_boost_point(p)); bg::strategy::distance::haversine haversine(kEarthRadius); for (auto _ : state) { for (std::size_t i = 0; i + 1 < pts.size(); i += 2) { - const Point a = to_boost_point(pts[i]); - const Point b = to_boost_point(pts[i + 1]); - benchmark::DoNotOptimize(bg::distance(a, b, haversine)); + benchmark::DoNotOptimize(bg::distance(pts[i], pts[i + 1], haversine)); } } state.SetItemsProcessed(state.iterations() * state.range(0)); @@ -84,19 +86,20 @@ BENCHMARK(BM_Boost_DistanceBetween)->Arg(1000)->Arg(100000); // --- heading (azimuth) ----------------------------------------------------- static void BM_Boost_Heading(benchmark::State& state) { - const auto pts = geo::bench::random_points(2 * static_cast(state.range(0))); + const auto pts_ll = geo::bench::random_points(2 * static_cast(state.range(0))); + std::vector pts; + pts.reserve(pts_ll.size()); + for (const auto& p : pts_ll) pts.push_back(to_boost_point(p)); for (auto _ : state) { for (std::size_t i = 0; i + 1 < pts.size(); i += 2) { - const Point a = to_boost_point(pts[i]); - const Point b = to_boost_point(pts[i + 1]); - benchmark::DoNotOptimize(bg::azimuth(a, b)); + benchmark::DoNotOptimize(bg::azimuth(pts[i], pts[i + 1])); } } state.SetItemsProcessed(state.iterations() * state.range(0)); } BENCHMARK(BM_Boost_Heading)->Arg(1000)->Arg(100000); -// --- contains: polygon converted once; queries converted inside the loop -- +// --- contains: polygon and query points pre-converted outside the loop ---- static void 
BM_Boost_Contains(benchmark::State& state) { const auto poly_ll = geo::bench::regular_polygon( @@ -104,13 +107,16 @@ static void BM_Boost_Contains(benchmark::State& state) { const Polygon poly = to_boost_polygon(poly_ll); const auto queries_ll = geo::bench::queries_around(40.0, -74.0, 5.0, 1000); + std::vector queries; + queries.reserve(queries_ll.size()); + for (const auto& q : queries_ll) queries.push_back(to_boost_point(q)); for (auto _ : state) { - for (const auto& q : queries_ll) { - benchmark::DoNotOptimize(bg::within(to_boost_point(q), poly)); + for (const auto& q : queries) { + benchmark::DoNotOptimize(bg::within(q, poly)); } } - state.SetItemsProcessed(state.iterations() * static_cast(queries_ll.size())); + state.SetItemsProcessed(state.iterations() * static_cast(queries.size())); } BENCHMARK(BM_Boost_Contains)->Arg(10)->Arg(100)->Arg(1000); @@ -130,13 +136,13 @@ static void BM_Boost_Area(benchmark::State& state) { } BENCHMARK(BM_Boost_Area)->Arg(10)->Arg(100)->Arg(1000); -// --- path_length: input is lat/lng; convert + sum inside the timed loop --- +// --- path_length: LineString pre-built outside the timed loop ------------ static void BM_Boost_PathLength(benchmark::State& state) { const auto path_ll = geo::bench::random_points(static_cast(state.range(0))); + const LineString ls = to_boost_linestring(path_ll); bg::strategy::distance::haversine haversine(kEarthRadius); for (auto _ : state) { - LineString ls = to_boost_linestring(path_ll); benchmark::DoNotOptimize(bg::length(ls, haversine)); } state.SetItemsProcessed(state.iterations() * state.range(0)); diff --git a/benchmarks/speed/bench_geographiclib.cpp b/benchmarks/speed/bench_geographiclib.cpp index e13f8a8..45c5453 100644 --- a/benchmarks/speed/bench_geographiclib.cpp +++ b/benchmarks/speed/bench_geographiclib.cpp @@ -6,6 +6,15 @@ // slower. The numbers here intentionally show that gap; this is a // *trade-off* benchmark, not a "we are faster" benchmark. 
// +// Conversion policy: distance/heading work directly on lat/lng (no +// conversion at all). For area / path_length we DO build PolygonArea + +// AddPoint inside the timed loop — unlike Boost.Geometry / S2, where the +// "build native type" step is separable from "run algorithm", PolygonArea +// is an incremental accumulator: AddPoint *is* the geodesic work, and +// Compute() only adds the closing-edge contribution and returns the +// already-accumulated value. Pre-building outside the loop would measure +// nothing real (a cached double read). +// // GeographicLib does not ship a native point-in-polygon predicate, so the // `contains` benchmark is omitted (this is itself information for the reader). @@ -63,6 +72,12 @@ static void BM_GeographicLib_Heading(benchmark::State& state) { BENCHMARK(BM_GeographicLib_Heading)->Arg(1000)->Arg(100000); // --- area (PolygonArea, polyline=false) ----------------------------------- +// +// AddPoint is where the per-vertex geodesic work happens (an Inverse() call +// per vertex); Compute() is essentially free. We measure the full pattern +// "build + finalize" because that is what computing the area of an N-vertex +// polygon actually costs in GeographicLib — there is no separate algorithm +// step to time on its own. static void BM_GeographicLib_Area(benchmark::State& state) { const auto poly = geo::bench::regular_polygon( diff --git a/benchmarks/speed/bench_s2.cpp b/benchmarks/speed/bench_s2.cpp index b3a3497..c266376 100644 --- a/benchmarks/speed/bench_s2.cpp +++ b/benchmarks/speed/bench_s2.cpp @@ -4,13 +4,11 @@ // S2 Geometry equivalents. S2 uses a sphere, same as us — apples-to-apples // comparison on accuracy. // -// Conversion policy: -// * Streams of points (distance, contains queries, path_length): converted -// INSIDE the timed loop. The input is naturally lat/lng; the cost of -// reaching S2Point is part of the realistic cost. 
-// * Long-lived geometry (the polygon used by contains/area): pre-converted -// ONCE outside the loop. Matches the typical "geofence loaded once, -// queried many times" pattern. +// Conversion policy: every native point/geometry is pre-built OUTSIDE the +// timed loop. The timed work is the library's per-call computation, isolated +// from data-shape conversion overhead — so the numbers reflect algorithmic +// cost, not "lat/lng-in / lat/lng-out" plumbing. (Note: this means S2's +// well-known per-call lat/lng→S2Point cost is *not* counted here.) // // Note: S2 does not ship a public initial-bearing function, so the heading // benchmark is intentionally absent — this is itself information for the @@ -46,15 +44,14 @@ inline std::vector to_s2_points(const std::vector& src) { } // namespace -// --- distance: input is lat/lng, conversion counted in the timed loop ------ +// --- distance: points pre-converted outside the timed loop ---------------- static void BM_S2_DistanceBetween(benchmark::State& state) { - const auto pts = geo::bench::random_points(2 * static_cast(state.range(0))); + const auto pts_ll = geo::bench::random_points(2 * static_cast(state.range(0))); + const auto pts = to_s2_points(pts_ll); for (auto _ : state) { for (std::size_t i = 0; i + 1 < pts.size(); i += 2) { - const S2Point a = to_s2_point(pts[i]); - const S2Point b = to_s2_point(pts[i + 1]); - const S1Angle angle(a, b); + const S1Angle angle(pts[i], pts[i + 1]); benchmark::DoNotOptimize(S2Earth::ToMeters(angle)); } } @@ -62,7 +59,7 @@ static void BM_S2_DistanceBetween(benchmark::State& state) { } BENCHMARK(BM_S2_DistanceBetween)->Arg(1000)->Arg(100000); -// --- contains: polygon converted once; queries converted inside the loop -- +// --- contains: polygon and query points pre-converted outside the loop --- static void BM_S2_Contains(benchmark::State& state) { const auto poly_ll = geo::bench::regular_polygon( @@ -71,13 +68,14 @@ static void BM_S2_Contains(benchmark::State& state) { 
loop->Normalize(); // S2Loop expects CCW; Normalize flips if needed. const auto queries_ll = geo::bench::queries_around(40.0, -74.0, 5.0, 1000); + const auto queries = to_s2_points(queries_ll); for (auto _ : state) { - for (const auto& q : queries_ll) { - benchmark::DoNotOptimize(loop->Contains(to_s2_point(q))); + for (const auto& q : queries) { + benchmark::DoNotOptimize(loop->Contains(q)); } } - state.SetItemsProcessed(state.iterations() * static_cast(queries_ll.size())); + state.SetItemsProcessed(state.iterations() * static_cast(queries.size())); } BENCHMARK(BM_S2_Contains)->Arg(10)->Arg(100)->Arg(1000); @@ -97,12 +95,12 @@ static void BM_S2_Area(benchmark::State& state) { } BENCHMARK(BM_S2_Area)->Arg(10)->Arg(100)->Arg(1000); -// --- path_length: input is lat/lng; convert + sum inside the timed loop --- +// --- path_length: S2Polyline pre-built outside the timed loop ------------ static void BM_S2_PathLength(benchmark::State& state) { const auto path_ll = geo::bench::random_points(static_cast(state.range(0))); + const S2Polyline line(to_s2_points(path_ll)); for (auto _ : state) { - S2Polyline line(to_s2_points(path_ll)); benchmark::DoNotOptimize(S2Earth::ToMeters(line.GetLength())); } state.SetItemsProcessed(state.iterations() * state.range(0)); diff --git a/docs/benchmarks.md b/docs/benchmarks.md index 75d1077..92ffe59 100644 --- a/docs/benchmarks.md +++ b/docs/benchmarks.md @@ -8,22 +8,26 @@ For build and run instructions see [`benchmarks/README.md`](../benchmarks/README ## TL;DR -- **Speed.** Ties Boost.Geometry's spherical strategy on `distance` / - `heading` (within noise) and wins clearly on `area`, `path_length`, and - `contains`. Wins vs S2 on `distance`, `heading`, `area`, `path_length`, - and on `contains` against tiny polygons (~10 vertices). ~10–30× faster - than GeographicLib's WGS84 geodesic — but **less accurate**, on a - sphere. Matches hand-written haversine within noise: zero header-only - overhead. 
+- **Speed.** On the algorithm itself (native types pre-built): ties + Boost.Geometry's spherical strategy on `distance` / `heading` / + `path_length` (within noise), wins clearly on `area`, and matches a + hand-written haversine on `distance`. S2 Geometry's 3D-cartesian model + is **faster** than us on `distance`, `path_length`, and `contains` — but + it pays a per-call `lat/lng → S2Point` conversion in real-world + lat/lng-input workloads (not counted in these algorithm-only numbers). + ~20–30× faster than GeographicLib's WGS84 geodesic — but **less + accurate**, on a sphere. - **Disk footprint.** **36 KB** of headers vs **32.8 MB** for S2 (with abseil), **12.3 MB** for Boost.Geometry's `geometry` subset alone, **4.6 MB** for GeographicLib — a ~130–900× difference in what you have to ship or have on disk. -- **Where competitors win.** As soon as `contains` polygons reach ~100 - vertices, S2's bounding-rectangle prefilter plus a tightly inlined - edge-crossing routine take over: S2 is ~4× faster than us at N=100 and - ~37× faster at N=1 000. For `contains` against geofences of ~100+ - vertices queried at high volume, S2 is the right tool. +- **Where competitors win.** S2 wins on `contains` (its 3D edge-crossing + plus a bounding-rectangle prefilter make throughput roughly constant in + vertex count) and on `distance` / `path_length` once you exclude + conversion. If those are your hot path *and* you can ship a 33 MB + dependency, S2 is the right tool. **Where we win.** `area` (allocation- + free triangle-fan accumulator), and any workload where shipping a + multi-MB dependency is a non-starter. ## Methodology @@ -38,10 +42,12 @@ For build and run instructions see [`benchmarks/README.md`](../benchmarks/README Each library is fed identical inputs (see [`benchmarks/common/random_data.hpp`](../benchmarks/common/random_data.hpp)). -Conversion to a library's native point type is **counted in the timed -loop** when the input is naturally lat/lng (distance, heading). 
For polygon -operations the polygon is converted once outside the loop, matching real -geofence-style usage. +Each library's **native point/geometry types are pre-built outside the timed +loop**. The timed work is the per-call computation (`bg::distance`, +`S2Loop::Contains`, etc.) — this isolates algorithmic cost +from `lat/lng → native-type` plumbing. The one exception is GeographicLib's `PolygonArea`: its `AddPoint` calls are the geodesic work itself, so they stay inside the timed loop. `geo-utils-cpp`'s API takes lat/lng +directly, so it has nothing to pre-build; this is a real API-shape advantage +but is not what the speed numbers below measure. ### Apples-to-apples notes @@ -69,23 +75,28 @@ to reproduce. **Bold** number = column winner, or co-winners within ~5% | Library | N=1 000 | N=100 000 | | ---------------------- | -------: | --------: | -| **geo-utils-cpp** | **39.5** | **25.7** | -| naive haversine | **37.4** | **25.7** | -| S2 Geometry | 15.1 | 10.6 | -| Boost.Geometry | **38.4** | **27.0** | -| GeographicLib (WGS84) | 1.22 | 1.22 | - -Tied with naive haversine and Boost.Geometry within noise. S2 is slower -because each call converts lat/lng → 3D `S2Point`. GeographicLib is ~30× -slower (and substantially more accurate on long-distance pairs). +| **geo-utils-cpp** | 40.5 | **28.2** | +| naive haversine | 38.3 | 26.0 | +| S2 Geometry | **82.9** | **29.1** | +| Boost.Geometry | 39.8 | **28.8** | +| GeographicLib (WGS84) | 1.25 | 1.24 | + +We tie naive haversine and Boost.Geometry's spherical strategy within +noise — zero overhead from being a library. S2 is **2× faster at small N** +because once the input is `S2Point` the per-pair distance reduces to a dot +product / `acos`, cheaper than haversine; the gap closes at N=100 000 where +all three become memory-bandwidth bound. Note: in real-world workloads +where the input *is* lat/lng, S2 also pays a per-call lat/lng→S2Point +conversion that isn't counted here. GeographicLib is ~25× slower (and +substantially more accurate on long-distance pairs). 
### `heading` | Library | N=1 000 | N=100 000 | | ---------------------- | -------: | --------: | -| **geo-utils-cpp** | **25.8** | **15.1** | -| Boost.Geometry | **26.3** | **14.3** | -| GeographicLib | 1.15 | 1.14 | +| **geo-utils-cpp** | **24.9** | **15.5** | +| Boost.Geometry | 22.5 | **14.7** | +| GeographicLib | 1.16 | 1.16 | | S2 Geometry | _no public bearing API_ | | ### `contains` (point-in-polygon) @@ -94,52 +105,53 @@ Million queries per second (1 000 query points per iteration). | Library | poly N=10 | poly N=100 | poly N=1 000 | | -------------------- | --------: | ---------: | -----------: | -| **geo-utils-cpp** | **15.9** | 2.82 | 0.321 | -| S2 Geometry | 12.9 | **11.3** | **11.9** | -| Boost.Geometry | 1.85 | 0.23 | 0.024 | +| **geo-utils-cpp** | 16.2 | 2.87 | 0.329 | +| S2 Geometry | **26.7** | **18.0** | **21.2** | +| Boost.Geometry | 1.91 | 0.234 | 0.024 | | GeographicLib | _no native PIP_ | | | -This is where the libraries differ most. **S2's `S2Loop::Contains` exits -early via a bounding-rectangle prefilter** for queries clearly outside the -loop, and uses a tightly inlined edge-crossing routine for the rest — so its -throughput is roughly *constant* in vertex count over our test range. Our -implementation is a textbook O(N) ray cast through edges (rhumb-line by -default), so we beat S2 only on tiny polygons (~10 vertices). From ~100 -vertices onward S2's prefilter dominates: ~4× faster than us at N=100, -~37× faster at N=1 000. Boost.Geometry's spherical `within` traces -*great-circle* edges — a heavier per-edge predicate that explains its -much lower throughput throughout. - -If your workload involves polygons of ~100+ vertices queried at any volume, -S2 wins `contains`. If it's "tiny polygons, or you can't ship a 33 MB -dependency", we beat both S2 and Boost.Geometry, with a ~900× smaller -install. +`contains` is where the libraries differ most. 
**S2's `S2Loop::Contains` +exits early via a bounding-rectangle prefilter** and uses a tightly +inlined 3D edge-crossing routine — so its algorithmic throughput is +roughly *constant* in vertex count. Our implementation is a textbook O(N) +ray cast through edges (rhumb-line by default), so we lose to S2 even at +N=10 once the per-query lat/lng→S2Point conversion is excluded. With +real-world lat/lng inputs (where S2 *would* pay that conversion per +query), the gap closes for tiny polygons. Boost.Geometry's spherical +`within` traces *great-circle* edges — a heavier per-edge predicate that +explains its much lower throughput throughout. + +If your workload involves `contains` queries at any volume and you can +ship a 33 MB dependency, S2 wins. If you can't ship that — we still beat +Boost.Geometry by ~10× and ship at ~1/900 the size. ### `area` (M polygons/s × vertex count) | Library | N=10 | N=100 | N=1 000 | | -------------------- | -------: | -------: | -------: | -| **geo-utils-cpp** | **69.5** | **66.6** | **65.5** | -| S2 Geometry | 15.6 | 13.7 | 13.4 | -| Boost.Geometry | 43.8 | 35.3 | 36.1 | -| GeographicLib | 1.71 | 1.99 | 2.04 | +| **geo-utils-cpp** | **69.9** | **67.2** | **67.7** | +| S2 Geometry | 16.3 | 14.0 | 13.9 | +| Boost.Geometry | 45.0 | 36.2 | 36.6 | +| GeographicLib | 1.75 | 2.04 | 2.07 | We win clearly on `area` — our spherical-triangle accumulation is a -straight-line loop with no allocation; S2's `S2Loop::GetArea` does more work -per vertex, and Boost.Geometry's strategy machinery costs ~1.8×. +straight-line loop with no allocation; S2's `S2Loop::GetArea` does more +work per vertex, and Boost.Geometry's strategy machinery costs ~1.8×. 
### `path_length` (M points/s) | Library | N=10 | N=100 | N=1 000 | | -------------------- | -------: | -------: | -------: | -| **geo-utils-cpp** | **51.3** | **44.9** | **41.3** | -| S2 Geometry | 39.3 | **45.7** | 36.6 | -| Boost.Geometry | 43.8 | 39.2 | 38.8 | -| GeographicLib | 1.50 | 1.25 | 1.20 | +| **geo-utils-cpp** | 54.0 | 46.2 | 41.7 | +| S2 Geometry | **105.1** | **96.7** | **91.6** | +| Boost.Geometry | 48.7 | 43.5 | 40.2 | +| GeographicLib | 1.53 | 1.27 | 1.23 | -Comfortably ahead of S2 at N=10 (~30 %) and N=1 000 (~13 %); tied at -N=100 within noise. Ahead of Boost.Geometry at every size. GeographicLib -pays the ellipsoidal cost. +S2 wins on `path_length` algorithmically (~2×) — once the input is +`S2Point`, segment length is a fast cartesian computation. We tie +Boost.Geometry within noise. GeographicLib pays the ellipsoidal cost. +Note: a lat/lng-input workload would push the S2 column down by the +per-call conversion cost, which is not counted here. ## Disk footprint @@ -165,15 +177,17 @@ the full Boost install is ~362 MB. ## Where each library is the right tool -- **geo-utils-cpp** — tiny polygons, lat/lng inputs, no-deps constraint, - container/mobile bundle size matters. Sphere accuracy is acceptable. -- **S2 Geometry** — many `contains` queries against polygons of ~100+ - vertices, bounding-rectangle prefilter / spatial indexing (`S2ShapeIndex`) - matter, and you can afford a ~33 MB install. Different data model - (3D unit vectors). +- **geo-utils-cpp** — lat/lng-native API, no-deps constraint, + container/mobile bundle size matters, `area` is hot, sphere accuracy is + acceptable. Best when you'd otherwise be paying conversion cost per call. +- **S2 Geometry** — `contains` / `distance` / `path_length` are the hot + path, you can afford a ~33 MB install, and you're willing to keep data + as `S2Point` end-to-end (otherwise the lat/lng→S2Point conversion eats + the algorithmic win). 
Spatial indexing (`S2ShapeIndex`) available for + bigger workloads. - **Boost.Geometry** — already-Boost project, want one library for many - geometry types and CSes. Slower at PIP than both us and S2; pulls in the - whole `geometry` subset. + geometry types and CSes. Ties us on most operations; loses on `area` + and on `contains`. - **GeographicLib** — sub-meter geodesic accuracy on the WGS84 ellipsoid. Slower by 1–2 orders of magnitude. No PIP. From a96be2047766f2194d5a035cd6f84b5762674190 Mon Sep 17 00:00:00 2001 From: Aleksandr Kovalko Date: Sun, 10 May 2026 15:15:37 +0200 Subject: [PATCH 6/9] Improve README: sharpen positioning, add Conan, reorder bench table --- README.md | 77 ++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 56 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index 4289378..f4e78cf 100644 --- a/README.md +++ b/README.md @@ -38,33 +38,50 @@

-Header-only C++17 library for geographic (lat/lng) geometry (no dependencies). +A tiny header-only C++17 library for common latitude/longitude geometry: +distance, heading, polygon area, point-in-polygon, and path proximity checks. -Provides utilities for distance, bearing, polygon area, point-in-polygon, and -path proximity checks on Earth coordinates. +Designed for projects that need practical GPS/lat-lng math without pulling in a +large geometry framework. No dependencies, no build step, and an install +footprint of about 36 KB. -API inspired by Google Maps geometry utilities. -Uses spherical Earth approximation (like Google Maps). +The API is inspired by Google Maps geometry utilities and uses a spherical Earth +approximation, like Google Maps. ## Features -* **`geo::` spherical functions** — distance, bearing, area, interpolation -* **`geo::` polygon functions** — point-in-polygon, path proximity, distance to segments +- **Spherical calculations** — distance, heading, offset, interpolation, and area +- **Polygon utilities** — point-in-polygon and path proximity checks +- **Lat/lng-native API** — work directly with latitude/longitude coordinates +- **Simple integration** — use through CMake, vcpkg, Conan, or by copying the + `include/` directory ## Why use this library? -- Lightweight and header-only (no dependencies) -- Simple API for common GPS/lat-lng calculations -- Suitable for backend, GIS, navigation and tracking systems +- **Tiny install footprint** — about 36 KB of headers, compared with MB-scale + geometry libraries. +- **Lat/lng-native API** — pass latitude/longitude coordinates directly, without + converting to framework-specific point types. +- **Header-only and dependency-free** — easy to vendor or package; no compiled + library needs to be built or linked. +- **Fast common operations** — matches hand-written haversine on distance, stays + competitive with larger libraries, and is especially strong on polygon area. 
+- **Focused scope** — intentionally small API for common GPS, navigation, + tracking, backend, and GIS-related workflows. ## When not to use -- If you need high-precision geodesic calculations on an ellipsoid -- If you need advanced spatial indexing (use S2 / CGAL instead) +- If you need high-precision ellipsoidal geodesics or sub-meter accuracy, use + GeographicLib. +- If polygon containment is your main hot path, especially for larger polygons, + consider S2 Geometry. +- If you need many geometry types, coordinate systems, or generic geometry + algorithms, Boost.Geometry may be a better fit. +- If you need spatial indexing, use S2, CGAL, or another dedicated spatial index. ## Installation -### FetchContent (recommended) +### FetchContent ```cmake include(FetchContent) @@ -107,6 +124,24 @@ target("your_target") add_packages("geo-utils-cpp") ``` +### Conan + +> Pending Conan Center merge: +> [conan-io/conan-center-index#30152](https://github.com/conan-io/conan-center-index/pull/30152) + +Once the recipe is available in Conan Center: + +```sh +conan install --requires=geo-utils-cpp/1.0.1 --build=missing +``` + +Then in your `CMakeLists.txt`: + +```cmake +find_package(GeoUtilsCpp 1.0.1 REQUIRED) +target_link_libraries(your_target PRIVATE geo::utils) +``` + ### find_package ```cmake @@ -118,7 +153,7 @@ target_link_libraries(your_target PRIVATE geo::utils) Copy the `include/` directory into your project and add it to your include path. -For more details see [docs/getting-started.md](docs/getting-started.md). +For more details, see [docs/getting-started.md](docs/getting-started.md). ## Usage @@ -144,13 +179,13 @@ int main() { `geo-utils-cpp` is a near-zero-overhead wrapper over the math itself, with a tiny disk footprint thanks to header-only + zero dependencies. 
-| Library | `distance_between` (M pairs/s) | `area` (poly N=100, M polys/s) | Install size | -| -------------------- | -----------------------------: | -----------------------------: | ------------: | -| **geo-utils-cpp** | **40.5** | **67.2** | **36 KB** | -| naive haversine | 38.3 | — | 0 | -| S2 Geometry | 82.9 | 14.0 | 32.8 MB | -| Boost.Geometry | 39.8 | 36.2 | 12.3 MB | -| GeographicLib | 1.2 | 2.0 | 4.6 MB | +| Library | Install size | `distance_between` (M pairs/s) | `area` (poly N=100, M vertices/s) | +| -------------------- | ------------: | -----------------------------: | -----------------------------: | +| **geo-utils-cpp** | **36 KB** | **40.5** | **67.2** | +| naive haversine | 0 | 38.3 | — | +| S2 Geometry | 32.8 MB | 82.9 | 14.0 | +| Boost.Geometry | 12.3 MB | 39.8 | 36.2 | +| GeographicLib | 4.6 MB | 1.2 | 2.0 | Apple M1 · clang 17 · `-O2 -DNDEBUG`. Native types pre-built outside the timed loop — numbers reflect algorithmic cost only. Ties Boost.Geometry's From 675206f789f80a89f2621ccf36294a5f0d38f414 Mon Sep 17 00:00:00 2001 From: Aleksandr Kovalko Date: Tue, 19 May 2026 03:05:11 +0200 Subject: [PATCH 7/9] Bench methodology: shared constants, Repetitions(5), CCW orientation, split smoke CI - benchmarks/common/constants.hpp + bench_polygon/bench_queries helpers in random_data.hpp; bench files use shared constants instead of hard-coding (40.0, -74.0, 5.0, 1000) per file. - Every BENCHMARK macro now ->Repetitions(5)->ReportAggregatesOnly(true). - regular_polygon() lng-sign fix: vertices are now actually CCW (positive signed_area in our convention). - queries_around() clamps lat/lng to valid domain. - benchmarks smoke-build moved to its own workflow with paths filter on benchmarks/** + CMakeLists.txt + the workflow file; removed redundant step from ci.yml (doc-only PRs no longer pay for it). 
--- .github/workflows/benchmarks-smoke.yml | 44 ++++++++++++++++++++++++ .github/workflows/ci.yml | 10 ------ benchmarks/common/constants.hpp | 34 ++++++++++++++++++ benchmarks/common/random_data.hpp | 32 +++++++++++++---- benchmarks/speed/bench_boost.cpp | 20 +++++------ benchmarks/speed/bench_geo_utils.cpp | 18 +++++----- benchmarks/speed/bench_geographiclib.cpp | 11 +++--- benchmarks/speed/bench_naive.cpp | 6 ++-- benchmarks/speed/bench_s2.cpp | 16 ++++----- 9 files changed, 136 insertions(+), 55 deletions(-) create mode 100644 .github/workflows/benchmarks-smoke.yml create mode 100644 benchmarks/common/constants.hpp diff --git a/.github/workflows/benchmarks-smoke.yml b/.github/workflows/benchmarks-smoke.yml new file mode 100644 index 0000000..22470ef --- /dev/null +++ b/.github/workflows/benchmarks-smoke.yml @@ -0,0 +1,44 @@ +# Smoke-build the dependency-free portion of the benchmarks. Triggered only +# on PRs/pushes that actually touch benchmark sources, the root CMake, or +# this workflow file — so doc-only PRs don't pay the CI cost. The benchmarks +# themselves aren't run here (that would require S2 / Boost / GeographicLib +# installs), only `bench_geo_utils` and `bench_naive` which have no external +# deps. The intent is to catch silent breakage in `benchmarks/CMakeLists.txt` +# or the shared `random_data.hpp` / `constants.hpp` headers. + +name: Benchmarks smoke-build + +on: + push: + branches: [master, main] + paths: + - 'benchmarks/**' + - 'CMakeLists.txt' + - '.github/workflows/benchmarks-smoke.yml' + pull_request: + branches: [master, main] + paths: + - 'benchmarks/**' + - 'CMakeLists.txt' + - '.github/workflows/benchmarks-smoke.yml' + workflow_dispatch: + +permissions: + contents: read + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + smoke: + name: smoke-build (ubuntu) + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Configure + run: cmake -S . 
-B build-bench -DGEO_UTILS_CPP_BUILD_BENCHMARKS=ON -DGEO_UTILS_CPP_BUILD_TESTS=OFF -DGEO_UTILS_CPP_BUILD_EXAMPLES=OFF -DCMAKE_BUILD_TYPE=Release + + - name: Build (dependency-free benches only) + run: cmake --build build-bench --config Release --target bench_geo_utils bench_naive diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ab80d6b..4f11f72 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -71,13 +71,3 @@ jobs: - name: Test run: ctest --test-dir build -C Release --output-on-failure - - # Smoke-build the dependency-free portion of the benchmarks so that - # changes to benchmarks/CMakeLists.txt or the bench source files can't - # silently break the build. We don't run the benchmarks here — that - # requires installing S2/Boost/GeographicLib and is intentionally - # out-of-scope for CI. - - name: Smoke-build benchmarks (no external deps) - run: | - cmake -S . -B build-bench -DGEO_UTILS_CPP_BUILD_BENCHMARKS=ON -DGEO_UTILS_CPP_BUILD_TESTS=OFF -DGEO_UTILS_CPP_BUILD_EXAMPLES=OFF -DCMAKE_BUILD_TYPE=Release - cmake --build build-bench --config Release --target bench_geo_utils bench_naive diff --git a/benchmarks/common/constants.hpp b/benchmarks/common/constants.hpp new file mode 100644 index 0000000..4d5c1be --- /dev/null +++ b/benchmarks/common/constants.hpp @@ -0,0 +1,34 @@ +// Copyright 2026 Aleksandr Kovalko +// Licensed under the Apache License, Version 2.0 +// +// Shared compile-time constants used by every benchmark binary. Centralized +// so a single change here updates all five competitor benchmarks and the +// random-data generator. + +#pragma once + +#include + +namespace geo::bench { + +inline constexpr double kPi = 3.14159265358979323846; + +// Earth radius in meters. Matches `geo-utils-cpp`'s internal constant; we +// reuse the same value for Boost.Geometry's haversine strategy and the naive +// baseline so every sphere-based library normalizes to identical units. 
+// S2 uses `S2Earth::RadiusMeters()` (6371010.0); that ~1 m difference +// affects absolute results but not throughput. +inline constexpr double kEarthRadiusMeters = 6371009.0; + +// Test polygon: regular n-gon centered over NYC, 5° radius. Stays well +// inside ±80° latitude (where ellipsoidal Inverse is numerically safe) and +// is large enough that 2°-jitter queries hit both inside/outside branches +// of contains(). +inline constexpr double kPolyCenterLat = 40.0; +inline constexpr double kPolyCenterLng = -74.0; +inline constexpr double kPolyRadiusDeg = 5.0; + +// Query points per `contains` benchmark iteration. +inline constexpr std::size_t kQueriesPerIteration = 1000; + +} // namespace geo::bench diff --git a/benchmarks/common/random_data.hpp b/benchmarks/common/random_data.hpp index 95bc50c..2be1ee3 100644 --- a/benchmarks/common/random_data.hpp +++ b/benchmarks/common/random_data.hpp @@ -6,6 +6,7 @@ #pragma once +#include #include #include #include @@ -14,6 +15,8 @@ #include +#include "constants.hpp" + namespace geo::bench { inline constexpr std::uint64_t kSeed = 0xC0FFEEull; @@ -35,16 +38,18 @@ inline std::vector random_points(std::size_t n, std::uint64_t seed = kSe } // Returns a regular n-gon centered at (clat, clng) with the given radius -// (degrees). Vertices are emitted counter-clockwise (positive signed area -// in our convention). +// (degrees). Vertices are emitted counter-clockwise when viewed with +// latitude on the y-axis and longitude on the x-axis (the standard map +// orientation, looking down from above), so the first vertex is due +// north of the center and successive vertices march west — that's +// positive signed area in our convention. 
inline std::vector regular_polygon(std::size_t n, double clat, double clng, double radius_deg) { - constexpr double kPi = 3.14159265358979323846; std::vector out; out.reserve(n); for (std::size_t i = 0; i < n; ++i) { double a = 2.0 * kPi * static_cast(i) / static_cast(n); out.emplace_back(clat + radius_deg * std::cos(a), - clng + radius_deg * std::sin(a)); + clng - radius_deg * std::sin(a)); } return out; } @@ -58,10 +63,25 @@ inline std::vector queries_around(double clat, double clng, double radiu std::vector out; out.reserve(n); for (std::size_t i = 0; i < n; ++i) { - out.emplace_back(clat + radius_deg * jitter(rng), - clng + radius_deg * jitter(rng)); + // Clamp to valid lat/lng so callers picking large radii can't produce + // out-of-domain inputs that some libraries silently NaN on. + const double lat = std::clamp(clat + radius_deg * jitter(rng), -90.0, 90.0); + const double lng = std::clamp(clng + radius_deg * jitter(rng), -180.0, 180.0); + out.emplace_back(lat, lng); } return out; } +// Sugar over `regular_polygon` and `queries_around` using the shared test +// polygon constants from constants.hpp. Use these in benchmarks instead of +// hard-coding (40.0, -74.0, 5.0) per-file. +inline std::vector bench_polygon(std::size_t n) { + return regular_polygon(n, kPolyCenterLat, kPolyCenterLng, kPolyRadiusDeg); +} + +inline std::vector bench_queries() { + return queries_around(kPolyCenterLat, kPolyCenterLng, kPolyRadiusDeg, + kQueriesPerIteration); +} + } // namespace geo::bench diff --git a/benchmarks/speed/bench_boost.cpp b/benchmarks/speed/bench_boost.cpp index c8e1ddc..91d7c0f 100644 --- a/benchmarks/speed/bench_boost.cpp +++ b/benchmarks/speed/bench_boost.cpp @@ -41,7 +41,7 @@ using Point = bg::model::point; using LineString = bg::model::linestring; -constexpr double kEarthRadius = 6371009.0; +constexpr double kEarthRadius = geo::bench::kEarthRadiusMeters; inline Point to_boost_point(const geo::LatLng& p) { return Point(p.lng, p.lat); // (lng, lat) order! 
@@ -81,7 +81,7 @@ static void BM_Boost_DistanceBetween(benchmark::State& state) { } state.SetItemsProcessed(state.iterations() * state.range(0)); } -BENCHMARK(BM_Boost_DistanceBetween)->Arg(1000)->Arg(100000); +BENCHMARK(BM_Boost_DistanceBetween)->Arg(1000)->Arg(100000)->Repetitions(5)->ReportAggregatesOnly(true); // --- heading (azimuth) ----------------------------------------------------- @@ -97,16 +97,15 @@ static void BM_Boost_Heading(benchmark::State& state) { } state.SetItemsProcessed(state.iterations() * state.range(0)); } -BENCHMARK(BM_Boost_Heading)->Arg(1000)->Arg(100000); +BENCHMARK(BM_Boost_Heading)->Arg(1000)->Arg(100000)->Repetitions(5)->ReportAggregatesOnly(true); // --- contains: polygon and query points pre-converted outside the loop ---- static void BM_Boost_Contains(benchmark::State& state) { - const auto poly_ll = geo::bench::regular_polygon( - static_cast(state.range(0)), 40.0, -74.0, 5.0); + const auto poly_ll = geo::bench::bench_polygon(static_cast(state.range(0))); const Polygon poly = to_boost_polygon(poly_ll); - const auto queries_ll = geo::bench::queries_around(40.0, -74.0, 5.0, 1000); + const auto queries_ll = geo::bench::bench_queries(); std::vector queries; queries.reserve(queries_ll.size()); for (const auto& q : queries_ll) queries.push_back(to_boost_point(q)); @@ -118,13 +117,12 @@ static void BM_Boost_Contains(benchmark::State& state) { } state.SetItemsProcessed(state.iterations() * static_cast(queries.size())); } -BENCHMARK(BM_Boost_Contains)->Arg(10)->Arg(100)->Arg(1000); +BENCHMARK(BM_Boost_Contains)->Arg(10)->Arg(100)->Arg(1000)->Repetitions(5)->ReportAggregatesOnly(true); // --- area ------------------------------------------------------------------ static void BM_Boost_Area(benchmark::State& state) { - const auto poly_ll = geo::bench::regular_polygon( - static_cast(state.range(0)), 40.0, -74.0, 5.0); + const auto poly_ll = geo::bench::bench_polygon(static_cast(state.range(0))); const Polygon poly = to_boost_polygon(poly_ll); 
// bg::area on a spherical CS returns area on the unit sphere (steradians); // multiply by R² to get square meters. @@ -134,7 +132,7 @@ static void BM_Boost_Area(benchmark::State& state) { } state.SetItemsProcessed(state.iterations() * state.range(0)); } -BENCHMARK(BM_Boost_Area)->Arg(10)->Arg(100)->Arg(1000); +BENCHMARK(BM_Boost_Area)->Arg(10)->Arg(100)->Arg(1000)->Repetitions(5)->ReportAggregatesOnly(true); // --- path_length: LineString pre-built outside the timed loop ------------ @@ -147,4 +145,4 @@ static void BM_Boost_PathLength(benchmark::State& state) { } state.SetItemsProcessed(state.iterations() * state.range(0)); } -BENCHMARK(BM_Boost_PathLength)->Arg(10)->Arg(100)->Arg(1000); +BENCHMARK(BM_Boost_PathLength)->Arg(10)->Arg(100)->Arg(1000)->Repetitions(5)->ReportAggregatesOnly(true); diff --git a/benchmarks/speed/bench_geo_utils.cpp b/benchmarks/speed/bench_geo_utils.cpp index c185d88..f830384 100644 --- a/benchmarks/speed/bench_geo_utils.cpp +++ b/benchmarks/speed/bench_geo_utils.cpp @@ -22,7 +22,7 @@ static void BM_GeoUtils_DistanceBetween(benchmark::State& state) { } state.SetItemsProcessed(state.iterations() * state.range(0)); } -BENCHMARK(BM_GeoUtils_DistanceBetween)->Arg(1000)->Arg(100000); +BENCHMARK(BM_GeoUtils_DistanceBetween)->Arg(1000)->Arg(100000)->Repetitions(5)->ReportAggregatesOnly(true); // --- heading ---------------------------------------------------------------- @@ -35,14 +35,13 @@ static void BM_GeoUtils_Heading(benchmark::State& state) { } state.SetItemsProcessed(state.iterations() * state.range(0)); } -BENCHMARK(BM_GeoUtils_Heading)->Arg(1000)->Arg(100000); +BENCHMARK(BM_GeoUtils_Heading)->Arg(1000)->Arg(100000)->Repetitions(5)->ReportAggregatesOnly(true); // --- contains (point-in-polygon) ------------------------------------------- static void BM_GeoUtils_Contains(benchmark::State& state) { - const auto poly = geo::bench::regular_polygon( - static_cast(state.range(0)), 40.0, -74.0, 5.0); - const auto queries = 
geo::bench::queries_around(40.0, -74.0, 5.0, 1000); + const auto poly = geo::bench::bench_polygon(static_cast(state.range(0))); + const auto queries = geo::bench::bench_queries(); for (auto _ : state) { for (const auto& q : queries) { benchmark::DoNotOptimize(geo::contains(q, poly)); @@ -50,19 +49,18 @@ static void BM_GeoUtils_Contains(benchmark::State& state) { } state.SetItemsProcessed(state.iterations() * static_cast(queries.size())); } -BENCHMARK(BM_GeoUtils_Contains)->Arg(10)->Arg(100)->Arg(1000); +BENCHMARK(BM_GeoUtils_Contains)->Arg(10)->Arg(100)->Arg(1000)->Repetitions(5)->ReportAggregatesOnly(true); // --- area ------------------------------------------------------------------- static void BM_GeoUtils_Area(benchmark::State& state) { - const auto poly = geo::bench::regular_polygon( - static_cast(state.range(0)), 40.0, -74.0, 5.0); + const auto poly = geo::bench::bench_polygon(static_cast(state.range(0))); for (auto _ : state) { benchmark::DoNotOptimize(geo::area(poly)); } state.SetItemsProcessed(state.iterations() * state.range(0)); } -BENCHMARK(BM_GeoUtils_Area)->Arg(10)->Arg(100)->Arg(1000); +BENCHMARK(BM_GeoUtils_Area)->Arg(10)->Arg(100)->Arg(1000)->Repetitions(5)->ReportAggregatesOnly(true); // --- path_length ------------------------------------------------------------ @@ -73,4 +71,4 @@ static void BM_GeoUtils_PathLength(benchmark::State& state) { } state.SetItemsProcessed(state.iterations() * state.range(0)); } -BENCHMARK(BM_GeoUtils_PathLength)->Arg(10)->Arg(100)->Arg(1000); +BENCHMARK(BM_GeoUtils_PathLength)->Arg(10)->Arg(100)->Arg(1000)->Repetitions(5)->ReportAggregatesOnly(true); diff --git a/benchmarks/speed/bench_geographiclib.cpp b/benchmarks/speed/bench_geographiclib.cpp index 45c5453..3ad64bf 100644 --- a/benchmarks/speed/bench_geographiclib.cpp +++ b/benchmarks/speed/bench_geographiclib.cpp @@ -50,7 +50,7 @@ static void BM_GeographicLib_DistanceBetween(benchmark::State& state) { } state.SetItemsProcessed(state.iterations() * state.range(0)); 
} -BENCHMARK(BM_GeographicLib_DistanceBetween)->Arg(1000)->Arg(100000); +BENCHMARK(BM_GeographicLib_DistanceBetween)->Arg(1000)->Arg(100000)->Repetitions(5)->ReportAggregatesOnly(true); // --- heading --------------------------------------------------------------- @@ -69,7 +69,7 @@ static void BM_GeographicLib_Heading(benchmark::State& state) { } state.SetItemsProcessed(state.iterations() * state.range(0)); } -BENCHMARK(BM_GeographicLib_Heading)->Arg(1000)->Arg(100000); +BENCHMARK(BM_GeographicLib_Heading)->Arg(1000)->Arg(100000)->Repetitions(5)->ReportAggregatesOnly(true); // --- area (PolygonArea, polyline=false) ----------------------------------- // @@ -80,8 +80,7 @@ BENCHMARK(BM_GeographicLib_Heading)->Arg(1000)->Arg(100000); // step to time on its own. static void BM_GeographicLib_Area(benchmark::State& state) { - const auto poly = geo::bench::regular_polygon( - static_cast(state.range(0)), 40.0, -74.0, 5.0); + const auto poly = geo::bench::bench_polygon(static_cast(state.range(0))); for (auto _ : state) { GeographicLib::PolygonArea pa(wgs84(), /*polyline=*/false); for (const auto& p : poly) pa.AddPoint(p.lat, p.lng); @@ -92,7 +91,7 @@ static void BM_GeographicLib_Area(benchmark::State& state) { } state.SetItemsProcessed(state.iterations() * state.range(0)); } -BENCHMARK(BM_GeographicLib_Area)->Arg(10)->Arg(100)->Arg(1000); +BENCHMARK(BM_GeographicLib_Area)->Arg(10)->Arg(100)->Arg(1000)->Repetitions(5)->ReportAggregatesOnly(true); // --- path_length (PolygonArea with polyline=true) ------------------------- @@ -108,7 +107,7 @@ static void BM_GeographicLib_PathLength(benchmark::State& state) { } state.SetItemsProcessed(state.iterations() * state.range(0)); } -BENCHMARK(BM_GeographicLib_PathLength)->Arg(10)->Arg(100)->Arg(1000); +BENCHMARK(BM_GeographicLib_PathLength)->Arg(10)->Arg(100)->Arg(1000)->Repetitions(5)->ReportAggregatesOnly(true); // Note: contains() benchmark intentionally omitted — GeographicLib does not // provide a native point-in-polygon 
predicate. This is a real capability gap, diff --git a/benchmarks/speed/bench_naive.cpp b/benchmarks/speed/bench_naive.cpp index fb3ce92..271f683 100644 --- a/benchmarks/speed/bench_naive.cpp +++ b/benchmarks/speed/bench_naive.cpp @@ -14,8 +14,8 @@ namespace { -constexpr double kEarthRadius = 6371009.0; -constexpr double kPi = 3.14159265358979323846; +constexpr double kEarthRadius = geo::bench::kEarthRadiusMeters; +constexpr double kPi = geo::bench::kPi; inline double naive_distance(geo::LatLng a, geo::LatLng b) noexcept { const double lat1 = a.lat * kPi / 180.0; @@ -39,4 +39,4 @@ static void BM_Naive_DistanceBetween(benchmark::State& state) { } state.SetItemsProcessed(state.iterations() * state.range(0)); } -BENCHMARK(BM_Naive_DistanceBetween)->Arg(1000)->Arg(100000); +BENCHMARK(BM_Naive_DistanceBetween)->Arg(1000)->Arg(100000)->Repetitions(5)->ReportAggregatesOnly(true); diff --git a/benchmarks/speed/bench_s2.cpp b/benchmarks/speed/bench_s2.cpp index c266376..3c1855a 100644 --- a/benchmarks/speed/bench_s2.cpp +++ b/benchmarks/speed/bench_s2.cpp @@ -57,17 +57,16 @@ static void BM_S2_DistanceBetween(benchmark::State& state) { } state.SetItemsProcessed(state.iterations() * state.range(0)); } -BENCHMARK(BM_S2_DistanceBetween)->Arg(1000)->Arg(100000); +BENCHMARK(BM_S2_DistanceBetween)->Arg(1000)->Arg(100000)->Repetitions(5)->ReportAggregatesOnly(true); // --- contains: polygon and query points pre-converted outside the loop --- static void BM_S2_Contains(benchmark::State& state) { - const auto poly_ll = geo::bench::regular_polygon( - static_cast(state.range(0)), 40.0, -74.0, 5.0); + const auto poly_ll = geo::bench::bench_polygon(static_cast(state.range(0))); auto loop = std::make_unique(to_s2_points(poly_ll)); loop->Normalize(); // S2Loop expects CCW; Normalize flips if needed. 
- const auto queries_ll = geo::bench::queries_around(40.0, -74.0, 5.0, 1000); + const auto queries_ll = geo::bench::bench_queries(); const auto queries = to_s2_points(queries_ll); for (auto _ : state) { @@ -77,13 +76,12 @@ static void BM_S2_Contains(benchmark::State& state) { } state.SetItemsProcessed(state.iterations() * static_cast(queries.size())); } -BENCHMARK(BM_S2_Contains)->Arg(10)->Arg(100)->Arg(1000); +BENCHMARK(BM_S2_Contains)->Arg(10)->Arg(100)->Arg(1000)->Repetitions(5)->ReportAggregatesOnly(true); // --- area ------------------------------------------------------------------ static void BM_S2_Area(benchmark::State& state) { - const auto poly_ll = geo::bench::regular_polygon( - static_cast(state.range(0)), 40.0, -74.0, 5.0); + const auto poly_ll = geo::bench::bench_polygon(static_cast(state.range(0))); auto loop = std::make_unique(to_s2_points(poly_ll)); loop->Normalize(); @@ -93,7 +91,7 @@ static void BM_S2_Area(benchmark::State& state) { } state.SetItemsProcessed(state.iterations() * state.range(0)); } -BENCHMARK(BM_S2_Area)->Arg(10)->Arg(100)->Arg(1000); +BENCHMARK(BM_S2_Area)->Arg(10)->Arg(100)->Arg(1000)->Repetitions(5)->ReportAggregatesOnly(true); // --- path_length: S2Polyline pre-built outside the timed loop ------------ @@ -105,4 +103,4 @@ static void BM_S2_PathLength(benchmark::State& state) { } state.SetItemsProcessed(state.iterations() * state.range(0)); } -BENCHMARK(BM_S2_PathLength)->Arg(10)->Arg(100)->Arg(1000); +BENCHMARK(BM_S2_PathLength)->Arg(10)->Arg(100)->Arg(1000)->Repetitions(5)->ReportAggregatesOnly(true); From 0ab9ec14f89774c6c51d19aa89146c752b24a3a1 Mon Sep 17 00:00:00 2001 From: Aleksandr Kovalko Date: Tue, 19 May 2026 03:05:48 +0200 Subject: [PATCH 8/9] Docs: reframe deployment footprint, compress TL;DR, dedup across files - docs/benchmarks.md: "Disk footprint" -> "Deployment footprint"; drop "fraction of the disk footprint" / "33 MB install" claims as headline arguments; lean on dependency-free + lat/lng-native + 
S2Point-native data as the technical criteria. TL;DR compressed to scannable bullets, bold rule applied strictly at ~5%, N/A rows unified, polygon-area footnote added, naive-haversine baseline disclaimer in distance commentary, PolygonArea apples-to-apples note added. - README.md: merged Features + Why-use into one list, deduped install boilerplate (single find_package block), added polygon example demonstrating contains/area/path_length/on_path, replaced std::boolalpha with inline ternaries, "about 36 KB across 4 headers". - docs/api.md: merged Conventions + Numerical notes; expanded with Earth model + Google Maps reference, thread safety, error handling (noexcept story, nullopt/extrapolate semantics), include strategy. Added on_path and angle_between examples; offset_origin round-trip + nullopt example; readable comments on distance/path_length/area examples; geodesic-defaults note at top of Polygon functions. - benchmarks/README.md: drop Competitors table and Methodology notes (already in docs/benchmarks.md); reduced to install + build + measure mechanics. --- README.md | 172 +++++++++++++++++++++------------------ benchmarks/README.md | 37 ++------- docs/api.md | 179 +++++++++++++++++++++++++++------------- docs/benchmarks.md | 190 ++++++++++++++++++++++++++----------------- 4 files changed, 335 insertions(+), 243 deletions(-) diff --git a/README.md b/README.md index f4e78cf..d60b07c 100644 --- a/README.md +++ b/README.md @@ -38,46 +38,27 @@

-A tiny header-only C++17 library for common latitude/longitude geometry: -distance, heading, polygon area, point-in-polygon, and path proximity checks. +Practical latitude/longitude geometry for C++17 projects that need GPS math, +not a full geometry framework. -Designed for projects that need practical GPS/lat-lng math without pulling in a -large geometry framework. No dependencies, no build step, and an install -footprint of about 36 KB. +Distance, heading, polygon area, point-in-polygon, and path proximity checks — +header-only, no dependencies, no build step. -The API is inspired by Google Maps geometry utilities and uses a spherical Earth -approximation, like Google Maps. +The API is inspired by Google Maps geometry utilities and uses the same spherical +Earth approximation model. ## Features -- **Spherical calculations** — distance, heading, offset, interpolation, and area -- **Polygon utilities** — point-in-polygon and path proximity checks -- **Lat/lng-native API** — work directly with latitude/longitude coordinates -- **Simple integration** — use through CMake, vcpkg, Conan, or by copying the - `include/` directory - -## Why use this library? - -- **Tiny install footprint** — about 36 KB of headers, compared with MB-scale - geometry libraries. -- **Lat/lng-native API** — pass latitude/longitude coordinates directly, without - converting to framework-specific point types. -- **Header-only and dependency-free** — easy to vendor or package; no compiled - library needs to be built or linked. -- **Fast common operations** — matches hand-written haversine on distance, stays - competitive with larger libraries, and is especially strong on polygon area. -- **Focused scope** — intentionally small API for common GPS, navigation, - tracking, backend, and GIS-related workflows. - -## When not to use - -- If you need high-precision ellipsoidal geodesics or sub-meter accuracy, use - GeographicLib. 
-- If polygon containment is your main hot path, especially for larger polygons, - consider S2 Geometry. -- If you need many geometry types, coordinate systems, or generic geometry - algorithms, Boost.Geometry may be a better fit. -- If you need spatial indexing, use S2, CGAL, or another dedicated spatial index. +- **Lat/lng-native API** — pass latitude/longitude coordinates directly, no + framework-specific point types to convert through. +- **Header-only, dependency-free** — about 36 KB across 4 headers; nothing + to build or link. +- **Spherical math** — distance, heading, offset, interpolation, area. +- **Polygon utilities** — point-in-polygon and path proximity checks. +- **Fast** — matches hand-written haversine on `distance`; especially strong + on polygon `area` (see [benchmarks](docs/benchmarks.md)). +- **Focused scope** — intentionally small API for GPS, navigation, tracking, + backend, and GIS workflows. ## Installation @@ -102,13 +83,6 @@ target_link_libraries(your_target PRIVATE geo::utils) vcpkg install geo-utils-cpp ``` -Then in your `CMakeLists.txt`: - -```cmake -find_package(GeoUtilsCpp 1.0.1 REQUIRED) -target_link_libraries(your_target PRIVATE geo::utils) -``` - ### xrepo ```sh @@ -126,37 +100,33 @@ target("your_target") ### Conan -> Pending Conan Center merge: -> [conan-io/conan-center-index#30152](https://github.com/conan-io/conan-center-index/pull/30152) - -Once the recipe is available in Conan Center: - ```sh conan install --requires=geo-utils-cpp/1.0.1 --build=missing ``` -Then in your `CMakeLists.txt`: +Conan Center support is pending +[conan-io/conan-center-index#30152](https://github.com/conan-io/conan-center-index/pull/30152) -```cmake -find_package(GeoUtilsCpp 1.0.1 REQUIRED) -target_link_libraries(your_target PRIVATE geo::utils) -``` +### Manual + +Copy the `include/` directory into your project and add it to your include path. 
+ +### Using it from CMake -### find_package +With any of the above methods (vcpkg, Conan, FetchContent, or a system +`find_package`), wire it into your build with: ```cmake find_package(GeoUtilsCpp 1.0.1 REQUIRED) target_link_libraries(your_target PRIVATE geo::utils) ``` -### Manual - -Copy the `include/` directory into your project and add it to your include path. - For more details, see [docs/getting-started.md](docs/getting-started.md). ## Usage +Distance and heading between two points: + ```cpp #include @@ -174,30 +144,74 @@ int main() { } ``` +Polygon area, point-in-polygon, path length, and path proximity: + +```cpp +#include +#include + +#include + +int main() { + // A small box around midtown Manhattan (vertices in CCW order). + std::vector midtown = { + {40.74, -74.01}, {40.74, -73.96}, {40.78, -73.96}, {40.78, -74.01}, + }; + geo::LatLng timesSquare{40.7580, -73.9855}; + + std::cout << "Times Square inside: " + << (geo::contains(timesSquare, midtown) ? "true" : "false") << "\n"; + std::cout << "Polygon area: " + << geo::area(midtown) / 1e6 << " km^2\n"; + + // A short polyline along Broadway, and a point near it. + std::vector route = { + {40.7580, -73.9855}, // Times Square + {40.7680, -73.9818}, // Columbus Circle + {40.7780, -73.9740}, // Lincoln Center + }; + geo::LatLng nearby{40.7670, -73.9820}; + + std::cout << "Route length: " + << geo::path_length(route) / 1000.0 << " km\n"; + std::cout << "Point within 200 m of route: " + << (geo::on_path(nearby, route, /*geodesic=*/true, /*tolerance=*/200.0) + ? "true" : "false") + << "\n"; +} +``` + ## Benchmarks -`geo-utils-cpp` is a near-zero-overhead wrapper over the math itself, with a -tiny disk footprint thanks to header-only + zero dependencies. 
- -| Library | Install size | `distance_between` (M pairs/s) | `area` (poly N=100, M polys/s) | -| -------------------- | ------------: | -----------------------------: | -----------------------------: | -| **geo-utils-cpp** | **36 KB** | **40.5** | **67.2** | -| naive haversine | 0 | 38.3 | — | -| S2 Geometry | 32.8 MB | 82.9 | 14.0 | -| Boost.Geometry | 12.3 MB | 39.8 | 36.2 | -| GeographicLib | 4.6 MB | 1.2 | 2.0 | - -Apple M1 · clang 17 · `-O2 -DNDEBUG`. Native types pre-built outside the -timed loop — numbers reflect algorithmic cost only. Ties Boost.Geometry's -spherical strategy on `distance` / `heading` / `path_length` within noise; -**wins clearly on `area`** (allocation-free triangle-fan accumulator). S2 -is faster algorithmically on `distance` / `path_length` / `contains` — but -pays a per-call `lat/lng → S2Point` conversion in real-world lat/lng -workloads (not counted here). **130–900× smaller install footprint** than -the alternatives. Zero overhead over hand-written haversine. - -See [docs/benchmarks.md](docs/benchmarks.md) for the full methodology, all -operations, and a discussion of when to reach for each library. +`geo-utils-cpp` is header-only with no runtime dependencies. Throughput on +Apple M1 / clang 17 / `-O2 -DNDEBUG` (higher is better): + +| Library | `distance_between` (M pairs/s) | `area` (poly N=100, M polys/s) | +| -------------------- | -----------------------------: | -----------------------------: | +| **geo-utils-cpp** | **40.5** | **67.2** | +| naive haversine | 38.3 | — | +| S2 Geometry | 82.9 | 14.0 | +| Boost.Geometry | 39.8 | 36.2 | +| GeographicLib | 1.2 | 2.0 | + +Native types are pre-built outside the timed loop, so the table compares +algorithmic cost rather than object-construction overhead. `geo-utils-cpp` +matches hand-written haversine and Boost.Geometry on simple spherical operations +and is especially strong on `area`, while S2 is faster on several operations when +conversion from lat/lng is excluded. 
+ +See [docs/benchmarks.md](docs/benchmarks.md) for full methodology, all +operations, and when to use each library. + +## When not to use + +- If you need high-precision ellipsoidal geodesics or sub-meter accuracy, use + GeographicLib. +- If polygon containment is your main hot path, especially for larger polygons, + consider S2 Geometry. +- If you need many geometry types, coordinate systems, or generic geometry + algorithms, Boost.Geometry may be a better fit. +- If you need spatial indexing, use S2, CGAL, or another dedicated spatial index. ## API Reference diff --git a/benchmarks/README.md b/benchmarks/README.md index 85a6e9d..3141474 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -11,21 +11,9 @@ libraries on two axes: The benchmarks are **not built by default** and **not run in CI**. Set `-DGEO_UTILS_CPP_BUILD_BENCHMARKS=ON` to opt in. -For a high-level summary of results and methodology see -[`docs/benchmarks.md`](../docs/benchmarks.md). - -## Competitors - -| Library | Model | Notes | -| -------------------- | ----------- | ------------------------------------------------- | -| **geo-utils-cpp** | sphere | This library — header-only, zero deps | -| **naive haversine** | sphere | ~30 lines of inline math, no library | -| **S2 Geometry** | sphere | Google's mapping library; depends on abseil | -| **Boost.Geometry** | sphere\* | `cs::spherical_equatorial` strategy | -| **GeographicLib** | ellipsoid | Karney's iterative geodesic — more accurate, slower; **no native point-in-polygon** | - -\* Boost.Geometry can also do ellipsoidal; we use the spherical strategy here -to compare apples-to-apples on the algorithm we ourselves implement. +Results, methodology, and per-library trade-offs live in +[`docs/benchmarks.md`](../docs/benchmarks.md). This file covers only the +mechanics: installing competitors, building, and running. 
## Installing competitors @@ -90,20 +78,5 @@ done This builds a minimal "distance + point-in-polygon" consumer against every library it can find, strips the resulting binary, and reports both the binary size and the on-disk install size of each library. Override compiler -or flags via `CXX=` and `CXXFLAGS=`. - -## Methodology notes - -- **Same inputs everywhere.** All benchmarks pull data from - `common/random_data.hpp`, which is seeded deterministically. -- **Native types are pre-built outside the timing loop** for every library. - The timed work is the library's per-call computation, isolated from - `lat/lng → native-type` plumbing — so the numbers compare algorithmic cost, - not data-shape conversion overhead. (`geo-utils-cpp` accepts lat/lng - directly, so it has nothing to pre-build; that is a separate API-shape - advantage and not part of these speed numbers.) -- **Polygons are pre-built** outside the timing loop because in real code a - geofence is a one-time setup cost. -- **Don't compare GeographicLib's distance/heading head-to-head as "speed".** - It computes a different (more accurate) thing on the WGS84 ellipsoid. - Treat it as a "trade-off" data point. +or flags via `CXX=` and `CXXFLAGS=`; pass `STATIC=1` to build statically +linked binaries instead. diff --git a/docs/api.md b/docs/api.md index 77b258d..cfa67f8 100644 --- a/docs/api.md +++ b/docs/api.md @@ -5,15 +5,29 @@ are internal and not part of the supported API. ## Conventions -- Coordinates are in degrees (latitude, longitude) -- Distances are in meters -- Angles are in degrees unless otherwise specified -- Earth model: spherical (mean radius = 6371009 m) - -## Numerical notes - -- Results are approximate due to floating point arithmetic -- Precision decreases near the poles and for antipodal points +- **Units.** Coordinates are in degrees (latitude, longitude); distances + in meters; angles in degrees unless otherwise noted. 
+- **Earth model.** Spherical, mean radius 6371009 m — the same model + Google Maps geometry utilities use. See [benchmarks.md](benchmarks.md) + for the trade-off vs ellipsoidal libraries like GeographicLib. +- **Precision.** Floating-point arithmetic; precision degrades near the + poles and for antipodal pairs. +- **Thread safety.** All functions are pure (no global mutable state, no + caching) and safe to call concurrently from multiple threads. +- **Error handling.** Scalar functions (`heading`, `offset`, + `interpolate`, `distance_between`, `distance_to_segment`) are + `noexcept`. Out-of-domain inputs are not asserted: `interpolate` with + `fraction` outside `[0, 1]` extrapolates along the great circle, + `offset_origin` returns `std::nullopt` when no origin can be computed, + and pathological inputs may yield NaN. Container-taking functions + (`area`, `path_length`, `contains`, `on_edge`, `on_path`) are not + marked `noexcept` because the generic `Path` contract doesn't + constrain `operator[]` / `size()` to be `noexcept`; they don't throw + themselves. +- **Include strategy.** Each subsystem has its own header: + `` (types), `` (distance, heading, + area), `` (point-in-polygon, on-path). The umbrella + `` pulls all three in for convenience. ## LatLng @@ -25,8 +39,8 @@ Represents a geographic coordinate (latitude, longitude) in degrees. `geo::LatLng` — a point in geographical coordinates: latitude and longitude. -* Latitude ranges between `-90` and `90` degrees, inclusive -* Longitude ranges between `-180` and `180` degrees, inclusive. Note: `180` and `-180` are treated as equal. +- Latitude ranges between `-90` and `90` degrees, inclusive +- Longitude ranges between `-180` and `180` degrees, inclusive. Note: `180` and `-180` are treated as equal. 
```cpp geo::LatLng northPole{90, 0}; @@ -72,6 +86,18 @@ std::vector aroundNorthPole = { {89, 0}, {89, 120}, {89, -120} }; std::array northPole = { {90, 0} }; ``` +> **Note.** A braced-init-list cannot be passed directly to a `Path` +> template parameter — it has no `operator[]` and no deducible type. +> Wrap it in a container: +> +> ```cpp +> // Won't compile: +> geo::area({{0, 0}, {0, 10}, {10, 0}}); +> +> // Wrap in a vector: +> geo::area(std::vector{{0, 0}, {0, 10}, {10, 0}}); +> ``` + --- ## Spherical functions @@ -86,17 +112,17 @@ Spherical geometry utilities for computing angles, distances, and areas. **`geo::heading(const LatLng& from, const LatLng& to)`** — Returns the heading from one LatLng to another. Headings are expressed in degrees clockwise from North within the range `[-180, 180)`. -* `from` — the starting point -* `to` — the destination point +- `from` — the starting point +- `to` — the destination point -Returns: `double` — the heading in degrees clockwise from north +Returns: `double` — heading in degrees clockwise from north, in `[-180, 180)`. ```cpp -geo::LatLng front{0, 0}; -geo::LatLng right{0, 90}; +geo::LatLng equator{0, 0}; // on the equator at lng=0 +geo::LatLng east{0, 90}; // 90° east along the equator -std::cout << geo::heading(right, front); // -90 -std::cout << geo::heading(front, right); // +90 +std::cout << geo::heading(equator, east); // +90 (due east) +std::cout << geo::heading(east, equator); // -90 (due west) ``` --- @@ -105,16 +131,16 @@ std::cout << geo::heading(front, right); // +90 **`geo::offset(const LatLng& from, double distance, double heading)`** — Returns the LatLng resulting from moving a distance from an origin in the specified heading (degrees clockwise from north). 
-* `from` — the starting point -* `distance` — the distance to travel, in meters -* `heading` — the heading in degrees clockwise from north +- `from` — the starting point +- `distance` — the distance to travel, in meters +- `heading` — the heading in degrees clockwise from north -Returns: `LatLng` — the destination point +Returns: `LatLng` — the destination point. ```cpp geo::LatLng front{0, 0}; -// quarter-circumference of Earth ≈ 10,007.5 km +// Quarter-circumference of Earth: π·R/2 ≈ 10,007.5 km (R = 6371009 m). constexpr double quarter = 10'007'543.4; auto up = geo::offset(front, quarter, 0); // { 90, 0} @@ -127,19 +153,28 @@ auto right = geo::offset(front, quarter, 90); // { 0, 90} ### offset_origin -**`geo::offset_origin(const LatLng& to, double distance, double heading)`** — Returns the origin point that, when travelling `distance` meters at `heading`, arrives at `to`. Returns `std::nullopt` when no solution exists. +**`geo::offset_origin(const LatLng& to, double distance, double heading)`** — Returns the origin point that, when travelling `distance` meters at `heading`, arrives at `to`. Returns `std::nullopt` when no origin can be computed — including the degenerate cases at the poles. -* `to` — the destination point -* `distance` — the distance travelled, in meters -* `heading` — the heading in degrees clockwise from north +- `to` — the destination point +- `distance` — the distance travelled, in meters +- `heading` — the heading in degrees clockwise from north -Returns: `std::optional` — the origin, or `std::nullopt` if unreachable +Returns: `std::optional` — the origin, or `std::nullopt` if unreachable. ```cpp -geo::LatLng front{0, 0}; - -auto r0 = geo::offset_origin(front, 0, 0); -assert(r0.has_value() && front == r0.value()); +geo::LatLng start{40.0, -74.0}; +constexpr double distance = 5'000'000.0; // 5,000 km +constexpr double heading = 60.0; // east-northeast + +// Round-trip: offset_origin undoes offset. 
+auto destination = geo::offset(start, distance, heading); +auto origin = geo::offset_origin(destination, distance, heading); +assert(origin.has_value() && start.approx_equal(origin.value(), 1e-6)); + +// nullopt at the pole with quarter-circumference east heading — all +// directions are degenerate there, no origin solution exists. +auto pole = geo::offset_origin(geo::LatLng{90, 0}, 10'007'543.4, 90.0); +assert(!pole.has_value()); ``` --- @@ -148,16 +183,17 @@ assert(r0.has_value() && front == r0.value()); **`geo::interpolate(const LatLng& from, const LatLng& to, double fraction)`** — Returns the LatLng which lies the given fraction of the way between the origin and the destination (spherical linear interpolation). -* `from` — the starting point -* `to` — the destination point -* `fraction` — a value in `[0, 1]` +- `from` — the starting point +- `to` — the destination point +- `fraction` — typically in `[0, 1]`; values outside extrapolate along the same great-circle arc -Returns: `LatLng` +Returns: `LatLng` — the interpolated point on the great-circle arc from `from` to `to`. ```cpp geo::LatLng up{90, 0}; geo::LatLng front{0, 0}; +// The arc from equator to pole spans 90°, so fraction 1/90 → 1° latitude. assert(geo::LatLng{1, 0} == geo::interpolate(front, up, 1 / 90.0)); assert(geo::LatLng{89, 0} == geo::interpolate(front, up, 89 / 90.0)); ``` @@ -166,50 +202,62 @@ assert(geo::LatLng{89, 0} == geo::interpolate(front, up, 89 / 90.0)); ### angle_between -**`geo::angle_between(const LatLng& from, const LatLng& to)`** — Returns the central angle between two points, in radians. +**`geo::angle_between(const LatLng& from, const LatLng& to)`** — Returns the central angle (great-circle arc) between two points, in radians. -Returns: `double` +Returned in radians, not degrees, because the value equals the great-circle +arc length on the unit sphere — multiply by Earth's mean radius (6371009 m) +to get meters. This is exactly how `distance_between` is implemented. 
+ +Returns: `double` — central angle in radians, in `[0, π]`. + +```cpp +geo::LatLng pole{90, 0}; +geo::LatLng equator{0, 0}; + +double angle = geo::angle_between(pole, equator); // π/2 ≈ 1.5708 +double meters = angle * 6371009.0; // ≈ 1.001e+07 (quarter-circumference) +``` --- ### distance_between -**`geo::distance_between(const LatLng& from, const LatLng& to)`** — Returns the distance between two points, in meters. +**`geo::distance_between(const LatLng& from, const LatLng& to)`** — Returns the great-circle distance between two points, in meters. -Returns: `double` +Returns: `double` — distance in meters, in `[0, π·R]` (i.e. up to half Earth's circumference). ```cpp geo::LatLng up{90, 0}; geo::LatLng down{-90, 0}; -std::cout << geo::distance_between(up, down); // ~2.00151e+07 +std::cout << geo::distance_between(up, down); // ~20,015 km (π·R, half Earth's circumference) ``` --- ### path_length -**`geo::path_length(const Path& path)`** — Returns the length of the given path, in meters. +**`geo::path_length(const Path& path)`** — Returns the total length of the polyline (sum of great-circle distances between consecutive points), in meters. -Returns: `double` +Returns: `double` — total length in meters; `0` for a path with fewer than 2 points. ```cpp std::vector path = { {0, 0}, {90, 0}, {0, 90} }; -std::cout << geo::path_length(path); // ~20,015,087 m (pi*R) +std::cout << geo::path_length(path); // ~20,015 km (π·R, half Earth's circumference) ``` --- ### area -**`geo::area(const Path& path)`** — Returns the area of a closed path on Earth, in square meters. +**`geo::area(const Path& path)`** — Returns the area of a closed path on Earth, in square meters. The path is implicitly closed (last vertex connects back to the first); equivalent to `std::abs(signed_area(path))`. -Returns: `double` +Returns: `double` — area in square meters; always non-negative, regardless of vertex winding order. ```cpp // Lune bounded by meridians 0 and 90 — one quarter of the Earth's surface. 
std::vector path = { {0, 90}, {-90, 0}, {0, 0}, {90, 0}, {0, 90} }; -std::cout << geo::area(path); // ~pi*R^2 (one quarter of the total 4*pi*R^2) +std::cout << geo::area(path); // ~1.275e+14 m² (π·R², one quarter of Earth's surface) ``` --- @@ -246,13 +294,18 @@ Utilities for computations involving polygons and polylines. #include ``` +> **Note on `geodesic` defaults.** `contains` defaults to rhumb-line +> edges (cheaper, fine for polygons well inside one hemisphere); +> `on_edge` and `on_path` default to great-circle edges (more accurate, +> especially near the poles). Pass `geodesic` explicitly when in doubt. + ### contains **`geo::contains(const LatLng& point, const Path& polygon, bool geodesic = false)`** — Returns whether the given point lies inside the specified polygon. The polygon is always considered closed. The South Pole is always outside. -* `geodesic` — `true` for great circle edges, `false` for rhumb edges +- `geodesic` — `false` (default) for rhumb-line edges, `true` for great-circle edges. See the note at the top of this section. -Returns: `bool` +Returns: `bool` — `true` if `point` is inside the polygon, `false` otherwise. ```cpp std::vector aroundNorthPole = { {89, 0}, {89, 120}, {89, -120} }; @@ -265,9 +318,12 @@ std::cout << geo::contains(geo::LatLng{-90, 0}, aroundNorthPole); // false ### on_edge -**`geo::on_edge(const LatLng& point, const Path& polygon, bool geodesic = true, double tolerance = geo::kDefaultTolerance)`** — Returns whether the given point lies on or near a polygon edge, within `tolerance` meters. +**`geo::on_edge(const LatLng& point, const Path& polygon, bool geodesic = true, double tolerance = geo::kDefaultTolerance)`** — Returns whether the given point lies on or near a polygon edge (including the closing segment between the last and first vertices), within `tolerance` meters. + +- `geodesic` — `true` (default) for great-circle edges, `false` for rhumb-line edges. See the note at the top of this section. 
+- `tolerance` — maximum distance in meters between `point` and the nearest edge to still count as "on"; defaults to `geo::kDefaultTolerance` (0.1 m). -Returns: `bool` +Returns: `bool` — `true` if `point` is within `tolerance` of any edge. ```cpp std::vector equator = { {0, 90}, {0, 180} }; @@ -280,24 +336,35 @@ std::cout << geo::on_edge(geo::LatLng{0, 90 - 2e-6}, equator); // false ### on_path -**`geo::on_path(const LatLng& point, const Path& polyline, bool geodesic = true, double tolerance = geo::kDefaultTolerance)`** — Returns whether the given point lies on or near a polyline, within `tolerance` meters. The closing segment between the first and last points is **not** included. +**`geo::on_path(const LatLng& point, const Path& polyline, bool geodesic = true, double tolerance = geo::kDefaultTolerance)`** — Returns whether the given point lies on or near a polyline, within `tolerance` meters. The closing segment between the first and last points is **not** included — that's the only difference vs `on_edge`. + +- `geodesic` — `true` (default) for great-circle edges, `false` for rhumb-line edges. See the note at the top of this section. +- `tolerance` — maximum distance in meters; defaults to `geo::kDefaultTolerance` (0.1 m). -Returns: `bool` +Returns: `bool` — `true` if `point` is within `tolerance` of any segment of the polyline. + +```cpp +std::vector polyline = { {0, 0}, {0, 10}, {10, 10}, {10, 0} }; + +std::cout << geo::on_path(geo::LatLng{0, 5}, polyline); // true — on first segment +std::cout << geo::on_path(geo::LatLng{5, 0}, polyline); // false — closing edge (10,0)→(0,0) is excluded +``` --- ### distance_to_segment -**`geo::distance_to_segment(const LatLng& point, const LatLng& start, const LatLng& end)`** — Returns the distance in meters from `point` to the line segment `[start, end]` on the sphere. 
+**`geo::distance_to_segment(const LatLng& point, const LatLng& start, const LatLng& end)`** — Returns the distance in meters from `point` to the closest point on the line segment `[start, end]` on the sphere. If the perpendicular foot falls outside the segment, returns the distance to the nearer endpoint. -Returns: `double` +Returns: `double` — distance in meters, always ≥ 0. ```cpp geo::LatLng start{28.05359, -82.41632}; geo::LatLng end{28.05310, -82.41634}; geo::LatLng point{28.05342, -82.41594}; -std::cout << geo::distance_to_segment(point, start, end); // ~37.95 +// Point lies ~38 m east-northeast of the segment +std::cout << geo::distance_to_segment(point, start, end); ``` --- diff --git a/docs/benchmarks.md b/docs/benchmarks.md index 92ffe59..55e799d 100644 --- a/docs/benchmarks.md +++ b/docs/benchmarks.md @@ -1,33 +1,27 @@ # Benchmarks -`geo-utils-cpp` aims to be **as fast as a hand-written haversine** with a -**fraction of the disk footprint** of the popular alternatives. This page -shows the numbers behind that claim and how to reproduce them. +`geo-utils-cpp` is a small, dependency-free lat/lng geometry library that +stays close to hand-written spherical math while avoiding a full geometry +framework dependency. This page shows the benchmark results, methodology, +and trade-offs against S2 Geometry, Boost.Geometry, and GeographicLib. For build and run instructions see [`benchmarks/README.md`](../benchmarks/README.md). ## TL;DR -- **Speed.** On the algorithm itself (native types pre-built): ties - Boost.Geometry's spherical strategy on `distance` / `heading` / - `path_length` (within noise), wins clearly on `area`, and matches a - hand-written haversine on `distance`. S2 Geometry's 3D-cartesian model - is **faster** than us on `distance`, `path_length`, and `contains` — but - it pays a per-call `lat/lng → S2Point` conversion in real-world - lat/lng-input workloads (not counted in these algorithm-only numbers). 
- ~20–30× faster than GeographicLib's WGS84 geodesic — but **less - accurate**, on a sphere. -- **Disk footprint.** **36 KB** of headers vs **32.8 MB** for S2 (with - abseil), **12.3 MB** for Boost.Geometry's `geometry` subset alone, - **4.6 MB** for GeographicLib — a ~130–900× difference in what you have - to ship or have on disk. -- **Where competitors win.** S2 wins on `contains` (its 3D edge-crossing - plus a bounding-rectangle prefilter make throughput roughly constant in - vertex count) and on `distance` / `path_length` once you exclude - conversion. If those are your hot path *and* you can ship a 33 MB - dependency, S2 is the right tool. **Where we win.** `area` (allocation- - free triangle-fan accumulator), and any workload where shipping a - multi-MB dependency is a non-starter. +- **Speed.** Matches Boost.Geometry's spherical strategy and hand-written + haversine on `distance` / `heading`; wins on `area`; loses `distance`, + `path_length`, and `contains` to S2 on algorithm-only numbers (S2's + `lat/lng → S2Point` conversion is not counted). ~30× faster than GeographicLib's + WGS84 geodesic — but, being spherical rather than ellipsoidal, less accurate. +- **Deployment footprint.** Header-only, zero deps — nothing to add to + your build's dependency tree, and no `.so`/`.dylib` to ship alongside + the binary. +- **When each library wins.** S2 wins on `contains` and on several + algorithm-only speed tests, especially if your data already lives as + `S2Point` end-to-end. geo-utils-cpp is best-in-class on `area`, and on + lat/lng-input workloads where adding a geometry framework to your + build isn't an option. Full per-library guidance is in [Where each library is the right tool](#where-each-library-is-the-right-tool). ## Methodology @@ -61,6 +55,15 @@ but is not what the speed numbers below measure. - **GeographicLib vs us:** uses Karney's iterative WGS84 geodesic. Slower *and* more accurate. Treat as a trade-off data point, not a "we are faster" claim. 
+- **GeographicLib `PolygonArea` is timed differently** for `area` / + `path_length`. It's an incremental accumulator: `AddPoint` itself does + the per-vertex geodesic `Inverse()` call, and `Compute()` only + finalizes the closing edge. We therefore measure the full + `PolygonArea + N×AddPoint + Compute()` pattern inside the timed loop + — that's what computing the area of an N-vertex polygon actually + costs in GeographicLib. The other libraries pre-build native types + outside the loop; for GeographicLib there is no separable "pre-build" + step to lift. - **GeographicLib has no native point-in-polygon.** Real capability gap. - **S2 has no public initial-bearing API.** Same. @@ -79,10 +82,15 @@ to reproduce. **Bold** number = column winner, or co-winners within ~5% | naive haversine | 38.3 | 26.0 | | S2 Geometry | **82.9** | **29.1** | | Boost.Geometry | 39.8 | **28.8** | -| GeographicLib (WGS84) | 1.25 | 1.24 | +| GeographicLib | 1.25 | 1.24 | We tie naive haversine and Boost.Geometry's spherical strategy within -noise — zero overhead from being a library. S2 is **2× faster at small N** +noise — zero overhead from being a library. The "naive" baseline is a +deliberately textbook haversine (recomputes `* π / 180` per call, no +trig caching); `geo-utils-cpp`'s actual implementation is hand-optimized +(cached `deg2rad` factors, combined `arc_hav` reductions), so "ties +naive" really means "the optimized library version is no slower than a +hand-rolled one-liner — overhead is zero". S2 is **2× faster at small N** because once the input is `S2Point` the per-pair distance reduces to a dot product / `acos`, cheaper than haversine; the gap closes at N=100 000 where all three become memory-bandwidth bound. Note: in real-world workloads @@ -95,35 +103,40 @@ substantially more accurate on long-distance pairs). 
| Library | N=1 000 | N=100 000 | | ---------------------- | -------: | --------: | | **geo-utils-cpp** | **24.9** | **15.5** | -| Boost.Geometry | 22.5 | **14.7** | +| Boost.Geometry | 22.5 | 14.7 | | GeographicLib | 1.16 | 1.16 | -| S2 Geometry | _no public bearing API_ | | +| S2 Geometry | — | — | + +S2 has no public initial-bearing API. ### `contains` (point-in-polygon) Million queries per second (1 000 query points per iteration). +> **Apples-to-apples caveat for this op.** The three libraries that +> implement it do *not* run the same algorithm. `geo-utils-cpp` uses an O(N) ray-cast over +> rhumb-line edges (its `geodesic=false` default — cheaper but less +> accurate near the poles). Boost.Geometry's `bg::within` with the +> spherical CS auto-selects `strategy::within::spherical_winding`, which +> traces *great-circle* edges and is materially more expensive per edge. +> S2 wins partly through algorithm (3D edge-crossing) and partly through +> structure (a bounding-rectangle prefilter on the loop). The Boost gap +> below therefore reflects algorithm choice as much as raw speed; see the +> commentary after the table. + | Library | poly N=10 | poly N=100 | poly N=1 000 | | -------------------- | --------: | ---------: | -----------: | | **geo-utils-cpp** | 16.2 | 2.87 | 0.329 | | S2 Geometry | **26.7** | **18.0** | **21.2** | | Boost.Geometry | 1.91 | 0.234 | 0.024 | -| GeographicLib | _no native PIP_ | | | - -`contains` is where the libraries differ most. **S2's `S2Loop::Contains` -exits early via a bounding-rectangle prefilter** and uses a tightly -inlined 3D edge-crossing routine — so its algorithmic throughput is -roughly *constant* in vertex count. Our implementation is a textbook O(N) -ray cast through edges (rhumb-line by default), so we lose to S2 even at -N=10 once the per-query lat/lng→S2Point conversion is excluded. With -real-world lat/lng inputs (where S2 *would* pay that conversion per -query), the gap closes for tiny polygons. 
Boost.Geometry's spherical -`within` traces *great-circle* edges — a heavier per-edge predicate that -explains its much lower throughput throughout. - -If your workload involves `contains` queries at any volume and you can -ship a 33 MB dependency, S2 wins. If you can't ship that — we still beat -Boost.Geometry by ~10× and ship at ~1/900 the size. +| GeographicLib | — | — | — | + +GeographicLib has no native point-in-polygon predicate. + +Practical takeaway: if `contains` is the hot path and your data already +lives as `S2Point` end-to-end, S2 is the right tool — throughput is +roughly constant in N because of the prefilter. For lat/lng-input +workloads we beat Boost.Geometry by ~10× while remaining header-only. ### `area` (M polygons/s × vertex count) @@ -134,9 +147,11 @@ Boost.Geometry by ~10× and ship at ~1/900 the size. | Boost.Geometry | 45.0 | 36.2 | 36.6 | | GeographicLib | 1.75 | 2.04 | 2.07 | -We win clearly on `area` — our spherical-triangle accumulation is a -straight-line loop with no allocation; S2's `S2Loop::GetArea` does more -work per vertex, and Boost.Geometry's strategy machinery costs ~1.8×. +N = vertices per polygon. + +We win clearly on `area` — our spherical-triangle accumulation is a tight +loop with no allocation; S2's `S2Loop::GetArea` does more work per +vertex, and Boost.Geometry's strategy machinery costs ~1.8×. ### `path_length` (M points/s) @@ -144,7 +159,7 @@ work per vertex, and Boost.Geometry's strategy machinery costs ~1.8×. | -------------------- | -------: | -------: | -------: | | **geo-utils-cpp** | 54.0 | 46.2 | 41.7 | | S2 Geometry | **105.1** | **96.7** | **91.6** | -| Boost.Geometry | 48.7 | **43.5** | **40.2** | +| Boost.Geometry | 48.7 | 43.5 | 40.2 | | GeographicLib | 1.53 | 1.27 | 1.23 | S2 wins on `path_length` algorithmically (~2×) — once the input is @@ -153,38 +168,52 @@ Boost.Geometry within noise. GeographicLib pays the ellipsoidal cost. 
Note: a lat/lng-input workload would push the S2 column down by the per-call conversion cost, which is not counted here. -## Disk footprint - -Stripped binary size of a minimal "distance + point-in-polygon" consumer -plus the on-disk install size of each library. Smaller is better. - -| Library | Stripped binary | Library install | Notes | -| -------------------- | --------------: | --------------: | ---------------------------------- | -| **geo-utils-cpp** | **33 KB** | **36 KB** | header-only, zero deps | -| naive haversine | **33 KB** | 0 | hand-written, no library | -| S2 Geometry | **33.4 KB** | 32.8 MB | S2 7.1 MB + abseil 14 MB (+ rest) | -| Boost.Geometry | 50.8 KB | 12.3 MB | only the `geometry` subset of Boost | -| GeographicLib | **33 KB** | 4.6 MB | distance only — no PIP | - -The "Library install" column is *what you have to ship or have on disk* to -use the library. For `geo-utils-cpp` that's the whole `include/` directory -(every header, every comment); for the others it's the package install -prefix from Homebrew. Boost is reported as just its `geometry` headers — -the full Boost install is ~362 MB. - -**The on-disk gap is roughly 130× to 900×.** If binary size matters -(embedded, container layers, mobile bundles), this is the headline. +## Deployment footprint + +A geometry library shows up in two places: at **build time** (what your +CMake has to find, what your container image has to install) and at +**runtime** (what has to sit alongside the binary so it can load the +library at startup). `geo-utils-cpp` is header-only with no deps, so +both are zero beyond the headers themselves. + +The table below covers the runtime side — stripped binary size of a +minimal "distance + point-in-polygon" consumer, dynamically linked +against each library. Smaller is better. 
+ +| Library | Stripped binary | Notes | +| -------------------- | --------------: | ---------------------------------------------- | +| **geo-utils-cpp** | **33 KB** | header-only — nothing else to ship | +| naive haversine | **33 KB** | hand-written, no library | +| S2 Geometry | 33.4 KB | dynamic linking; `libs2.dylib` required at runtime | +| Boost.Geometry | **50.8 KB** | header-only — nothing else to ship | +| GeographicLib | 33 KB | dynamic linking; `libGeographicLib.dylib` required; distance only — no PIP | + +**Bold** marks the entries that ship nothing beyond the binary (no shared +library needed at runtime); it does not rank the size column itself. + +These numbers are for **dynamic linking**: the binary itself stays small +because the library code lives in the shared object that has to be +present at runtime alongside the binary. Static linking shifts the cost +the other way — the binary grows, but only by the symbols the linker +actually keeps (with `-ffunction-sections -Wl,--gc-sections` or LTO, +unused code is pruned). For header-only libraries (`geo-utils-cpp`, +Boost.Geometry) there's nothing to ship beyond the binary in either mode. + +The `benchmarks/size/measure.sh` script supports `STATIC=1` if you want +to repeat the comparison for statically linked binaries on your own host +— numbers will depend on which symbols your code actually pulls in. ## Where each library is the right tool -- **geo-utils-cpp** — lat/lng-native API, no-deps constraint, - container/mobile bundle size matters, `area` is hot, sphere accuracy is - acceptable. Best when you'd otherwise be paying conversion cost per call. +- **geo-utils-cpp** — lat/lng-native API, no-deps constraint, `area` is + hot, sphere accuracy is acceptable. Best when adding a geometry + framework to your build (S2 + abseil, Boost.Geometry, GeographicLib) + isn't an option, or when you'd otherwise be paying a + lat/lng→native-type conversion on every call. 
- **S2 Geometry** — `contains` / `distance` / `path_length` are the hot - path, you can afford a ~33 MB install, and you're willing to keep data - as `S2Point` end-to-end (otherwise the lat/lng→S2Point conversion eats - the algorithmic win). Spatial indexing (`S2ShapeIndex`) available for - bigger workloads. + path, and you're willing to keep data as `S2Point` end-to-end + (otherwise the lat/lng→S2Point conversion eats the algorithmic win). + Spatial indexing (`S2ShapeIndex`) available for bigger workloads. - **Boost.Geometry** — already-Boost project, want one library for many geometry types and CSes. Ties us on most operations; loses on `area` and on `contains`. @@ -193,6 +222,15 @@ the full Boost install is ~362 MB. ## Reproducing +Every benchmark is registered with `Repetitions(5)->ReportAggregatesOnly(true)`, +so output shows `_mean` / `_median` / `_stddev` / `_cv` per data point — +that's what the numbers in the tables above are (median rows). Use the +standard Google Benchmark CLI flags (`--benchmark_repetitions=N`, +`--benchmark_min_time=...`, etc.) to override locally. + +See [`benchmarks/README.md`](../benchmarks/README.md) for build +prerequisites and target names. + ```sh # Speed cmake -S . -B build-bench \ @@ -201,7 +239,7 @@ cmake -S . 
-B build-bench \ cmake --build build-bench --target bench_all -j for b in build-bench/benchmarks/bench_*; do "$b"; done -# Disk footprint +# Deployment footprint ./benchmarks/size/measure.sh ``` From e828d429dffb69471c390a1b665e581ca11d43d2 Mon Sep 17 00:00:00 2001 From: Aleksandr Kovalko Date: Tue, 19 May 2026 03:06:14 +0200 Subject: [PATCH 9/9] Docs: getting-started.md clearer integration choices, expand Requirements --- README.md | 4 +- docs/getting-started.md | 149 +++++++++++++++++++++++++++++++++------- 2 files changed, 127 insertions(+), 26 deletions(-) diff --git a/README.md b/README.md index d60b07c..d73e6f6 100644 --- a/README.md +++ b/README.md @@ -113,8 +113,8 @@ Copy the `include/` directory into your project and add it to your include path. ### Using it from CMake -With any of the above methods (vcpkg, Conan, FetchContent, or a system -`find_package`), wire it into your build with: +With any of the above methods (vcpkg, xrepo, Conan, FetchContent, or a +system `find_package`), wire it into your build with: ```cmake find_package(GeoUtilsCpp 1.0.1 REQUIRED) diff --git a/docs/getting-started.md b/docs/getting-started.md index c164961..ce0bc1e 100644 --- a/docs/getting-started.md +++ b/docs/getting-started.md @@ -1,12 +1,29 @@ -# Getting Started +# Getting started + +This page walks through installing `geo-utils-cpp` and writing your first +program. For a wider tour of the library, see the [README](../README.md); +for full function-level documentation, see [api.md](api.md); for +performance comparisons against S2, Boost.Geometry, and GeographicLib, +see [benchmarks.md](benchmarks.md). ## Requirements -- C++17 or later +- C++17 or later (tested on C++17, C++20, and C++23 in CI) - CMake 3.14 or later +- A standard-conformant compiler. CI covers recent GCC, Clang, AppleClang, + and MSVC on Linux, macOS, and Windows — see the CI badges in the + [README](../README.md) for the exact matrix. 
+ +The library is header-only with no runtime dependencies — there is nothing +to link against beyond the headers themselves. ## Integration +Pick the option that matches how your project already manages dependencies. +If you don't have a preference, **FetchContent** is the lowest-friction +path: no separate install step, version pinned per project, works offline +once cached. + ### FetchContent (recommended) ```cmake @@ -24,15 +41,17 @@ target_link_libraries(your_target PRIVATE geo::utils) ### vcpkg -The library is available in the official [vcpkg registry](https://github.com/microsoft/vcpkg). +The library is available in the official +[vcpkg registry](https://github.com/microsoft/vcpkg). -Classic mode: +Classic mode — install once into the vcpkg shared store: ```sh vcpkg install geo-utils-cpp ``` -Manifest mode — add to your `vcpkg.json`: +Manifest mode — pin the dependency per-project in `vcpkg.json` (preferred +for new projects, since the manifest is committed alongside your source): ```json { @@ -42,7 +61,7 @@ Manifest mode — add to your `vcpkg.json`: } ``` -Then consume it from CMake: +In either mode, consume it from CMake: ```cmake find_package(GeoUtilsCpp 1.0.1 REQUIRED) @@ -76,18 +95,40 @@ A minimal smoke-test consumer (`tests/consumer/`) is included for both CMake and xmake builds — see [`tests/consumer/xmake.lua`](../tests/consumer/xmake.lua) for the xmake variant. -### find_package +### Conan + +```sh +conan install --requires=geo-utils-cpp/1.0.1 --build=missing +``` + +Conan Center support is pending +[conan-io/conan-center-index#30152](https://github.com/conan-io/conan-center-index/pull/30152); +until that lands, install from the local recipe in the +[conan/](https://github.com/gistrec/geo-utils-cpp/tree/master/conan) +directory. + +### System install -Install the library first, then: +Build and install the library to a prefix on your machine, then have +downstream projects locate it via `find_package`: + +```sh +cmake -S . 
-B build -DCMAKE_BUILD_TYPE=Release +cmake --install build --prefix /usr/local +``` ```cmake find_package(GeoUtilsCpp 1.0.1 REQUIRED) target_link_libraries(your_target PRIVATE geo::utils) ``` +If `find_package` can't locate the library, point CMake at the install +prefix via `-DCMAKE_PREFIX_PATH=/usr/local` (or wherever you installed it). + ### Manual -Copy the `include/` directory into your project and add it to your compiler's include path: +Copy the `include/` directory into your project and add it to your +compiler's include path: ```sh g++ main.cpp -std=c++17 -I/path/to/geo-utils-cpp/include -o main @@ -95,16 +136,17 @@ g++ main.cpp -std=c++17 -I/path/to/geo-utils-cpp/include -o main ## Usage -Include the umbrella header or individual modules: +Include the umbrella header for everything, or pull in individual modules +for tighter compile times: ```cpp // Everything at once #include // Or individual modules -#include -#include -#include +#include // types +#include // distance, heading, offset, area +#include // point-in-polygon, on-path ``` ### Example: distance and heading between two points @@ -126,11 +168,12 @@ int main() { } ``` -### Example: approximate equality with custom tolerance +### Example: round-trip with custom tolerance -`LatLng::operator==` is an approximate comparison with tolerance `1e-12` degrees -(≈ 0.1 nm). For comparisons at coarser scale — e.g. metre precision — pass an -explicit tolerance to `approx_equal`: +`LatLng::operator==` is an approximate comparison with tolerance `1e-12` +degrees (≈ 0.1 micrometers on Earth) — fine for near-exact equality, too +strict to compare results of floating-point geometry. 
For coarser-scale +comparisons, pass an explicit tolerance to `approx_equal`: ```cpp #include @@ -140,7 +183,9 @@ geo::LatLng end = geo::offset(start, 100'000.0, 90.0); // 100 km east auto recovered = geo::offset_origin(end, 100'000.0, 90.0); assert(recovered.has_value()); -assert(recovered->approx_equal(start, 1e-6)); // ~10 cm tolerance +// 1e-6 degrees ≈ 11 cm on the equator — comfortably above the +// floating-point noise of a 100 km round-trip. +assert(recovered->approx_equal(start, 1e-6)); ``` ### Example: point-in-polygon @@ -152,6 +197,7 @@ assert(recovered->approx_equal(start, 1e-6)); // ~10 cm tolerance #include int main() { + // A small box around midtown Manhattan. std::vector polygon = { { 40.7650, -73.9900 }, { 40.7650, -73.9700 }, @@ -160,22 +206,77 @@ int main() { }; geo::LatLng timesSquare = { 40.7580, -73.9855 }; + geo::LatLng centralPark = { 40.7829, -73.9654 }; std::cout << std::boolalpha; - std::cout << geo::contains(timesSquare, polygon) << "\n"; // true + std::cout << "Times Square inside: " << geo::contains(timesSquare, polygon) << "\n"; // true + std::cout << "Central Park inside: " << geo::contains(centralPark, polygon) << "\n"; // false } ``` -## Build options +More runnable samples live in [`examples/`](../examples/) — each is a +single `.cpp` file you can build and run standalone. + +### Editor integration -- `GEO_UTILS_CPP_BUILD_TESTS` — build unit tests (default: ON if top-level, OFF otherwise) -- `GEO_UTILS_CPP_BUILD_EXAMPLES` — build examples (default: ON if top-level, OFF otherwise) -- `GEO_UTILS_CPP_ENABLE_COVERAGE` — gcov instrumentation, GCC/Clang only (default: OFF) +After CMake configure, `compile_commands.json` is generated in the build +directory. Most language servers (clangd, ccls) pick it up automatically; +point your editor at the build directory if it doesn't. -## Building and testing +## Building the library itself + +The sections above are for *using* `geo-utils-cpp` as a dependency. 
If +you're working on the library — running tests, building examples, +measuring coverage — clone the repo and configure it as a top-level +project: ```sh cmake -S . -B build cmake --build build ctest --test-dir build --output-on-failure ``` + +### Build options + +| Option | Default | Purpose | +| --------------------------------- | ------------------------ | ------- | +| `GEO_UTILS_CPP_BUILD_TESTS` | `ON` if top-level | Build unit tests | +| `GEO_UTILS_CPP_BUILD_EXAMPLES` | `ON` if top-level | Build the examples in [`examples/`](../examples/) | +| `GEO_UTILS_CPP_BUILD_BENCHMARKS` | `OFF` | Build the benchmarks (pulls Google Benchmark and optionally S2/Boost/GeographicLib) | +| `GEO_UTILS_CPP_ENABLE_COVERAGE` | `OFF` | gcov instrumentation; GCC/Clang only | + +Downstream users consuming `geo-utils-cpp` as a dependency don't need to +touch these — tests and examples are off by default when the library is +included via `FetchContent` / `find_package`. + +## Troubleshooting + +A few issues that catch people on first use: + +- **`find_package(GeoUtilsCpp)` fails.** The install prefix isn't on + CMake's search path. Add `-DCMAKE_PREFIX_PATH=/path/to/prefix` to your + configure command (or set the `CMAKE_PREFIX_PATH` environment variable). + +- **`FetchContent` re-downloads on every configure.** Add + `-DFETCHCONTENT_UPDATES_DISCONNECTED=ON` once the dependency is cached, + or pin to a specific `GIT_TAG` (which we already do in the snippet + above). + +- **`no matching function for call to 'area({{...}, ...})'`.** A braced + initializer can't be deduced as a `Path` template parameter — wrap it + in a `std::vector` first. See the Path notes in + [api.md](api.md#path). + +- **clangd doesn't see the headers.** Make sure `compile_commands.json` + exists in your build directory (if it doesn't, re-configure with + `-DCMAKE_EXPORT_COMPILE_COMMANDS=ON`) and that your editor is pointed at it. + +## Next steps + +- [API reference](api.md) — every public function, with examples and + edge-case notes. +- [Benchmarks](benchmarks.md) — how `geo-utils-cpp` compares to S2, + Boost.Geometry, and GeographicLib, and where each one is the right + tool. 
+- [`examples/`](../examples/) — runnable code samples. +- [Open an issue](https://github.com/gistrec/geo-utils-cpp/issues) if + something here doesn't work or doesn't make sense.