diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..c3c8a8c --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,16 @@ +version: 2 +updates: + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "weekly" + commit-message: + prefix: "chore(deps)" + + - package-ecosystem: "uv" + directory: "/" + schedule: + interval: "weekly" + commit-message: + prefix: "chore(deps)" + open-pull-requests-limit: 5 diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..920aff3 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,40 @@ +name: CI + +on: + push: + branches: [main] + pull_request: + +jobs: + lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: astral-sh/setup-uv@v4 + - name: Ruff check + run: uvx ruff check kos/ tests/ examples/ + - name: Ruff format check + run: uvx ruff format --check kos/ tests/ examples/ + + test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: astral-sh/setup-uv@v4 + - name: Install + run: uv sync --extra dev + - name: Pytest + run: uv run pytest tests/ -v + + notebook-pairing-check: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: astral-sh/setup-uv@v4 + - name: Verify jupytext-paired examples are in sync + run: | + # The .ipynb in examples/ is paired with a .py shadow. Running + # --sync is a no-op if both are in sync; if it produces a diff, + # someone edited one half without the other. + uvx --with jupytext jupytext --sync examples/*.ipynb + git diff --exit-code -- examples/ diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..ec95e81 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,57 @@ +# Contributing + +kos is a primitives library — small, focused implementations of measurement +metrics for perp microstructure research. Contributions that fit: + +1. 
**Bug fixes with property tests.** If you can write a property the existing + test suite doesn't cover and find a case where it fails, that's the + highest-value contribution. See `tests/test_properties.py` for the style. +2. **New primitives.** A measurement metric used in the research literature + that fits the kos shape: pure-function, well-tested, runnable on a numpy + array. Open an issue first to discuss scope. +3. **Documentation.** Better docstrings, clearer worked examples in + `examples/`, prose-side improvements that get rendered into the demos. +4. **Numerical correctness.** Edge cases (empty arrays, NaN propagation, + single-element inputs) where the current implementation is sloppy. + +Real-data demos and applied research live in +[`paros`](https://github.com/Xylem-Group/paros), not here. kos's `examples/` +are synthetic-data demonstrations of the primitives; cross-venue replication, +benchmark tables, and venue-specific calibrations belong in paros. + +## Code style + +- Python 3.11+. `uv sync --extra dev` for the dev environment. +- `uvx ruff check` and `uvx ruff format --check` run in CI on `kos/`, + `tests/`, and `examples/`. Run them locally before pushing. +- Tests are pytest. Property tests use plain `assert` and explicit examples; + not currently using hypothesis. +- Keep examples deterministic — seed RNG explicitly. + +## Notebook ↔ .py pairing + +The example notebooks in `examples/` are paired with `.py` shadows via +[jupytext]. Both files exist in the repo; `.ipynb` carries outputs, `.py` is +the diffable source. + +```bash +# Edit either file, then sync the other: +uvx --with jupytext jupytext --sync examples/markout_demo.ipynb + +# Add a new paired example: +uvx --with jupytext jupytext --set-formats ipynb,py:percent examples/new_demo.ipynb +uvx --with jupytext jupytext --sync examples/new_demo.ipynb +``` + +CI runs `--sync` and fails if it produces a diff. + +## License + +CC BY-NC 4.0 — see `LICENSE`. 
Non-commercial use, attribution required. +Contributing implies you accept the license terms. + +## Reporting security issues + +See [SECURITY.md](SECURITY.md). Don't open a public issue for vulnerabilities. + +[jupytext]: https://jupytext.readthedocs.io/ diff --git a/README.md b/README.md index 1ebb26a..87c2f1c 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,10 @@ # kos +[![CI](https://github.com/Xylem-Group/kos/actions/workflows/ci.yml/badge.svg)](https://github.com/Xylem-Group/kos/actions/workflows/ci.yml) +[![License: CC BY-NC 4.0](https://img.shields.io/badge/license-CC%20BY--NC%204.0-lightgrey.svg)](LICENSE) +[![Python 3.11+](https://img.shields.io/badge/python-3.11%2B-blue.svg)](pyproject.toml) +[![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff) + **Measurement primitives for perp microstructure research.** Reference implementations of the metrics we use at Xylem Group, published as a runnable companion to our [research notes](https://xylem-group.org). If you read a method described in prose, you can run it on your own data. Three primitives — `markout`, `vpin`, `if_stress` — each in `kos/<name>.py` with a synthetic notebook demo in `examples/` and tests in `tests/`. Real-data demos and applied research live in the sister repo [`paros`](https://github.com/Xylem-Group/paros), which builds on these primitives. diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 0000000..550d3d0 --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,31 @@ +# Security policy + +kos is a numerical library — pure-function metrics on numpy arrays, no +network calls, no file I/O outside what the user invokes. The realistic +threat surface is small. But if you find something worth reporting, please +report it privately rather than opening a public issue. 
+ +## Reporting a vulnerability + +Email **security@xylem-group.org** with: + +- A description of the issue and the affected primitive (`markout`, + `vpin`, `if_stress`, or another module). +- A minimal reproduction (test case or array input). +- Whether you've shared it elsewhere (private disclosure timeline matters). + +We'll acknowledge within 72 hours and aim to fix or comment on a path +forward within two weeks. We don't run a paid bug bounty for this repo. + +## Out of scope + +- Numerical-stability concerns at extreme inputs (e.g., `markout` on + arrays with `inf` or all-NaN slices). Open a regular issue or PR — these + are correctness bugs, not security issues. +- Methodology disagreements (e.g., "VPIN bucket sizing should differ"). + Open a regular issue or PR. +- Issues in dependencies (numpy). Report those upstream. + +## Supported versions + +kos is pre-release; only the latest release tag and `main` are supported. diff --git a/examples/if_stress_demo.ipynb b/examples/if_stress_demo.ipynb index d94071b..462a21f 100644 --- a/examples/if_stress_demo.ipynb +++ b/examples/if_stress_demo.ipynb @@ -142,6 +142,9 @@ } ], "metadata": { + "jupytext": { + "formats": "ipynb,py:percent" + }, "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", diff --git a/examples/if_stress_demo.py b/examples/if_stress_demo.py index 53245e5..e1816a5 100644 --- a/examples/if_stress_demo.py +++ b/examples/if_stress_demo.py @@ -1,10 +1,29 @@ -"""Sweep IF size to find the smallest that survives a 15% shock 95% of the time.""" +# --- +# jupyter: +# jupytext: +# formats: ipynb,py:percent +# text_representation: +# extension: .py +# format_name: percent +# format_version: '1.3' +# jupytext_version: 1.19.1 +# kernelspec: +# display_name: Python 3 (ipykernel) +# language: python +# name: python3 +# --- +# %% [markdown] +# # IF Stress Demo +# Sweep IF size to find the smallest that survives a 15% shock 95% of the time. 
+ +# %% import numpy as np from kos.if_stress import Position, survival_probability +# %% def population(rng: np.random.Generator) -> list[Position]: """100 long positions, leverage drawn from a 5-50x distribution.""" n = 100 @@ -16,20 +35,49 @@ def population(rng: np.random.Generator) -> list[Position]: ] -def main() -> None: - shock = -0.15 - print(f"shock: {shock:+.0%}") - print(f"{'IF size ($M)':>14} {'P(survive)':>12}") - for if_size_m in [0.5, 1, 2, 5, 10, 20]: - p = survival_probability( - population_factory=population, - shock_pct=shock, - if_initial=if_size_m * 1e6, - n_trials=200, - rng=np.random.default_rng(0), - ) - print(f"{if_size_m:14.1f} {p:12.2%}") +# %% +shock = -0.15 +fill_quality_bps = -500.0 # 5% slippage on stressed liquidations +print(f"shock: {shock:+.0%}, fill quality: {fill_quality_bps:+.0f} bps") +print(f"{'IF size ($M)':>14} {'P(survive)':>12}") +for if_size_m in [0.01, 0.05, 0.1, 0.25, 0.5, 1, 2, 5]: + p = survival_probability( + population_factory=population, + shock_pct=shock, + if_initial=if_size_m * 1e6, + n_trials=200, + fill_quality_bps=fill_quality_bps, + rng=np.random.default_rng(0), + ) + print(f"{if_size_m:14.2f} {p:12.2%}") + +# %% +import matplotlib.pyplot as plt + +if_sizes_m = [0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2, 5] +probs = [ + survival_probability( + population_factory=population, + shock_pct=shock, + if_initial=s * 1e6, + n_trials=200, + fill_quality_bps=fill_quality_bps, + rng=np.random.default_rng(0), + ) + for s in if_sizes_m +] + +fig, ax = plt.subplots(figsize=(8, 4)) +ax.semilogx(if_sizes_m, probs, marker="o") +ax.axhline(0.95, color="red", linestyle="--", alpha=0.5, label="95% target") +ax.set_xlabel("IF size ($M)") +ax.set_ylabel("P(survive 15% shock)") +ax.set_title(f"IF sizing curve — 100-position book, fill quality {fill_quality_bps:+.0f} bps") +ax.set_ylim(-0.05, 1.05) +ax.legend() +ax.grid(True, alpha=0.3) +plt.show() +# %% -if __name__ == "__main__": - main() +# %% diff --git 
a/examples/markout_demo.ipynb b/examples/markout_demo.ipynb index dd476ee..3e847b6 100644 --- a/examples/markout_demo.ipynb +++ b/examples/markout_demo.ipynb @@ -16,8 +16,8 @@ "metadata": {}, "outputs": [], "source": [ - "import numpy as np\n", "import matplotlib.pyplot as plt\n", + "import numpy as np\n", "\n", "from kos.markout import (\n", " MarkoutNormalizers,\n", @@ -115,6 +115,9 @@ } ], "metadata": { + "jupytext": { + "formats": "ipynb,py:percent" + }, "kernelspec": { "display_name": "kos (.venv)", "language": "python", diff --git a/examples/markout_demo.py b/examples/markout_demo.py index 54fbbbe..d402d6a 100644 --- a/examples/markout_demo.py +++ b/examples/markout_demo.py @@ -1,5 +1,24 @@ -"""Markout curve for a single buy fill against a synthetic mid path.""" +# --- +# jupyter: +# jupytext: +# formats: ipynb,py:percent +# text_representation: +# extension: .py +# format_name: percent +# format_version: '1.3' +# jupytext_version: 1.19.1 +# kernelspec: +# display_name: kos (.venv) +# language: python +# name: kos +# --- +# %% [markdown] +# # Markout Demo +# Markout curve for a single buy fill against a synthetic, adversely-drifting mid path. + +# %% +import matplotlib.pyplot as plt import numpy as np from kos.markout import ( @@ -9,32 +28,39 @@ markout_normalized, ) +# %% +fill_price = 100.0 +horizons = log_spaced_horizons(t_min_ms=1.0, t_max_ms=60_000.0, n=10) -def main() -> None: - fill_price = 100.0 - horizons = log_spaced_horizons(t_min_ms=1.0, t_max_ms=60_000.0, n=10) - - # Synthetic mid path: drift -2 bps per sqrt(s), so this fill - # was adversely selected at every horizon. - rng = np.random.default_rng(7) - sigma_bps_per_sqrt_ms = 0.5 - drift_bps_per_sqrt_ms = -0.2 - z = rng.standard_normal(len(horizons)) - bps_path = (drift_bps_per_sqrt_ms + sigma_bps_per_sqrt_ms * z) * np.sqrt(horizons) - mids = fill_price * (1 + bps_path / 1e4) +# Synthetic mid path: drift -2 bps per sqrt(s), so this fill +# was adversely selected at every horizon. 
+rng = np.random.default_rng(7) +sigma_bps_per_sqrt_ms = 0.5 +drift_bps_per_sqrt_ms = -0.2 +z = rng.standard_normal(len(horizons)) +bps_path = (drift_bps_per_sqrt_ms + sigma_bps_per_sqrt_ms * z) * np.sqrt(horizons) +mids = fill_price * (1 + bps_path / 1e4) - raw = markout_curve("buy", fill_price, horizons, mids) - norm = MarkoutNormalizers( - half_spread_bps=2.0, - sigma_bps_per_sqrt_ms=sigma_bps_per_sqrt_ms, - impact_prior_bps=4.0, - ) +raw = markout_curve("buy", fill_price, horizons, mids) +norm = MarkoutNormalizers( + half_spread_bps=2.0, + sigma_bps_per_sqrt_ms=sigma_bps_per_sqrt_ms, + impact_prior_bps=4.0, +) - print(f"{'horizon (ms)':>14} {'raw bps':>10} {'/HS':>8} {'/vol':>8} {'/imp':>8}") - for h, r in zip(horizons, raw): - n = markout_normalized(r, h, norm) - print(f"{h:14.2f} {r:10.2f} {n['hs']:8.2f} {n['vol']:8.2f} {n['impact']:8.2f}") +print(f"{'horizon (ms)':>14} {'raw bps':>10} {'/HS':>8} {'/vol':>8} {'/imp':>8}") +for h, r in zip(horizons, raw): + n = markout_normalized(r, h, norm) + print(f"{h:14.2f} {r:10.2f} {n['hs']:8.2f} {n['vol']:8.2f} {n['impact']:8.2f}") +# %% +fig, ax = plt.subplots(figsize=(8, 4)) +ax.semilogx(horizons, raw, marker="o") +ax.axhline(0, color="gray", linewidth=0.8) +ax.set_xlabel("horizon (ms)") +ax.set_ylabel("markout (bps)") +ax.set_title("Raw markout curve — buy fill") +ax.grid(True, alpha=0.3) +plt.show() -if __name__ == "__main__": - main() +# %% diff --git a/examples/vpin_demo.ipynb b/examples/vpin_demo.ipynb index b6ed533..8e0ff53 100644 --- a/examples/vpin_demo.ipynb +++ b/examples/vpin_demo.ipynb @@ -16,8 +16,8 @@ "metadata": {}, "outputs": [], "source": [ - "import numpy as np\n", "import matplotlib.pyplot as plt\n", + "import numpy as np\n", "\n", "from kos.vpin import vpin" ] @@ -55,7 +55,7 @@ "series = vpin(returns, volumes, bucket_size=50.0, window=50)\n", "print(f\"VPIN buckets: {len(series)}\")\n", "print(f\" pre-shock mean (first quarter): {series[: len(series) // 4].mean():.3f}\")\n", - "print(f\" post-shock 
mean (last quarter): {series[-len(series) // 4:].mean():.3f}\")" + "print(f\" post-shock mean (last quarter): {series[-len(series) // 4 :].mean():.3f}\")" ] }, { @@ -97,6 +97,9 @@ } ], "metadata": { + "jupytext": { + "formats": "ipynb,py:percent" + }, "kernelspec": { "display_name": "kos (.venv)", "language": "python", diff --git a/examples/vpin_demo.py b/examples/vpin_demo.py index aeb0b68..54a0f5d 100644 --- a/examples/vpin_demo.py +++ b/examples/vpin_demo.py @@ -1,30 +1,56 @@ -"""VPIN (Volume-synchronized Probability of Informed Trading) on a synthetic tape with an injected toxic regime.""" - +# --- +# jupyter: +# jupytext: +# formats: ipynb,py:percent +# text_representation: +# extension: .py +# format_name: percent +# format_version: '1.3' +# jupytext_version: 1.19.1 +# kernelspec: +# display_name: kos (.venv) +# language: python +# name: kos +# --- + +# %% [markdown] +# # VPIN Demo +# Volume-synchronized Probability of Informed Trading on a synthetic tape with an injected toxic regime. + +# %% +import matplotlib.pyplot as plt import numpy as np from kos.vpin import vpin - -def main() -> None: - rng = np.random.default_rng(42) - n = 20_000 - - # Phase 1: balanced, low-vol. - # Phase 2: directional shock — returns positive on average. - returns = np.concatenate( - [ - rng.normal(0.0, 1.0, size=n // 2), - rng.normal(0.5, 1.0, size=n // 2), # injected toxic - ] - ) - volumes = np.abs(rng.normal(1.0, 0.2, size=n)) + 0.1 - - series = vpin(returns, volumes, bucket_size=50.0, window=50) - print(f"VPIN buckets: {len(series)}") - print(f" pre-shock mean (first quarter): {series[: len(series) // 4].mean():.3f}") - print(f" post-shock mean (last quarter): {series[-len(series) // 4:].mean():.3f}") - print("Expect post-shock VPIN materially higher than pre.") - - -if __name__ == "__main__": - main() +# %% +rng = np.random.default_rng(42) +n = 20_000 + +# Phase 1: balanced, low-vol. +# Phase 2: directional shock — returns positive on average. 
+returns = np.concatenate( + [ + rng.normal(0.0, 1.0, size=n // 2), + rng.normal(0.5, 1.0, size=n // 2), # injected toxic + ] +) +volumes = np.abs(rng.normal(1.0, 0.2, size=n)) + 0.1 + +series = vpin(returns, volumes, bucket_size=50.0, window=50) +print(f"VPIN buckets: {len(series)}") +print(f" pre-shock mean (first quarter): {series[: len(series) // 4].mean():.3f}") +print(f" post-shock mean (last quarter): {series[-len(series) // 4 :].mean():.3f}") + +# %% +fig, ax = plt.subplots(figsize=(9, 4)) +ax.plot(series, linewidth=0.8) +ax.axvline(len(series) // 2, color="red", linestyle="--", alpha=0.6, label="toxic regime starts") +ax.set_xlabel("bucket") +ax.set_ylabel("VPIN") +ax.set_title("VPIN over time — pre/post toxic injection") +ax.legend() +ax.grid(True, alpha=0.3) +plt.show() + +# %% diff --git a/kos/__init__.py b/kos/__init__.py index 80cdd61..2e262f9 100644 --- a/kos/__init__.py +++ b/kos/__init__.py @@ -1,8 +1,8 @@ """kos — measurement primitives for perp microstructure research.""" -from kos.markout import markout, markout_bps, log_spaced_horizons -from kos.vpin import vpin, bulk_volume_classify -from kos.if_stress import simulate_cascade, IFStressResult +from kos.if_stress import IFStressResult, simulate_cascade +from kos.markout import log_spaced_horizons, markout, markout_bps +from kos.vpin import bulk_volume_classify, vpin __all__ = [ "markout", diff --git a/kos/if_stress.py b/kos/if_stress.py index 32a9384..a14079c 100644 --- a/kos/if_stress.py +++ b/kos/if_stress.py @@ -31,7 +31,8 @@ @dataclass class Position: """Long position. 
Shorts are mirror; left out for brevity.""" - notional: float # USD-notional at entry + + notional: float # USD-notional at entry entry_price: float leverage: float maint_margin_pct: float = 0.005 # fraction; 0.5% default diff --git a/kos/vpin.py b/kos/vpin.py index 8ca126e..cbc4958 100644 --- a/kos/vpin.py +++ b/kos/vpin.py @@ -113,5 +113,5 @@ def vpin( if window <= 0 or window > n_buckets: raise ValueError(f"window must be in 1..{n_buckets}, got {window}") cumsum = np.cumsum(bucket_toxicity) - rolling = (cumsum[window - 1:] - np.concatenate([[0.0], cumsum[:-window]])) / window + rolling = (cumsum[window - 1 :] - np.concatenate([[0.0], cumsum[:-window]])) / window return rolling diff --git a/pyproject.toml b/pyproject.toml index 84ff9e7..45c101e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,3 +43,26 @@ dev = [ "matplotlib>=3.10.9", "pytest>=9.0.3", ] + +[tool.ruff] +target-version = "py311" +line-length = 100 +extend-include = ["*.ipynb"] + +[tool.ruff.lint] +select = [ + "E", # pycodestyle errors + "W", # pycodestyle warnings + "F", # pyflakes + "I", # isort + "B", # flake8-bugbear + "UP", # pyupgrade + "SIM", # flake8-simplify +] +ignore = [ + "E501", # line too long (formatter handles) + "B905", # zip() strict= parameter +] + +[tool.ruff.lint.per-file-ignores] +"examples/*" = ["E402"] # module-level import not at top (cell-based) diff --git a/tests/test_properties.py b/tests/test_properties.py index ebd1d0d..3b26cf7 100644 --- a/tests/test_properties.py +++ b/tests/test_properties.py @@ -13,9 +13,9 @@ ) from kos.vpin import bulk_volume_classify, vpin - # --- markout --- + def test_markout_zero_at_fill_price(): assert markout_bps("buy", 100.0, 100.0) == 0.0 assert markout_bps("sell", 100.0, 100.0) == 0.0 @@ -70,6 +70,7 @@ def test_markout_normalized_hs_units(): # --- vpin --- + def test_vpin_in_unit_interval(): rng = np.random.default_rng(0) r = rng.standard_normal(5000) @@ -120,6 +121,7 @@ def test_vpin_rejects_invalid_window(): # --- if_stress --- + 
def test_zero_shock_no_liquidations(): pop = [Position(notional=100_000.0, entry_price=100.0, leverage=10.0) for _ in range(10)] res = simulate_cascade(pop, shock_pct=0.0, if_initial=1e6) @@ -139,15 +141,22 @@ def test_low_leverage_survives_small_shock(): def test_survival_monotone_in_if_size(): """P(survive) must be non-decreasing as if_initial grows.""" + def population(rng): return [ - Position(notional=float(rng.lognormal(12, 0.7)), entry_price=100.0, leverage=float(rng.uniform(5, 50))) + Position( + notional=float(rng.lognormal(12, 0.7)), + entry_price=100.0, + leverage=float(rng.uniform(5, 50)), + ) for _ in range(50) ] sizes = [1e3, 1e4, 1e5, 1e6, 1e7] probs = [ - survival_probability(population, shock_pct=-0.15, if_initial=s, n_trials=50, fill_quality_bps=-500.0) + survival_probability( + population, shock_pct=-0.15, if_initial=s, n_trials=50, fill_quality_bps=-500.0 + ) for s in sizes ] for a, b in zip(probs, probs[1:]): diff --git a/tests/test_smoke.py b/tests/test_smoke.py index 7bbf1c0..d41b261 100644 --- a/tests/test_smoke.py +++ b/tests/test_smoke.py @@ -52,18 +52,13 @@ def test_vpin_runs(): def test_if_stress_no_shock_no_liquidations(): - pop = [ - Position(notional=100_000.0, entry_price=100.0, leverage=10.0) for _ in range(5) - ] + pop = [Position(notional=100_000.0, entry_price=100.0, leverage=10.0) for _ in range(5)] res = simulate_cascade(pop, shock_pct=0.0, if_initial=1e6) assert res.liquidated == [] and res.adl_queue == [] def test_if_stress_large_shock_drains_or_adls(): - pop = [ - Position(notional=100_000.0, entry_price=100.0, leverage=20.0) - for _ in range(20) - ] + pop = [Position(notional=100_000.0, entry_price=100.0, leverage=20.0) for _ in range(20)] res = simulate_cascade(pop, shock_pct=-0.30, if_initial=1_000.0) # Some positions must be processed: liquidated or ADL'd. assert (len(res.liquidated) + len(res.adl_queue)) > 0