diff --git a/.flake8 b/.flake8
deleted file mode 100644
index adf399e..0000000
--- a/.flake8
+++ /dev/null
@@ -1,3 +0,0 @@
-[flake8]
-max-line-length = 120
-ignore = E203,E231,W503
diff --git a/.github/workflows/confidence.yml b/.github/workflows/confidence.yml
index d657dc6..c4553ed 100644
--- a/.github/workflows/confidence.yml
+++ b/.github/workflows/confidence.yml
@@ -13,18 +13,22 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
- python-version: ['3.9', '3.10', '3.11']
+ python-version: ['3.9', '3.10', '3.11', '3.12']
steps:
- - uses: actions/checkout@v1
+ - uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
- uses: actions/setup-python@v2
+ uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
+ - name: Install uv
+ uses: astral-sh/setup-uv@v5
+ with:
+ enable-cache: true
+ cache-dependency-glob: "**/pyproject.toml"
- name: Install dependencies
run: |
- python -m pip install --upgrade pip
- if [ -f requirements_dev.txt ]; then pip install -r requirements_dev.txt; fi
- python -m pip install tox tox-gh-actions
+ uv pip install --system -e ".[dev]"
+ uv pip install --system tox tox-gh-actions
- name: Test with tox
run: tox
diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml
index 4f2e205..fb9d87f 100644
--- a/.github/workflows/python-publish.yml
+++ b/.github/workflows/python-publish.yml
@@ -18,11 +18,11 @@ jobs:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v2
+ - uses: actions/checkout@v4
- name: Set up Python
- uses: actions/setup-python@v2
+ uses: actions/setup-python@v5
with:
- python-version: '3.9'
+ python-version: '3.11'
- name: Install dependencies
run: |
python -m pip install --upgrade pip
diff --git a/.gitignore b/.gitignore
index 9cd5fd6..edcb3af 100644
--- a/.gitignore
+++ b/.gitignore
@@ -90,4 +90,8 @@ ENV/
.DS_store
-.idea/
\ No newline at end of file
+.idea/
+
+# uv
+uv.lock
+.venv/
\ No newline at end of file
diff --git a/CLAUDE.md b/CLAUDE.md
new file mode 100644
index 0000000..fae842d
--- /dev/null
+++ b/CLAUDE.md
@@ -0,0 +1,157 @@
+# CLAUDE.md
+
+This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
+
+## Project Overview
+
+Spotify Confidence is a Python library for A/B test analysis. It provides convenience wrappers around statsmodels' functions for computing p-values and confidence intervals. The library supports both frequentist (Z-test, Student's T-test, Chi-squared) and Bayesian (BetaBinomial) statistical methods, with features for variance reduction, sequential testing, and sample size calculations.
+
+## Development Commands
+
+### Setup
+```bash
+# Install with development dependencies (including tox-uv)
+uv pip install -e ".[dev]"
+```
+
+### Testing
+```bash
+# Run all tests with coverage
+uv run pytest
+
+# Run tests without coverage reports
+uv run pytest --no-cov
+
+# Run specific test file
+uv run pytest tests/frequentist/test_z_test.py
+
+# Run specific test
+uv run pytest tests/frequentist/test_z_test.py::test_name
+
+# Run all tests across Python versions
+uv run tox
+```
+
+### Code Quality
+```bash
+# Format code with black (line length: 119)
+uv run black spotify_confidence tests
+
+# Check formatting without making changes
+uv run black --check --diff spotify_confidence tests
+
+# Lint with flake8 (max line length: 120)
+uv run flake8 spotify_confidence tests
+
+# Run all quality checks (as done in CI)
+uv run black --check --diff spotify_confidence tests && uv run flake8 spotify_confidence tests && uv run pytest
+```
+
+### Build
+```bash
+# Build distribution packages
+uv run python -m build
+```
+
+## Architecture
+
+### Core Design Pattern
+
+The library follows an object-oriented design with separation of concerns:
+
+1. **Statistical Test Classes**: High-level APIs (`ZTest`, `StudentsTTest`, `ChiSquared`, `BetaBinomial`, `ZTestLinreg`)
+2. **Experiment Class**: Base class containing shared analysis methods for frequentist tests
+3. **Computer Classes**: Perform the actual statistical computations
+4. **Grapher Classes**: Generate visualizations using Chartify
+
+All main test classes inherit from abstract base classes in `spotify_confidence/analysis/abstract_base_classes/`:
+- `ConfidenceABC`: Base for all statistical test classes
+- `ConfidenceComputerABC`: Base for computation logic
+- `ConfidenceGrapherABC`: Base for visualization logic
+
+### Module Structure
+
+```
+spotify_confidence/
+├── analysis/
+│ ├── abstract_base_classes/ # ABC definitions for the framework
+│ ├── frequentist/ # Frequentist statistical methods
+│ │ ├── confidence_computers/ # Statistical computation logic
+│ │ ├── experiment.py # Base class for frequentist tests
+│ │ ├── z_test.py # Z-test implementation
+│ │ ├── t_test.py # Student's T-test implementation
+│ │ ├── chi_squared.py # Chi-squared test
+│ │ ├── z_test_linreg.py # Z-test with linear regression variance reduction
+│ │ ├── sequential_bound_solver.py # Group sequential testing
+│ │ ├── multiple_comparison.py # Multiple testing correction
+│ │ └── sample_size_calculator.py
+│ ├── bayesian/ # Bayesian methods
+│ │ └── bayesian_models.py # BetaBinomial implementation
+│ ├── constants.py # Shared constants
+│ └── confidence_utils.py # Shared utility functions
+├── samplesize/ # Sample size calculations
+├── examples.py # Example data generators
+├── chartgrid.py # Chart grid utilities
+└── options.py # Global configuration
+```
+
+### Key Classes and Their Relationships
+
+- **Experiment** (in `frequentist/experiment.py`): The core base class for frequentist tests. Provides methods like:
+ - `summary()`: Overall metric summaries
+ - `difference()`: Pairwise comparisons
+ - `multiple_difference()`: Multiple comparisons with correction
+ - `difference_plot()`, `summary_plot()`, etc.: Visualization methods
+ - `sample_size()`: Required sample size calculations
+ - `statistical_power()`: Power analysis
+
+- **ZTest, StudentsTTest, ChiSquared**: Thin wrappers that initialize `Experiment` with the appropriate computer and method
+
+- **Computer Classes** (in `frequentist/confidence_computers/`): Handle the statistical calculations
+ - `ZTestComputer`, `TTestComputer`, `ChiSquaredComputer`: Specific computation implementations
+ - All inherit from `ConfidenceComputerABC`
+
+- **ChartifyGrapher**: Implements visualization using the Chartify library
+
+### Data Model
+
+The library works with DataFrames containing sufficient statistics:
+- `numerator_column`: Sum or count (e.g., sum of conversions)
+- `denominator_column`: Total observations (e.g., total users)
+- `numerator_sum_squares_column`: Sum of squares (optional, for variance calculations)
+- `categorical_group_columns`: Treatment/control groups and other dimensions
+- `ordinal_group_column`: Time-based grouping for sequential analysis
+
+### Important Conventions
+
+1. **Method Column**: Tests add a `METHOD_COLUMN_NAME` to data indicating the test type (e.g., "z-test", "t-test")
+
+2. **Multiple Comparison Correction**: Supported methods defined in `constants.py`:
+ - Standard: bonferroni, holm, hommel, sidak, FDR methods
+ - SPOT-1 variants: Custom Spotify methods for specific use cases
+
+3. **Non-Inferiority Margins (NIMs)**: Can be specified as absolute values or relative percentages
+
+4. **Sequential Testing**: The `sequential_bound_solver.py` module implements group sequential designs with spending functions
+
+5. **Variance Reduction**: `ZTestLinreg` uses pre-exposure data to fit a linear model and reduce variance (CUPED method)
+
+## Testing Guidelines
+
+- Tests are organized to mirror the source structure under `tests/`
+- Use pytest fixtures for common test data
+- Tests check both DataFrame outputs and chart generation
+- Coverage target is configured in `pyproject.toml`
+
+## Python Version Support
+
+Supports Python 3.9, 3.10, 3.11, and 3.12. The `tox.ini` includes a `py39-min` environment that tests with minimum dependency versions.
+
+The project uses `tox-uv` to leverage uv's fast package installation and environment management in tox, significantly speeding up multi-environment testing. The GitHub Actions CI workflow also uses uv for faster dependency installation.
+
+## Code Style
+
+- Black formatting with 119 character line length
+- Flake8 linting with max line length 120
+- Ignored flake8 rules: E203, E231, W503
+- Excluded from linting: `.venv`, `.tox`, `dist`, `build`, `scratch.py`, `confidence_dev`
diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst
index b175338..86b3bd7 100644
--- a/CONTRIBUTING.rst
+++ b/CONTRIBUTING.rst
@@ -57,41 +57,55 @@ Get Started!
Ready to contribute? Here's how to set up `confidence` for local development.
+**Prerequisites:**
+
+* `uv <https://docs.astral.sh/uv/>`_ - Fast Python package installer (recommended)
+* Python 3.9 or later
+
1. Fork the `confidence` repo on GitHub.
2. Clone your fork locally::
- $ git clone https://github.com/spotify/confidence
+ $ git clone git@github.com:your_username/confidence.git
+ $ cd confidence
+
+3. Set up your development environment using uv::
+
+ $ uv venv
+ $ uv pip install -e ".[dev]"
-3. Install your local copy into a virtualenv. Assuming you have virtualenvwrapper installed, this is how you set up your fork for local development::
+ This creates a virtual environment and installs the package in editable mode with all development dependencies.
- $ mkvirtualenv confidence_dev
- $ cd confidence/
- $ tox
+4. Verify your setup by running the tests::
- The tox command will install the dev requirements in requirements_dev.txt and run all tests.
+ $ uv run pytest
-4. Create a branch for local development::
+ This should run all tests and show they pass.
+
+5. Create a branch for local development::
$ git checkout -b name-of-your-bugfix-or-feature
Now you can make your changes locally.
-5. When you're done making changes, format using `make black`, check that your changes pass flake8 and the tests, including testing other Python versions with tox::
+6. When you're done making changes, check that your changes pass all quality checks::
+
+ $ uv run black spotify_confidence tests --line-length 119 # Format code
+ $ uv run flake8 spotify_confidence tests # Lint code
+ $ uv run pytest # Run tests
+
+ To test across all supported Python versions (3.9, 3.10, 3.11, 3.12)::
- $ make black
- $ flake8 confidence tests
- $ python setup.py test or py.test
- $ tox
+ $ uv run tox -p auto
- To get flake8 and tox, just pip install them into your virtualenv.
+ Note: tox requires all Python versions to be installed on your system.
-6. Commit your changes and push your branch to GitHub::
+7. Commit your changes and push your branch to GitHub::
$ git add .
$ git commit -m "Your detailed description of your changes."
$ git push origin name-of-your-bugfix-or-feature
-7. Submit a pull request through the GitHub website.
+8. Submit a pull request through the GitHub website.
Pull Request Guidelines
-----------------------
@@ -101,23 +115,36 @@ Before you submit a pull request, check that it meets these guidelines:
1. The pull request should include tests.
2. If the pull request adds functionality, the docs should be updated. Put
your new functionality into a function with a docstring, and add the
- feature to the list in README.rst.
-3. The pull request should work for Python 3.6 and 3.7. Check
- and make sure that the tests pass for all supported Python versions.
+ feature to the list in README.md.
+3. The pull request should work for Python 3.9, 3.10, 3.11, and 3.12. The CI
+ pipeline will automatically test all supported Python versions.
Tips
----
To run a subset of tests::
-$ py.test tests.test_confidence
+ $ uv run pytest tests/frequentist/test_ttest.py
+
+To run a specific test::
+
+ $ uv run pytest tests/frequentist/test_ttest.py::TestCategorical::test_summary
+
+To run tests with verbose output::
+
+ $ uv run pytest -v
+
+To see test coverage::
+
+ $ uv run pytest --cov=spotify_confidence --cov-report=html
+ $ open htmlcov/index.html
Release Process
-----------------------
While commits and pull requests are welcome from any contributor, we try to
-simplify the distribution process for everyone by managing the release
+simplify the distribution process for everyone by managing the release
process with specific contributors serving in the role of Release Managers.
Release Managers are responsible for:
@@ -142,7 +169,7 @@ PATCH version when you make backwards-compatible bug fixes.
Release Stategy
~~~~~~~~~~~~~~~~
-Each new release will be made on its own branch, with the branch Master
+Each new release will be made on its own branch, with the branch Master
representing the most recent, furthest release. Releases are published to PyPi
automatically once a new release branch is merged to Master. Additionally,
rew releases are also tracked manually on `github
diff --git a/MANIFEST.in b/MANIFEST.in
deleted file mode 100644
index 2353e85..0000000
--- a/MANIFEST.in
+++ /dev/null
@@ -1,10 +0,0 @@
-include README.md
-include CONTRIBUTING.md
-include AUTHORS.md
-
-recursive-include tests *
-recursive-exclude * __pycache__
-recursive-exclude * *.py[co]
-
-recursive-include docs *.rst conf.py Makefile make.bat *.jpg *.png *.gif *.py
-recursive-include confidence/ *.rst conf.py Makefile make.bat *.jpg *.png *.gif *.py
\ No newline at end of file
diff --git a/Makefile b/Makefile
index b8f0ff3..8c0ff85 100644
--- a/Makefile
+++ b/Makefile
@@ -47,14 +47,17 @@ clean-test: ## remove test and coverage artifacts
rm -f .coverage
rm -fr htmlcov/
+format: ## format code with black
+ black spotify_confidence tests --line-length 119
+
lint: ## check style with flake8
- flake8 confidence tests
+ flake8 spotify_confidence tests
test: ## run tests quickly with the default Python
python3 -m pytest
coverage: ## check code coverage quickly with the default Python
- coverage run --source confidence -m pytest
+ coverage run --source spotify_confidence -m pytest
coverage report -m
coverage html
$(BROWSER) htmlcov/index.html
@@ -86,10 +89,8 @@ install: clean ## install the package to the active Python's site-packages
pip install -e .
install-test: clean
- pip3 install --index-url https://test.pypi.org/simple/ confidence-spotify
+ pip3 install --index-url https://test.pypi.org/simple/ spotify-confidence
install-prod: clean
- pip3 install confidence-spotify
+ pip3 install spotify-confidence
-black:
- black spotify_confidence tests --line-length 119
diff --git a/pyproject.toml b/pyproject.toml
index f6c1689..57fda33 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,62 @@
[build-system]
-requires = [
- "setuptools>=42",
- "wheel",
-]
+requires = ["setuptools>=61.2"]
build-backend = "setuptools.build_meta"
+
+[project]
+name = "spotify-confidence"
+version = "4.0.0"
+description = "Package for calculating and visualising confidence intervals, e.g. for A/B test analysis."
+readme = "README.md"
+license = {file = "LICENSE"}
+authors = [{name = "Per Sillren", email = "pers@spotify.com"}]
+requires-python = ">=3.9"
+classifiers = [
+ "Programming Language :: Python :: 3",
+ "License :: OSI Approved :: Apache Software License",
+ "Operating System :: OS Independent",
+]
+dependencies = [
+ "numpy>=1.21.0",
+ "scipy>=1.9.0",
+ "pandas>=1.4.0",
+ "statsmodels>=0.13.5",
+ "chartify>=5.0.0",
+ "ipywidgets>=8.0.0",
+]
+
+[project.optional-dependencies]
+dev = [
+ "build",
+ "twine",
+ "black>=23.7.0",
+ "flake8>=6.0.0",
+ "tox>=4.0.0",
+ "tox-uv>=1.0.0",
+ "pytest>=7.0.0",
+ "pytest-cov>=4.0.0",
+ "pytest-xdist>=3.0.2",
+ "coverage>=7.0.0",
+]
+
+[project.urls]
+Homepage = "https://github.com/spotify/confidence"
+"Bug Tracker" = "https://github.com/spotify/confidence/issues"
+
+[tool.setuptools.packages.find]
+where = ["."]
+include = ["spotify_confidence*"]
+namespaces = false
+
+[tool.black]
+line-length = 119
+target-version = ["py39", "py310", "py311", "py312"]
+
+[tool.pytest.ini_options]
+addopts = "-v -n auto --cov=spotify_confidence --cov-report=html --cov-report=xml --cov-report=term-missing"
+testpaths = ["tests"]
+
+[tool.coverage.run]
+source = ["spotify_confidence"]
+
+[tool.coverage.report]
+show_missing = true
diff --git a/requirements_dev.txt b/requirements_dev.txt
deleted file mode 100644
index cf02b2e..0000000
--- a/requirements_dev.txt
+++ /dev/null
@@ -1,20 +0,0 @@
--e .
-pip==23.0.1
-build
-twine
-bumpversion==0.5.3
-watchdog==0.8.3
-flake8==4.0.1
-tox==4.4.7
-Sphinx==1.4.8
-pytest-runner==6.0.0
-jupyterlab==3.2.9
-pylint==1.7.4
-coverage==4.5.1
-pytest==7.0.1
-pytest-cov==2.5.1
-ipywidgets>=7.1.0
-black==23.1.0
-ipython>=8.10.0 # not directly required, pinned by Snyk to avoid a vulnerability
-setuptools>=65.5.1 # not directly required, pinned by Snyk to avoid a vulnerability
-tornado>=6.3.2 # not directly required, pinned by Snyk to avoid a vulnerability
diff --git a/setup.cfg b/setup.cfg
deleted file mode 100644
index 4186a42..0000000
--- a/setup.cfg
+++ /dev/null
@@ -1,31 +0,0 @@
-[metadata]
-name = spotify-confidence
-version = 4.0.0
-author = Per Sillren
-author_email = pers@spotify.com
-description = Package for calculating and visualising confidence intervals, e.g. for A/B test analysis.
-long_description = file: README.md
-long_description_content_type = text/markdown
-url = https://github.com/spotify/confidence
-project_urls =
- Bug Tracker = https://github.com/spotify/confidence/issues
-classifiers =
- Programming Language :: Python :: 3
- License :: OSI Approved :: Apache Software License
- Operating System :: OS Independent
-
-[options]
-package_dir =
- = .
-packages = find:
-python_requires = >=3.9
-install_requires =
- numpy>=1.20.0,<2.0.0
- scipy>=1.6.0
- pandas>=1.2.0
- statsmodels>=0.13.0,<1.0.0
- chartify>=5.0.1
- ipywidgets>=8.0.0
-
-[options.packages.find]
-where = .
diff --git a/setup.py b/setup.py
deleted file mode 100644
index b908cbe..0000000
--- a/setup.py
+++ /dev/null
@@ -1,3 +0,0 @@
-import setuptools
-
-setuptools.setup()
diff --git a/spotify_confidence/__init__.py b/spotify_confidence/__init__.py
index 6369a1f..d8e9f24 100644
--- a/spotify_confidence/__init__.py
+++ b/spotify_confidence/__init__.py
@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from pkg_resources import require as _require
+from importlib.metadata import version as _version
from .analysis.bayesian.bayesian_models import BetaBinomial
from spotify_confidence.analysis.frequentist.chi_squared import ChiSquared
from spotify_confidence.analysis.frequentist.t_test import StudentsTTest
@@ -25,7 +25,7 @@
from . import examples
from .options import options
-__version__ = _require("spotify_confidence")[0].version
+__version__ = _version("spotify_confidence")
__all__ = [
"BetaBinomial",
diff --git a/spotify_confidence/analysis/confidence_utils.py b/spotify_confidence/analysis/confidence_utils.py
index 44db803..29f241a 100644
--- a/spotify_confidence/analysis/confidence_utils.py
+++ b/spotify_confidence/analysis/confidence_utils.py
@@ -250,5 +250,5 @@ def dfmatmul(x, y, outer=True):
def de_list_if_length_one(x):
"""Return first element of x if x is a list of length one"""
- is_iterable = type(x) != str and isinstance(x, Iterable)
+ is_iterable = not isinstance(x, str) and isinstance(x, Iterable)
return x[0] if is_iterable and len(x) == 1 else x
diff --git a/spotify_confidence/analysis/frequentist/confidence_computers/confidence_computer.py b/spotify_confidence/analysis/frequentist/confidence_computers/confidence_computer.py
index 4c47c22..4ea6105 100644
--- a/spotify_confidence/analysis/frequentist/confidence_computers/confidence_computer.py
+++ b/spotify_confidence/analysis/frequentist/confidence_computers/confidence_computer.py
@@ -351,7 +351,7 @@ def compute_differences(
level_columns = get_remaning_groups(self._all_group_columns, groupby)
difference_df = self._compute_differences(
level_columns=level_columns,
- levels=[levels] if type(levels) == tuple else levels,
+ levels=[levels] if isinstance(levels, tuple) else levels,
absolute=absolute,
groupby=groupby,
level_as_reference=True,
diff --git a/spotify_confidence/samplesize/sample_size_calculator.py b/spotify_confidence/samplesize/sample_size_calculator.py
index 10b77d5..4c22506 100644
--- a/spotify_confidence/samplesize/sample_size_calculator.py
+++ b/spotify_confidence/samplesize/sample_size_calculator.py
@@ -422,18 +422,18 @@ def show_samplesize(
)
code_html = widgets.HTML(
"
"
- f"SampleSize.continuous(average_absolute_mde={ mde },\n"
- f" baseline_variance={ baseline },\n"
- f" alpha={ alpha },\n"
- f" power={ power },\n"
- f" treatments={ treatments },\n"
+ f"SampleSize.continuous(average_absolute_mde={mde},\n"
+ f" baseline_variance={baseline},\n"
+ f" alpha={alpha},\n"
+ f" power={power},\n"
+ f" treatments={treatments},\n"
f" comparisons="
- f"'{ comparisons }',\n"
+ f"'{comparisons}',\n"
f" treatment_costs="
- f"{ list(treatment_costs) },\n"
+ f"{list(treatment_costs)},\n"
f" treatment_allocations=None,\n"
f" bonferroni_correction="
- f"{ bonferroni_correction })"
+ f"{bonferroni_correction})"
"
"
)
else:
@@ -461,19 +461,19 @@ def show_samplesize(
)
code_html = widgets.HTML(
""
- f"SampleSize.binomial(absolute_percentage_mde={ mde },\n"
+ f"SampleSize.binomial(absolute_percentage_mde={mde},\n"
f" baseline_proportion="
- f"{ baseline },\n"
- f" alpha={ alpha },\n"
- f" power={ power },\n"
- f" treatments={ treatments },\n"
+ f"{baseline},\n"
+ f" alpha={alpha},\n"
+ f" power={power},\n"
+ f" treatments={treatments},\n"
f" comparisons="
- f"'{ comparisons }',\n"
+ f"'{comparisons}',\n"
f" treatment_costs="
- f"{ list(treatment_costs) },\n"
+ f"{list(treatment_costs)},\n"
f" treatment_allocations=None,\n"
f" bonferroni_correction="
- f"{ bonferroni_correction })"
+ f"{bonferroni_correction})"
"
"
)
@@ -482,7 +482,7 @@ def compare_against_optimal(current, optimal):
return ""
else:
return (
- f"
{current/optimal:.1f}x "
+ f"
{current / optimal:.1f}x "
f"optimal group allocation of {optimal:,}."
f""
)
@@ -501,7 +501,7 @@ def compare_against_optimal(current, optimal):
else:
treatment = "Variant " + str(i)
- cell_str += f"
{treatment}: " f"{n_cell[i]:,} ({prop_cell[i]*100:.1f}%)"
+ cell_str += f"
{treatment}: " f"{n_cell[i]:,} ({prop_cell[i] * 100:.1f}%)"
display(widgets.HTML(cell_str))
display(code_html)
diff --git a/tests/bayesian/test_betabinomial.py b/tests/bayesian/test_betabinomial.py
index 1779623..3a6bf74 100644
--- a/tests/bayesian/test_betabinomial.py
+++ b/tests/bayesian/test_betabinomial.py
@@ -9,7 +9,7 @@
class TestCategorical(object):
- def setup(self):
+ def setup_method(self):
self.data = pd.DataFrame(
{
"variation_name": ["test", "control", "test2", "test", "control", "test2"],
@@ -148,7 +148,7 @@ def test_multiple_difference_level_as_reference(self):
class TestOrdinal:
- def setup(self):
+ def setup_method(self):
self.data = pd.DataFrame(
{
"variation_name": [
@@ -307,7 +307,7 @@ def test_multiple_difference_plot(self):
class TestOrdinalPlusTwoCategorical(object):
- def setup(self):
+ def setup_method(self):
self.data = pd.DataFrame(
{
"variation_name": [
diff --git a/tests/frequentist/test_bounds.py b/tests/frequentist/test_bounds.py
index 5c2a0f8..c85d7e9 100644
--- a/tests/frequentist/test_bounds.py
+++ b/tests/frequentist/test_bounds.py
@@ -1,5 +1,4 @@
import pandas as pd
-import pytest
import time
import numpy as np
from pandas import Timestamp
@@ -10,7 +9,6 @@
)
-@pytest.mark.skip(reason="Skipping because this test is very slow")
def test_many_days():
"""
This input (based on a real experiment) is very long, which can cause slow calculation
@@ -404,7 +402,6 @@ def test_many_days():
assert (time.time() - start_time) < 0.01
-@pytest.mark.skip(reason="Skipping because this test is very slow")
def test_many_days_fast_and_no_crash():
"""
This is based on experiment 1735 on 26.11.2020. The calculation of the corresponding bounds takes many minutes
diff --git a/tests/frequentist/test_chisquared.py b/tests/frequentist/test_chisquared.py
index 97e67e8..7b68aed 100644
--- a/tests/frequentist/test_chisquared.py
+++ b/tests/frequentist/test_chisquared.py
@@ -24,7 +24,7 @@ def chart_data(chart_object, series_name):
class TestCategorical(object):
- def setup(self):
+ def setup_method(self):
np.random.seed(123)
self.data = pd.DataFrame(
@@ -335,7 +335,7 @@ def test_achieved_power_groupby(self):
class TestOrdinal(object):
- def setup(self):
+ def setup_method(self):
self.data = pd.DataFrame(
{
"variation_name": [
@@ -514,7 +514,7 @@ def test_sample_ratio_test(self):
class TestOrdinalPlusTwoCategorical(object):
- def setup(self):
+ def setup_method(self):
self.data = pd.DataFrame(
{
"variation_name": [
diff --git a/tests/frequentist/test_experiment.py b/tests/frequentist/test_experiment.py
index ac4775c..34946a4 100644
--- a/tests/frequentist/test_experiment.py
+++ b/tests/frequentist/test_experiment.py
@@ -7,7 +7,7 @@
class TestBootstrap(object):
- def setup(self):
+ def setup_method(self):
np.random.seed(123)
n_bootstraps = int(5e5)
self.data = pd.DataFrame(
diff --git a/tests/frequentist/test_ttest.py b/tests/frequentist/test_ttest.py
index f310606..61735c6 100644
--- a/tests/frequentist/test_ttest.py
+++ b/tests/frequentist/test_ttest.py
@@ -36,7 +36,7 @@ def chart_data(chart_object, series_name):
class TestCategorical(object):
- def setup(self):
+ def setup_method(self):
np.random.seed(123)
self.data = pd.DataFrame(
@@ -244,7 +244,7 @@ def test_achieved_power(self):
class TestOrdinal(object):
- def setup(self):
+ def setup_method(self):
self.data = pd.DataFrame(
{
"variation_name": [
@@ -403,7 +403,7 @@ def test_achieved_power(self):
class TestOrdinalPlusTwoCategorical(object):
- def setup(self):
+ def setup_method(self):
self.data = pd.DataFrame(
{
"variation_name": [
@@ -805,7 +805,7 @@ def test_differece_plot_with_nims(self):
class TestCategoricalBinomialData(object):
- def setup(self):
+ def setup_method(self):
np.random.seed(123)
self.data = pd.DataFrame(
@@ -877,7 +877,7 @@ def test_multiple_difference(self):
class TestWithNims(object):
- def setup(self):
+ def setup_method(self):
self.data = pd.DataFrame(
[
{
diff --git a/tests/frequentist/test_ztest.py b/tests/frequentist/test_ztest.py
index b03b4f1..957029b 100644
--- a/tests/frequentist/test_ztest.py
+++ b/tests/frequentist/test_ztest.py
@@ -26,7 +26,7 @@
class TestPoweredEffectContinuousSingleMetric(object):
- def setup(self):
+ def setup_method(self):
self.data = pd.DataFrame(
{
"variation_name": [
@@ -87,7 +87,7 @@ def test_powered_effect2(self):
class TestPoweredEffectContinuousMultipleSuccessMetrics(object):
- def setup(self):
+ def setup_method(self):
self.data = pd.DataFrame(
{
"variation_name": ["test", "control", "test2", "test", "control", "test2"],
@@ -149,7 +149,7 @@ def test_powered_effect1(self):
class TestPoweredEffectContinuousMultipleMetricTypes(object):
- def setup(self):
+ def setup_method(self):
self.data = pd.DataFrame(
{
"variation_name": ["test", "control", "test2", "test", "control", "test2"],
@@ -212,7 +212,7 @@ def test_powered_effect(self):
class TestPoweredEffectContinuousMultipleMetricsSegments(object):
- def setup(self):
+ def setup_method(self):
self.data = pd.DataFrame(
{
"variation_name": [
@@ -342,7 +342,7 @@ def test_powered_effect(self):
class TestPoweredEffectContinuousMultipleMetricsSegments2(object):
- def setup(self):
+ def setup_method(self):
self.data = pd.DataFrame(
{
"variation_name": [
@@ -472,7 +472,7 @@ def test_powered_effect(self):
class TestPoweredEffectContinuousMultipleMetricsSegments3(object):
- def setup(self):
+ def setup_method(self):
self.data = pd.DataFrame(
{
"variation_name": [
@@ -605,7 +605,7 @@ def test_powered_effect(self):
class TestPoweredEffectBinary(object):
- def setup(self):
+ def setup_method(self):
np.random.seed(123)
self.data = pd.DataFrame(
@@ -750,7 +750,7 @@ def test_powered_effect(self):
class TestPoweredEffectBinaryOnlyGuardrail(object):
- def setup(self):
+ def setup_method(self):
np.random.seed(123)
self.data = pd.DataFrame(
@@ -816,7 +816,7 @@ def test_powered_effect(self):
class TestBinary(object):
- def setup(self):
+ def setup_method(self):
np.random.seed(123)
self.data = pd.DataFrame(
@@ -922,7 +922,7 @@ def test_multiple_difference_plot(self):
class TestCategoricalBinary(object):
- def setup(self):
+ def setup_method(self):
np.random.seed(123)
self.data = pd.DataFrame(
@@ -1038,7 +1038,7 @@ def test_multiple_difference_plot_groupby(self):
class TestCategoricalContinuous(object):
- def setup(self):
+ def setup_method(self):
np.random.seed(123)
self.data = pd.DataFrame(
@@ -1126,7 +1126,7 @@ def test_multiple_difference_plot_groupby(self):
class TestOrdinal(object):
- def setup(self):
+ def setup_method(self):
self.data = pd.DataFrame(
{
"variation_name": [
@@ -1230,7 +1230,7 @@ def test_multiple_difference_plot_groupby(self):
class TestOrdinalPlusTwoCategorical(object):
- def setup(self):
+ def setup_method(self):
self.data = pd.DataFrame(
{
"variation_name": [
@@ -1892,7 +1892,7 @@ def test_differece_plot_with_nims_in_df(self):
class TestCategoricalBinomialData(object):
- def setup(self):
+ def setup_method(self):
np.random.seed(123)
self.data = pd.DataFrame(
@@ -2004,7 +2004,7 @@ def test_multiple_difference(self):
class TestWithNims(object):
- def setup(self):
+ def setup_method(self):
self.data = pd.DataFrame(
[
{
@@ -2109,7 +2109,7 @@ def test_one_sided_ztest_negative(self):
class TestSequentialOrdinalPlusTwoCategorical(object):
- def setup(self):
+ def setup_method(self):
np.random.seed(123)
d = 50 + 1 * np.random.randn(60)
u = np.floor(2000 + np.linspace(0, 1000, 60) + 10 * np.random.randn(60))
@@ -2636,7 +2636,7 @@ def test_multiple_difference_groupby_mixed_nims(self):
class TestSequentialOrdinalPlusTwoCategorical2(object):
- def setup(self):
+ def setup_method(self):
self.data = pd.DataFrame(
[
{
@@ -3406,7 +3406,7 @@ def test_multiple_difference_groupby(self):
class TestSequentialOneSided(object):
- def setup(self):
+ def setup_method(self):
DATE = "date"
COUNT = "count"
SUM = "sum"
@@ -3448,7 +3448,7 @@ def test_multiple_difference_groupby(self):
class TestSequentialTwoSided(object):
- def setup(self):
+ def setup_method(self):
DATE = "date"
COUNT = "count"
SUM = "sum"
@@ -3489,7 +3489,7 @@ def test_multiple_difference_groupby(self):
class TestSequentialOneSidedThreeGroups(object):
- def setup(self):
+ def setup_method(self):
DATE = "date"
COUNT = "count"
SUM = "sum"
@@ -3534,7 +3534,7 @@ def test_multiple_difference_groupby(self):
class TestNimsWithNaN(object):
- def setup(self):
+ def setup_method(self):
self.data = pd.DataFrame(
{
"count": {
diff --git a/tests/frequentist/test_ztest_linreg.py b/tests/frequentist/test_ztest_linreg.py
index eecd4ad..cb81a14 100644
--- a/tests/frequentist/test_ztest_linreg.py
+++ b/tests/frequentist/test_ztest_linreg.py
@@ -7,7 +7,7 @@
class TestUnivariateSingleMetric(object):
- def setup(self):
+ def setup_method(self):
np.random.seed(123)
n = 10000
d = np.random.randint(2, size=n)
@@ -74,7 +74,7 @@ def linreg(X, y):
class TestUnivariateMultiMetric(object):
- def setup(self):
+ def setup_method(self):
np.random.seed(123)
n = 20000
d = np.random.randint(2, size=n)
@@ -164,7 +164,7 @@ def linreg(X, y):
class TestUnivariateNoFeatures(object):
- def setup(self):
+ def setup_method(self):
self.data = pd.DataFrame(
[
{
@@ -214,7 +214,7 @@ def test_summary(self):
class TestMultivariateSingleMetric(object):
- def setup(self):
+ def setup_method(self):
np.random.seed(123)
n = 10000
@@ -316,7 +316,7 @@ def linreg(X, y):
class TestMultivariateMultipleMetrics(object):
- def setup(self):
+ def setup_method(self):
np.random.seed(123)
n = 10000
@@ -441,7 +441,7 @@ def linreg(X, y):
class TestUnivariateMultiMetricRequiredSampleSize(object):
- def setup(self):
+ def setup_method(self):
np.random.seed(123)
n = 2000000
d = np.random.randint(2, size=n)
@@ -512,7 +512,7 @@ def test_parameters_univariate_required_sample_size(self):
class TestUnivariateSingleMetricWithBadPreExposureData(object):
- def setup(self):
+ def setup_method(self):
np.random.seed(123)
n = 10000
d = np.random.randint(2, size=n)
@@ -569,7 +569,7 @@ def test_parameters_univariate(self):
class TestUnivariateSingleMetricNegativeVariance(object):
- def setup(self):
+ def setup_method(self):
self.data = pd.DataFrame(
[
{
diff --git a/tox.ini b/tox.ini
index fc4e692..064fa09 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,37 +1,50 @@
[tox]
-envlist = python3.9, python3.10, python3.11
-skipsdist = True
-usedevelop = True
-
-[travis]
-python =
- 3.9: python3.9
- 3.10: python3.10
- 3.11: python3.11
+envlist = py39, py310, py311, py312, py39-min
+isolated_build = True
+requires = tox-uv
[gh-actions]
python =
- 3.9: python3.9
- 3.10: python3.10
- 3.11: python3.11
+ 3.9: py39-min
+ 3.10: py310
+ 3.11: py311
+ 3.12: py312
[testenv]
-setenv =
- PYTHONPATH = {toxinidir}
+extras = dev
+commands =
+ black --check --diff spotify_confidence tests
+ flake8 spotify_confidence tests
+ pytest -n auto --no-cov --basetemp={envtmpdir} {posargs}
+
+[testenv:py312]
+extras = dev
+commands =
+ black --check --diff spotify_confidence tests
+ flake8 spotify_confidence tests
+ pytest -n auto --basetemp={envtmpdir} {posargs}
+
+[testenv:py39-min]
+basepython = python3.9
deps =
- -r{toxinidir}/requirements_dev.txt
+ numpy==1.21.0
+ scipy==1.9.0
+ pandas==1.4.0
+ statsmodels==0.13.5
+ chartify==5.0.0
+ ipywidgets==8.0.0
+ black==23.7.0
+ flake8==6.0.0
+ pytest==7.0.0
+ pytest-cov==4.0.0
+ pytest-xdist==3.0.2
+ coverage==7.0.0
commands =
- flake8 {posargs}
- coverage erase
- py.test {posargs}
+ black --check --diff spotify_confidence tests
+ flake8 spotify_confidence tests
+ pytest -n auto --no-cov --basetemp={envtmpdir} {posargs}
[flake8]
-show-source = true
max-line-length = 120
+ignore = E203,E231,W503
exclude = .venv,.tox,.git,dist,docs,*.egg,build,scratch.py,confidence_dev
-ignore = E203, W503
-
-[pytest]
-addopts = -v --cov=spotify_confidence --cov-report=html --cov-report=xml --cov-report=term-missing
-testpaths = tests
-