diff --git a/.flake8 b/.flake8 deleted file mode 100644 index adf399e..0000000 --- a/.flake8 +++ /dev/null @@ -1,3 +0,0 @@ -[flake8] -max-line-length = 120 -ignore = E203,E231,W503 diff --git a/.github/workflows/confidence.yml b/.github/workflows/confidence.yml index d657dc6..c4553ed 100644 --- a/.github/workflows/confidence.yml +++ b/.github/workflows/confidence.yml @@ -13,18 +13,22 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ['3.9', '3.10', '3.11'] + python-version: ['3.9', '3.10', '3.11', '3.12'] steps: - - uses: actions/checkout@v1 + - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} + - name: Install uv + uses: astral-sh/setup-uv@v5 + with: + enable-cache: true + cache-dependency-glob: "**/pyproject.toml" - name: Install dependencies run: | - python -m pip install --upgrade pip - if [ -f requirements_dev.txt ]; then pip install -r requirements_dev.txt; fi - python -m pip install tox tox-gh-actions + uv pip install --system -e ".[dev]" + uv pip install --system tox tox-gh-actions - name: Test with tox run: tox diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml index 4f2e205..fb9d87f 100644 --- a/.github/workflows/python-publish.yml +++ b/.github/workflows/python-publish.yml @@ -18,11 +18,11 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Set up Python - uses: actions/setup-python@v2 + uses: actions/setup-python@v5 with: - python-version: '3.9' + python-version: '3.11' - name: Install dependencies run: | python -m pip install --upgrade pip diff --git a/.gitignore b/.gitignore index 9cd5fd6..edcb3af 100644 --- a/.gitignore +++ b/.gitignore @@ -90,4 +90,8 @@ ENV/ .DS_store -.idea/ \ No newline at end of file +.idea/ + +# uv +uv.lock +.venv/ \ No newline at end of file diff --git a/CLAUDE.md b/CLAUDE.md 
new file mode 100644 index 0000000..fae842d --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,157 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Project Overview + +Spotify Confidence is a Python library for A/B test analysis. It provides convenience wrappers around statsmodel's functions for computing p-values and confidence intervals. The library supports both frequentist (Z-test, Student's T-test, Chi-squared) and Bayesian (BetaBinomial) statistical methods, with features for variance reduction, sequential testing, and sample size calculations. + +## Development Commands + +### Setup +```bash +# Install with development dependencies (including tox-uv) +uv pip install -e ".[dev]" +``` + +### Testing +```bash +# Run all tests with coverage +uv run pytest + +# Run tests without coverage reports +uv run pytest --no-cov + +# Run specific test file +uv run pytest tests/frequentist/test_z_test.py + +# Run specific test +uv run pytest tests/frequentist/test_z_test.py::test_name + +# Run all tests across Python versions +uv run tox +``` + +### Code Quality +```bash +# Format code with black (line length: 119) +uv run black spotify_confidence tests + +# Check formatting without making changes +uv run black --check --diff spotify_confidence tests + +# Lint with flake8 (max line length: 120) +uv run flake8 spotify_confidence tests + +# Run all quality checks (as done in CI) +uv run black --check --diff spotify_confidence tests && uv run flake8 spotify_confidence tests && uv run pytest +``` + +### Build +```bash +# Build distribution packages +uv run python -m build +``` + +## Architecture + +### Core Design Pattern + +The library follows an object-oriented design with separation of concerns: + +1. **Statistical Test Classes**: High-level APIs (`ZTest`, `StudentsTTest`, `ChiSquared`, `BetaBinomial`, `ZTestLinreg`) +2. 
**Experiment Class**: Base class containing shared analysis methods for frequentist tests +3. **Computer Classes**: Perform the actual statistical computations +4. **Grapher Classes**: Generate visualizations using Chartify + +All main test classes inherit from abstract base classes in `spotify_confidence/analysis/abstract_base_classes/`: +- `ConfidenceABC`: Base for all statistical test classes +- `ConfidenceComputerABC`: Base for computation logic +- `ConfidenceGrapherABC`: Base for visualization logic + +### Module Structure + +``` +spotify_confidence/ +├── analysis/ +│ ├── abstract_base_classes/ # ABC definitions for the framework +│ ├── frequentist/ # Frequentist statistical methods +│ │ ├── confidence_computers/ # Statistical computation logic +│ │ ├── experiment.py # Base class for frequentist tests +│ │ ├── z_test.py # Z-test implementation +│ │ ├── t_test.py # Student's T-test implementation +│ │ ├── chi_squared.py # Chi-squared test +│ │ ├── z_test_linreg.py # Z-test with linear regression variance reduction +│ │ ├── sequential_bound_solver.py # Group sequential testing +│ │ ├── multiple_comparison.py # Multiple testing correction +│ │ └── sample_size_calculator.py +│ ├── bayesian/ # Bayesian methods +│ │ └── bayesian_models.py # BetaBinomial implementation +│ ├── constants.py # Shared constants +│ └── confidence_utils.py # Shared utility functions +├── samplesize/ # Sample size calculations +├── examples.py # Example data generators +├── chartgrid.py # Chart grid utilities +└── options.py # Global configuration +``` + +### Key Classes and Their Relationships + +- **Experiment** (in `frequentist/experiment.py`): The core base class for frequentist tests. 
Provides methods like: + - `summary()`: Overall metric summaries + - `difference()`: Pairwise comparisons + - `multiple_difference()`: Multiple comparisons with correction + - `difference_plot()`, `summary_plot()`, etc.: Visualization methods + - `sample_size()`: Required sample size calculations + - `statistical_power()`: Power analysis + +- **ZTest, StudentsTTest, ChiSquared**: Thin wrappers that initialize `Experiment` with the appropriate computer and method + +- **Computer Classes** (in `frequentist/confidence_computers/`): Handle the statistical calculations + - `ZTestComputer`, `TTestComputer`, `ChiSquaredComputer`: Specific computation implementations + - All inherit from `ConfidenceComputerABC` + +- **ChartifyGrapher**: Implements visualization using the Chartify library + +### Data Model + +The library works with DataFrames containing sufficient statistics: +- `numerator_column`: Sum or count (e.g., sum of conversions) +- `denominator_column`: Total observations (e.g., total users) +- `numerator_sum_squares_column`: Sum of squares (optional, for variance calculations) +- `categorical_group_columns`: Treatment/control groups and other dimensions +- `ordinal_group_column`: Time-based grouping for sequential analysis + +### Important Conventions + +1. **Method Column**: Tests add a `METHOD_COLUMN_NAME` to data indicating the test type (e.g., "z-test", "t-test") + +2. **Multiple Comparison Correction**: Supported methods defined in `constants.py`: + - Standard: bonferroni, holm, hommel, sidak, FDR methods + - SPOT-1 variants: Custom Spotify methods for specific use cases + +3. **Non-Inferiority Margins (NIMs)**: Can be specified as absolute values or relative percentages + +4. **Sequential Testing**: The `sequential_bound_solver.py` module implements group sequential designs with spending functions + +5. 
**Variance Reduction**: `ZTestLinreg` uses pre-exposure data to fit a linear model and reduce variance (CUPED method) + +## Testing Guidelines + +- Tests are organized to mirror the source structure under `tests/` +- Use pytest fixtures for common test data +- Tests check both DataFrame outputs and chart generation +- Coverage target is configured in `pyproject.toml` + +## Python Version Support + +Supports Python 3.9, 3.10, 3.11, and 3.12. The `tox.ini` includes a `py39-min` environment that tests with minimum dependency versions. + +The project uses `tox-uv` to leverage uv's fast package installation and environment management in tox, significantly speeding up multi-environment testing. The GitHub Actions CI workflow also uses uv for faster dependency installation. + +## Code Style + +- Black formatting with 119 character line length +- Flake8 linting with max line length 120 +- Ignored flake8 rules: E203, E231, W503 +- Excluded from linting: `.venv`, `.tox`, `dist`, `build`, `scratch.py`, `confidence_dev` diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index b175338..86b3bd7 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -57,41 +57,55 @@ Get Started! Ready to contribute? Here's how to set up `confidence` for local development. +**Prerequisites:** + +* `uv `_ - Fast Python package installer (recommended) +* Python 3.9 or later + 1. Fork the `confidence` repo on GitHub. 2. Clone your fork locally:: - $ git clone https://github.com/spotify/confidence + $ git clone git@github.com:your_username/confidence.git + $ cd confidence + +3. Set up your development environment using uv:: + + $ uv venv + $ uv pip install -e ".[dev]" -3. Install your local copy into a virtualenv. Assuming you have virtualenvwrapper installed, this is how you set up your fork for local development:: + This creates a virtual environment and installs the package in editable mode with all development dependencies. - $ mkvirtualenv confidence_dev - $ cd confidence/ - $ tox +4. 
Verify your setup by running the tests:: - The tox command will install the dev requirements in requirements_dev.txt and run all tests. + $ uv run pytest -4. Create a branch for local development:: + This should run all tests and show they pass. + +5. Create a branch for local development:: $ git checkout -b name-of-your-bugfix-or-feature Now you can make your changes locally. -5. When you're done making changes, format using `make black`, check that your changes pass flake8 and the tests, including testing other Python versions with tox:: +6. When you're done making changes, check that your changes pass all quality checks:: + + $ uv run black spotify_confidence tests --line-length 119 # Format code + $ uv run flake8 spotify_confidence tests # Lint code + $ uv run pytest # Run tests + + To test across all supported Python versions (3.9, 3.10, 3.11, 3.12):: - $ make black - $ flake8 confidence tests - $ python setup.py test or py.test - $ tox + $ uv run tox -p auto - To get flake8 and tox, just pip install them into your virtualenv. + Note: tox requires all Python versions to be installed on your system. -6. Commit your changes and push your branch to GitHub:: +7. Commit your changes and push your branch to GitHub:: $ git add . $ git commit -m "Your detailed description of your changes." $ git push origin name-of-your-bugfix-or-feature -7. Submit a pull request through the GitHub website. +8. Submit a pull request through the GitHub website. Pull Request Guidelines ----------------------- @@ -101,23 +115,36 @@ Before you submit a pull request, check that it meets these guidelines: 1. The pull request should include tests. 2. If the pull request adds functionality, the docs should be updated. Put your new functionality into a function with a docstring, and add the - feature to the list in README.rst. -3. The pull request should work for Python 3.6 and 3.7. Check - and make sure that the tests pass for all supported Python versions. + feature to the list in README.md. 
+3. The pull request should work for Python 3.9, 3.10, 3.11, and 3.12. The CI + pipeline will automatically test all supported Python versions. Tips ---- To run a subset of tests:: -$ py.test tests.test_confidence + $ uv run pytest tests/frequentist/test_ttest.py + +To run a specific test:: + + $ uv run pytest tests/frequentist/test_ttest.py::TestCategorical::test_summary + +To run tests with verbose output:: + + $ uv run pytest -v + +To see test coverage:: + + $ uv run pytest --cov=spotify_confidence --cov-report=html + $ open htmlcov/index.html Release Process ----------------------- While commits and pull requests are welcome from any contributor, we try to -simplify the distribution process for everyone by managing the release +simplify the distribution process for everyone by managing the release process with specific contributors serving in the role of Release Managers. Release Managers are responsible for: @@ -142,7 +169,7 @@ PATCH version when you make backwards-compatible bug fixes. Release Strategy ~~~~~~~~~~~~~~~~ -Each new release will be made on its own branch, with the branch Master +Each new release will be made on its own branch, with the branch Master representing the most recent, furthest release. Releases are published to PyPi automatically once a new release branch is merged to Master. 
Additionally, new releases are also tracked manually on `github diff --git a/MANIFEST.in b/MANIFEST.in deleted file mode 100644 index 2353e85..0000000 --- a/MANIFEST.in +++ /dev/null @@ -1,10 +0,0 @@ -include README.md -include CONTRIBUTING.md -include AUTHORS.md - -recursive-include tests * -recursive-exclude * __pycache__ -recursive-exclude * *.py[co] - -recursive-include docs *.rst conf.py Makefile make.bat *.jpg *.png *.gif *.py -recursive-include confidence/ *.rst conf.py Makefile make.bat *.jpg *.png *.gif *.py \ No newline at end of file diff --git a/Makefile b/Makefile index b8f0ff3..8c0ff85 100644 --- a/Makefile +++ b/Makefile @@ -47,14 +47,17 @@ clean-test: ## remove test and coverage artifacts rm -f .coverage rm -fr htmlcov/ +format: ## format code with black + black spotify_confidence tests --line-length 119 + lint: ## check style with flake8 - flake8 confidence tests + flake8 spotify_confidence tests test: ## run tests quickly with the default Python python3 -m pytest coverage: ## check code coverage quickly with the default Python - coverage run --source confidence -m pytest + coverage run --source spotify_confidence -m pytest coverage report -m coverage html $(BROWSER) htmlcov/index.html @@ -86,10 +89,8 @@ install: clean ## install the package to the active Python's site-packages pip install -e . 
install-test: clean - pip3 install --index-url https://test.pypi.org/simple/ confidence-spotify + pip3 install --index-url https://test.pypi.org/simple/ spotify-confidence install-prod: clean - pip3 install confidence-spotify + pip3 install spotify-confidence -black: - black spotify_confidence tests --line-length 119 diff --git a/pyproject.toml b/pyproject.toml index f6c1689..57fda33 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,62 @@ [build-system] -requires = [ - "setuptools>=42", - "wheel", -] +requires = ["setuptools>=61.2"] build-backend = "setuptools.build_meta" + +[project] +name = "spotify-confidence" +version = "4.0.0" +description = "Package for calculating and visualising confidence intervals, e.g. for A/B test analysis." +readme = "README.md" +license = {file = "LICENSE"} +authors = [{name = "Per Sillren", email = "pers@spotify.com"}] +requires-python = ">=3.9" +classifiers = [ + "Programming Language :: Python :: 3", + "License :: OSI Approved :: Apache Software License", + "Operating System :: OS Independent", +] +dependencies = [ + "numpy>=1.21.0", + "scipy>=1.9.0", + "pandas>=1.4.0", + "statsmodels>=0.13.5", + "chartify>=5.0.0", + "ipywidgets>=8.0.0", +] + +[project.optional-dependencies] +dev = [ + "build", + "twine", + "black>=23.7.0", + "flake8>=6.0.0", + "tox>=4.0.0", + "tox-uv>=1.0.0", + "pytest>=7.0.0", + "pytest-cov>=4.0.0", + "pytest-xdist>=3.0.2", + "coverage>=7.0.0", +] + +[project.urls] +Homepage = "https://github.com/spotify/confidence" +"Bug Tracker" = "https://github.com/spotify/confidence/issues" + +[tool.setuptools.packages.find] +where = ["."] +include = ["spotify_confidence*"] +namespaces = false + +[tool.black] +line-length = 119 +target-version = ["py39", "py310", "py311", "py312"] + +[tool.pytest.ini_options] +addopts = "-v -n auto --cov=spotify_confidence --cov-report=html --cov-report=xml --cov-report=term-missing" +testpaths = ["tests"] + +[tool.coverage.run] +source = ["spotify_confidence"] + 
+[tool.coverage.report] +show_missing = true diff --git a/requirements_dev.txt b/requirements_dev.txt deleted file mode 100644 index cf02b2e..0000000 --- a/requirements_dev.txt +++ /dev/null @@ -1,20 +0,0 @@ --e . -pip==23.0.1 -build -twine -bumpversion==0.5.3 -watchdog==0.8.3 -flake8==4.0.1 -tox==4.4.7 -Sphinx==1.4.8 -pytest-runner==6.0.0 -jupyterlab==3.2.9 -pylint==1.7.4 -coverage==4.5.1 -pytest==7.0.1 -pytest-cov==2.5.1 -ipywidgets>=7.1.0 -black==23.1.0 -ipython>=8.10.0 # not directly required, pinned by Snyk to avoid a vulnerability -setuptools>=65.5.1 # not directly required, pinned by Snyk to avoid a vulnerability -tornado>=6.3.2 # not directly required, pinned by Snyk to avoid a vulnerability diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 4186a42..0000000 --- a/setup.cfg +++ /dev/null @@ -1,31 +0,0 @@ -[metadata] -name = spotify-confidence -version = 4.0.0 -author = Per Sillren -author_email = pers@spotify.com -description = Package for calculating and visualising confidence intervals, e.g. for A/B test analysis. -long_description = file: README.md -long_description_content_type = text/markdown -url = https://github.com/spotify/confidence -project_urls = - Bug Tracker = https://github.com/spotify/confidence/issues -classifiers = - Programming Language :: Python :: 3 - License :: OSI Approved :: Apache Software License - Operating System :: OS Independent - -[options] -package_dir = - = . -packages = find: -python_requires = >=3.9 -install_requires = - numpy>=1.20.0,<2.0.0 - scipy>=1.6.0 - pandas>=1.2.0 - statsmodels>=0.13.0,<1.0.0 - chartify>=5.0.1 - ipywidgets>=8.0.0 - -[options.packages.find] -where = . 
diff --git a/setup.py b/setup.py deleted file mode 100644 index b908cbe..0000000 --- a/setup.py +++ /dev/null @@ -1,3 +0,0 @@ -import setuptools - -setuptools.setup() diff --git a/spotify_confidence/__init__.py b/spotify_confidence/__init__.py index 6369a1f..d8e9f24 100644 --- a/spotify_confidence/__init__.py +++ b/spotify_confidence/__init__.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from pkg_resources import require as _require +from importlib.metadata import version as _version from .analysis.bayesian.bayesian_models import BetaBinomial from spotify_confidence.analysis.frequentist.chi_squared import ChiSquared from spotify_confidence.analysis.frequentist.t_test import StudentsTTest @@ -25,7 +25,7 @@ from . import examples from .options import options -__version__ = _require("spotify_confidence")[0].version +__version__ = _version("spotify_confidence") __all__ = [ "BetaBinomial", diff --git a/spotify_confidence/analysis/confidence_utils.py b/spotify_confidence/analysis/confidence_utils.py index 44db803..29f241a 100644 --- a/spotify_confidence/analysis/confidence_utils.py +++ b/spotify_confidence/analysis/confidence_utils.py @@ -250,5 +250,5 @@ def dfmatmul(x, y, outer=True): def de_list_if_length_one(x): """Return first element of x if x is a list of length one""" - is_iterable = type(x) != str and isinstance(x, Iterable) + is_iterable = not isinstance(x, str) and isinstance(x, Iterable) return x[0] if is_iterable and len(x) == 1 else x diff --git a/spotify_confidence/analysis/frequentist/confidence_computers/confidence_computer.py b/spotify_confidence/analysis/frequentist/confidence_computers/confidence_computer.py index 4c47c22..4ea6105 100644 --- a/spotify_confidence/analysis/frequentist/confidence_computers/confidence_computer.py +++ b/spotify_confidence/analysis/frequentist/confidence_computers/confidence_computer.py @@ -351,7 +351,7 @@ def compute_differences( level_columns = 
get_remaning_groups(self._all_group_columns, groupby) difference_df = self._compute_differences( level_columns=level_columns, - levels=[levels] if type(levels) == tuple else levels, + levels=[levels] if isinstance(levels, tuple) else levels, absolute=absolute, groupby=groupby, level_as_reference=True, diff --git a/spotify_confidence/samplesize/sample_size_calculator.py b/spotify_confidence/samplesize/sample_size_calculator.py index 10b77d5..4c22506 100644 --- a/spotify_confidence/samplesize/sample_size_calculator.py +++ b/spotify_confidence/samplesize/sample_size_calculator.py @@ -422,18 +422,18 @@ def show_samplesize( ) code_html = widgets.HTML( "
"
-                    f"SampleSize.continuous(average_absolute_mde={ mde },\n"
-                    f"                      baseline_variance={ baseline },\n"
-                    f"                      alpha={ alpha },\n"
-                    f"                      power={ power },\n"
-                    f"                      treatments={ treatments },\n"
+                    f"SampleSize.continuous(average_absolute_mde={mde},\n"
+                    f"                      baseline_variance={baseline},\n"
+                    f"                      alpha={alpha},\n"
+                    f"                      power={power},\n"
+                    f"                      treatments={treatments},\n"
                     f"                      comparisons="
-                    f"'{ comparisons }',\n"
+                    f"'{comparisons}',\n"
                     f"                      treatment_costs="
-                    f"{ list(treatment_costs) },\n"
+                    f"{list(treatment_costs)},\n"
                     f"                      treatment_allocations=None,\n"
                     f"                      bonferroni_correction="
-                    f"{ bonferroni_correction })"
+                    f"{bonferroni_correction})"
                     "
" ) else: @@ -461,19 +461,19 @@ def show_samplesize( ) code_html = widgets.HTML( "
"
-                    f"SampleSize.binomial(absolute_percentage_mde={ mde },\n"
+                    f"SampleSize.binomial(absolute_percentage_mde={mde},\n"
                     f"                    baseline_proportion="
-                    f"{ baseline },\n"
-                    f"                    alpha={ alpha },\n"
-                    f"                    power={ power },\n"
-                    f"                    treatments={ treatments },\n"
+                    f"{baseline},\n"
+                    f"                    alpha={alpha},\n"
+                    f"                    power={power},\n"
+                    f"                    treatments={treatments},\n"
                     f"                    comparisons="
-                    f"'{ comparisons }',\n"
+                    f"'{comparisons}',\n"
                     f"                    treatment_costs="
-                    f"{ list(treatment_costs) },\n"
+                    f"{list(treatment_costs)},\n"
                     f"                    treatment_allocations=None,\n"
                     f"                    bonferroni_correction="
-                    f"{ bonferroni_correction })"
+                    f"{bonferroni_correction})"
                     "
" ) @@ -482,7 +482,7 @@ def compare_against_optimal(current, optimal): return "" else: return ( - f"
{current/optimal:.1f}x " + f"
{current / optimal:.1f}x " f"optimal group allocation of {optimal:,}." f"" ) @@ -501,7 +501,7 @@ def compare_against_optimal(current, optimal): else: treatment = "Variant " + str(i) - cell_str += f"
{treatment}: " f"{n_cell[i]:,} ({prop_cell[i]*100:.1f}%)" + cell_str += f"
{treatment}: " f"{n_cell[i]:,} ({prop_cell[i] * 100:.1f}%)" display(widgets.HTML(cell_str)) display(code_html) diff --git a/tests/bayesian/test_betabinomial.py b/tests/bayesian/test_betabinomial.py index 1779623..3a6bf74 100644 --- a/tests/bayesian/test_betabinomial.py +++ b/tests/bayesian/test_betabinomial.py @@ -9,7 +9,7 @@ class TestCategorical(object): - def setup(self): + def setup_method(self): self.data = pd.DataFrame( { "variation_name": ["test", "control", "test2", "test", "control", "test2"], @@ -148,7 +148,7 @@ def test_multiple_difference_level_as_reference(self): class TestOrdinal: - def setup(self): + def setup_method(self): self.data = pd.DataFrame( { "variation_name": [ @@ -307,7 +307,7 @@ def test_multiple_difference_plot(self): class TestOrdinalPlusTwoCategorical(object): - def setup(self): + def setup_method(self): self.data = pd.DataFrame( { "variation_name": [ diff --git a/tests/frequentist/test_bounds.py b/tests/frequentist/test_bounds.py index 5c2a0f8..c85d7e9 100644 --- a/tests/frequentist/test_bounds.py +++ b/tests/frequentist/test_bounds.py @@ -1,5 +1,4 @@ import pandas as pd -import pytest import time import numpy as np from pandas import Timestamp @@ -10,7 +9,6 @@ ) -@pytest.mark.skip(reason="Skipping because this test is very slow") def test_many_days(): """ This input (based on a real experiment) is very long, which can cause slow calculation @@ -404,7 +402,6 @@ def test_many_days(): assert (time.time() - start_time) < 0.01 -@pytest.mark.skip(reason="Skipping because this test is very slow") def test_many_days_fast_and_no_crash(): """ This is based on experiment 1735 on 26.11.2020. 
The calculation of the corresponding bounds takes many minutes diff --git a/tests/frequentist/test_chisquared.py b/tests/frequentist/test_chisquared.py index 97e67e8..7b68aed 100644 --- a/tests/frequentist/test_chisquared.py +++ b/tests/frequentist/test_chisquared.py @@ -24,7 +24,7 @@ def chart_data(chart_object, series_name): class TestCategorical(object): - def setup(self): + def setup_method(self): np.random.seed(123) self.data = pd.DataFrame( @@ -335,7 +335,7 @@ def test_achieved_power_groupby(self): class TestOrdinal(object): - def setup(self): + def setup_method(self): self.data = pd.DataFrame( { "variation_name": [ @@ -514,7 +514,7 @@ def test_sample_ratio_test(self): class TestOrdinalPlusTwoCategorical(object): - def setup(self): + def setup_method(self): self.data = pd.DataFrame( { "variation_name": [ diff --git a/tests/frequentist/test_experiment.py b/tests/frequentist/test_experiment.py index ac4775c..34946a4 100644 --- a/tests/frequentist/test_experiment.py +++ b/tests/frequentist/test_experiment.py @@ -7,7 +7,7 @@ class TestBootstrap(object): - def setup(self): + def setup_method(self): np.random.seed(123) n_bootstraps = int(5e5) self.data = pd.DataFrame( diff --git a/tests/frequentist/test_ttest.py b/tests/frequentist/test_ttest.py index f310606..61735c6 100644 --- a/tests/frequentist/test_ttest.py +++ b/tests/frequentist/test_ttest.py @@ -36,7 +36,7 @@ def chart_data(chart_object, series_name): class TestCategorical(object): - def setup(self): + def setup_method(self): np.random.seed(123) self.data = pd.DataFrame( @@ -244,7 +244,7 @@ def test_achieved_power(self): class TestOrdinal(object): - def setup(self): + def setup_method(self): self.data = pd.DataFrame( { "variation_name": [ @@ -403,7 +403,7 @@ def test_achieved_power(self): class TestOrdinalPlusTwoCategorical(object): - def setup(self): + def setup_method(self): self.data = pd.DataFrame( { "variation_name": [ @@ -805,7 +805,7 @@ def test_differece_plot_with_nims(self): class 
TestCategoricalBinomialData(object): - def setup(self): + def setup_method(self): np.random.seed(123) self.data = pd.DataFrame( @@ -877,7 +877,7 @@ def test_multiple_difference(self): class TestWithNims(object): - def setup(self): + def setup_method(self): self.data = pd.DataFrame( [ { diff --git a/tests/frequentist/test_ztest.py b/tests/frequentist/test_ztest.py index b03b4f1..957029b 100644 --- a/tests/frequentist/test_ztest.py +++ b/tests/frequentist/test_ztest.py @@ -26,7 +26,7 @@ class TestPoweredEffectContinuousSingleMetric(object): - def setup(self): + def setup_method(self): self.data = pd.DataFrame( { "variation_name": [ @@ -87,7 +87,7 @@ def test_powered_effect2(self): class TestPoweredEffectContinuousMultipleSuccessMetrics(object): - def setup(self): + def setup_method(self): self.data = pd.DataFrame( { "variation_name": ["test", "control", "test2", "test", "control", "test2"], @@ -149,7 +149,7 @@ def test_powered_effect1(self): class TestPoweredEffectContinuousMultipleMetricTypes(object): - def setup(self): + def setup_method(self): self.data = pd.DataFrame( { "variation_name": ["test", "control", "test2", "test", "control", "test2"], @@ -212,7 +212,7 @@ def test_powered_effect(self): class TestPoweredEffectContinuousMultipleMetricsSegments(object): - def setup(self): + def setup_method(self): self.data = pd.DataFrame( { "variation_name": [ @@ -342,7 +342,7 @@ def test_powered_effect(self): class TestPoweredEffectContinuousMultipleMetricsSegments2(object): - def setup(self): + def setup_method(self): self.data = pd.DataFrame( { "variation_name": [ @@ -472,7 +472,7 @@ def test_powered_effect(self): class TestPoweredEffectContinuousMultipleMetricsSegments3(object): - def setup(self): + def setup_method(self): self.data = pd.DataFrame( { "variation_name": [ @@ -605,7 +605,7 @@ def test_powered_effect(self): class TestPoweredEffectBinary(object): - def setup(self): + def setup_method(self): np.random.seed(123) self.data = pd.DataFrame( @@ -750,7 +750,7 @@ 
def test_powered_effect(self): class TestPoweredEffectBinaryOnlyGuardrail(object): - def setup(self): + def setup_method(self): np.random.seed(123) self.data = pd.DataFrame( @@ -816,7 +816,7 @@ def test_powered_effect(self): class TestBinary(object): - def setup(self): + def setup_method(self): np.random.seed(123) self.data = pd.DataFrame( @@ -922,7 +922,7 @@ def test_multiple_difference_plot(self): class TestCategoricalBinary(object): - def setup(self): + def setup_method(self): np.random.seed(123) self.data = pd.DataFrame( @@ -1038,7 +1038,7 @@ def test_multiple_difference_plot_groupby(self): class TestCategoricalContinuous(object): - def setup(self): + def setup_method(self): np.random.seed(123) self.data = pd.DataFrame( @@ -1126,7 +1126,7 @@ def test_multiple_difference_plot_groupby(self): class TestOrdinal(object): - def setup(self): + def setup_method(self): self.data = pd.DataFrame( { "variation_name": [ @@ -1230,7 +1230,7 @@ def test_multiple_difference_plot_groupby(self): class TestOrdinalPlusTwoCategorical(object): - def setup(self): + def setup_method(self): self.data = pd.DataFrame( { "variation_name": [ @@ -1892,7 +1892,7 @@ def test_differece_plot_with_nims_in_df(self): class TestCategoricalBinomialData(object): - def setup(self): + def setup_method(self): np.random.seed(123) self.data = pd.DataFrame( @@ -2004,7 +2004,7 @@ def test_multiple_difference(self): class TestWithNims(object): - def setup(self): + def setup_method(self): self.data = pd.DataFrame( [ { @@ -2109,7 +2109,7 @@ def test_one_sided_ztest_negative(self): class TestSequentialOrdinalPlusTwoCategorical(object): - def setup(self): + def setup_method(self): np.random.seed(123) d = 50 + 1 * np.random.randn(60) u = np.floor(2000 + np.linspace(0, 1000, 60) + 10 * np.random.randn(60)) @@ -2636,7 +2636,7 @@ def test_multiple_difference_groupby_mixed_nims(self): class TestSequentialOrdinalPlusTwoCategorical2(object): - def setup(self): + def setup_method(self): self.data = pd.DataFrame( [ { @@ 
-3406,7 +3406,7 @@ def test_multiple_difference_groupby(self): class TestSequentialOneSided(object): - def setup(self): + def setup_method(self): DATE = "date" COUNT = "count" SUM = "sum" @@ -3448,7 +3448,7 @@ def test_multiple_difference_groupby(self): class TestSequentialTwoSided(object): - def setup(self): + def setup_method(self): DATE = "date" COUNT = "count" SUM = "sum" @@ -3489,7 +3489,7 @@ def test_multiple_difference_groupby(self): class TestSequentialOneSidedThreeGroups(object): - def setup(self): + def setup_method(self): DATE = "date" COUNT = "count" SUM = "sum" @@ -3534,7 +3534,7 @@ def test_multiple_difference_groupby(self): class TestNimsWithNaN(object): - def setup(self): + def setup_method(self): self.data = pd.DataFrame( { "count": { diff --git a/tests/frequentist/test_ztest_linreg.py b/tests/frequentist/test_ztest_linreg.py index eecd4ad..cb81a14 100644 --- a/tests/frequentist/test_ztest_linreg.py +++ b/tests/frequentist/test_ztest_linreg.py @@ -7,7 +7,7 @@ class TestUnivariateSingleMetric(object): - def setup(self): + def setup_method(self): np.random.seed(123) n = 10000 d = np.random.randint(2, size=n) @@ -74,7 +74,7 @@ def linreg(X, y): class TestUnivariateMultiMetric(object): - def setup(self): + def setup_method(self): np.random.seed(123) n = 20000 d = np.random.randint(2, size=n) @@ -164,7 +164,7 @@ def linreg(X, y): class TestUnivariateNoFeatures(object): - def setup(self): + def setup_method(self): self.data = pd.DataFrame( [ { @@ -214,7 +214,7 @@ def test_summary(self): class TestMultivariateSingleMetric(object): - def setup(self): + def setup_method(self): np.random.seed(123) n = 10000 @@ -316,7 +316,7 @@ def linreg(X, y): class TestMultivariateMultipleMetrics(object): - def setup(self): + def setup_method(self): np.random.seed(123) n = 10000 @@ -441,7 +441,7 @@ def linreg(X, y): class TestUnivariateMultiMetricRequiredSampleSize(object): - def setup(self): + def setup_method(self): np.random.seed(123) n = 2000000 d = 
np.random.randint(2, size=n) @@ -512,7 +512,7 @@ def test_parameters_univariate_required_sample_size(self): class TestUnivariateSingleMetricWithBadPreExposureData(object): - def setup(self): + def setup_method(self): np.random.seed(123) n = 10000 d = np.random.randint(2, size=n) @@ -569,7 +569,7 @@ def test_parameters_univariate(self): class TestUnivariateSingleMetricNegativeVariance(object): - def setup(self): + def setup_method(self): self.data = pd.DataFrame( [ { diff --git a/tox.ini b/tox.ini index fc4e692..064fa09 100644 --- a/tox.ini +++ b/tox.ini @@ -1,37 +1,50 @@ [tox] -envlist = python3.9, python3.10, python3.11 -skipsdist = True -usedevelop = True - -[travis] -python = - 3.9: python3.9 - 3.10: python3.10 - 3.11: python3.11 +envlist = py39, py310, py311, py312, py39-min +isolated_build = True +requires = tox-uv [gh-actions] python = - 3.9: python3.9 - 3.10: python3.10 - 3.11: python3.11 + 3.9: py39-min + 3.10: py310 + 3.11: py311 + 3.12: py312 [testenv] -setenv = - PYTHONPATH = {toxinidir} +extras = dev +commands = + black --check --diff spotify_confidence tests + flake8 spotify_confidence tests + pytest -n auto --no-cov --basetemp={envtmpdir} {posargs} + +[testenv:py312] +extras = dev +commands = + black --check --diff spotify_confidence tests + flake8 spotify_confidence tests + pytest -n auto --basetemp={envtmpdir} {posargs} + +[testenv:py39-min] +basepython = python3.9 deps = - -r{toxinidir}/requirements_dev.txt + numpy==1.21.0 + scipy==1.9.0 + pandas==1.4.0 + statsmodels==0.13.5 + chartify==5.0.0 + ipywidgets==8.0.0 + black==23.7.0 + flake8==6.0.0 + pytest==7.0.0 + pytest-cov==4.0.0 + pytest-xdist==3.0.2 + coverage==7.0.0 commands = - flake8 {posargs} - coverage erase - py.test {posargs} + black --check --diff spotify_confidence tests + flake8 spotify_confidence tests + pytest -n auto --no-cov --basetemp={envtmpdir} {posargs} [flake8] -show-source = true max-line-length = 120 +ignore = E203,E231,W503 exclude = 
.venv,.tox,.git,dist,docs,*.egg,build,scratch.py,confidence_dev -ignore = E203, W503 - -[pytest] -addopts = -v --cov=spotify_confidence --cov-report=html --cov-report=xml --cov-report=term-missing -testpaths = tests -