From 7023be5f7135a05788db657c295ae6642f2743c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Christoffer=20Lindstr=C3=B8m?= Date: Sun, 2 Aug 2020 21:10:05 +0000 Subject: [PATCH 1/4] version 0.3.1 --- DESCRIPTION | 6 +-- MD5 | 10 ++--- NEWS.md | 4 ++ R/implied_probabilities.R | 49 ++++++++++++++++++++-- inst/doc/introduction.html | 4 +- tests/testthat/test_1.R | 84 ++++++++++++++++++++++++++++++++++++++ 6 files changed, 143 insertions(+), 14 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 624f405..b654768 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: implied Type: Package Title: Convert Bookmaker Odds to Probabilities -Version: 0.3.0 +Version: 0.3.1 Author: Jonas Christoffer Lindstrøm Maintainer: Jonas Christoffer Lindstrøm Description: Convert bookmaker odds into proper probabilities. Seven different @@ -14,6 +14,6 @@ RoxygenNote: 7.0.2 Suggests: testthat (>= 2.0.1), knitr, rmarkdown VignetteBuilder: knitr NeedsCompilation: no -Packaged: 2020-04-18 20:55:05 UTC; jonas +Packaged: 2020-08-02 17:43:32 UTC; jonas Repository: CRAN -Date/Publication: 2020-04-19 00:10:02 UTC +Date/Publication: 2020-08-02 22:10:05 UTC diff --git a/MD5 b/MD5 index f2b94a3..6f1f8f3 100644 --- a/MD5 +++ b/MD5 @@ -1,13 +1,13 @@ -e1c144d5150fe622cb088097d29ffaf7 *DESCRIPTION +50d25839dec1f2c350722175bd78cb6a *DESCRIPTION 75b5ea970258de895ca96f3181b8d226 *NAMESPACE -31329041bcfc965c10df22f350a6374d *NEWS.md -55da476a7af11123d0eb8ec856e198b1 *R/implied_probabilities.R +5ec759872137c3f7c1c492e3cf60fdb7 *NEWS.md +f33f7af4c08b2c4172b1973ea88e6736 *R/implied_probabilities.R c71d10f8db03d579fa1c0c6e76959a7f *R/zzz.R 36fd139695e3baa1e55cba43a4853575 *build/vignette.rds aa3c1d34d43a0625c11e32d252ef5614 *inst/doc/introduction.R 71fa897a314a5fac9c5cdbe6693f7ab0 *inst/doc/introduction.Rmd -38b8657a46510b13acf377dfb886952c *inst/doc/introduction.html +1cdb8370609f1d7633ab37e2f45a44f3 *inst/doc/introduction.html 4bddfd68c8d5de61751fe82910dd1e27 *man/implied_probabilities.Rd 
bc77ebadaa37370915e00adac9036b01 *tests/testthat.R -970cea6265a2285a1865d398a855dd4a *tests/testthat/test_1.R +3ccf5c11bb2d6ef99e883ef97c955f07 *tests/testthat/test_1.R 71fa897a314a5fac9c5cdbe6693f7ab0 *vignettes/introduction.Rmd diff --git a/NEWS.md b/NEWS.md index b6fb429..2b61e86 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,7 @@ +# implied Version 0.3.1 +* Raises error if the inverse odds sum to less than 1. +* NA's will be returned if there are NA's in the input odds. + # implied Version 0.3.0 * A new algorithm for Shin's method is included. * Small bugfix for when Shin's method fails and produces NA's. Instead of crashing, it now raises a warning and flags the result as problematic. diff --git a/R/implied_probabilities.R b/R/implied_probabilities.R index 01627ce..6ef72f9 100644 --- a/R/implied_probabilities.R +++ b/R/implied_probabilities.R @@ -86,7 +86,7 @@ implied_probabilities <- function(odds, method='basic', normalize=TRUE, grossmar stopifnot(length(method) == 1, tolower(method) %in% c('basic', 'shin', 'bb', 'wpo', 'or', 'power', 'additive'), - all(odds >= 1), + all(odds >= 1, na.rm=TRUE), grossmargin >= 0, shin_method %in% c('js', 'uniroot'), length(shin_method) == 1) @@ -119,6 +119,16 @@ implied_probabilities <- function(odds, method='basic', normalize=TRUE, grossmar inverted_odds_sum <- rowSums(inverted_odds) out$margin <- inverted_odds_sum - 1 + # Missing values + missing_idx <- apply(odds, MARGIN = 1, + FUN = function(x) any(is.na(x))) + + + if (any(inverted_odds_sum[!missing_idx] < 1)){ + stop('Some inverse odds sum to less than 1.') + } + + if (method == 'basic'){ out$probabilities <- inverted_odds / inverted_odds_sum @@ -133,6 +143,11 @@ implied_probabilities <- function(odds, method='basic', normalize=TRUE, grossmar #if (shin_method == 'js' | grossmargin != 0){ for (ii in 1:n_odds){ + # Skip rows with missing values. 
+ if (missing_idx[ii] == TRUE){ + next + } + # initialize zz at 0 zz_tmp <- 0 @@ -157,6 +172,12 @@ implied_probabilities <- function(odds, method='basic', normalize=TRUE, grossmar } } else { for (ii in 1:n_odds){ + + # Skip rows with missing values. + if (missing_idx[ii] == TRUE){ + next + } + res <- stats::uniroot(f=shin_solvefor, interval = c(0,0.4), io=inverted_odds[ii,]) zvalues[ii] <- res$root @@ -168,7 +189,7 @@ implied_probabilities <- function(odds, method='basic', normalize=TRUE, grossmar out$probabilities <- probs out$zvalues <- zvalues - if (any(problematic_shin)){ + if (any(problematic_shin[!missing_idx])){ warning(sprintf('Could not find z: Did not converge in %d instances. Some results may be unreliable. See the "problematic" vector in the output.', sum(problematic_shin))) } @@ -194,6 +215,12 @@ implied_probabilities <- function(odds, method='basic', normalize=TRUE, grossmar probs <- matrix(nrow=n_odds, ncol=n_outcomes) for (ii in 1:n_odds){ + + # Skip rows with missing values. + if (missing_idx[ii] == TRUE){ + next + } + res <- stats::uniroot(f=or_solvefor, interval = c(0.05, 5), io=inverted_odds[ii,]) odds_ratios[ii] <- res$root probs[ii,] <- or_func(cc=res$root, io = inverted_odds[ii,]) @@ -208,6 +235,12 @@ implied_probabilities <- function(odds, method='basic', normalize=TRUE, grossmar exponents <- numeric(n_odds) for (ii in 1:n_odds){ + + # Skip rows with missing values. + if (missing_idx[ii] == TRUE){ + next + } + res <- stats::uniroot(f=pwr_solvefor, interval = c(0.001, 1), io=inverted_odds[ii,]) exponents[ii] <- res$root probs[ii,] <- pwr_func(nn=res$root, io = inverted_odds[ii,]) @@ -221,6 +254,12 @@ implied_probabilities <- function(odds, method='basic', normalize=TRUE, grossmar probs <- matrix(nrow=n_odds, ncol=n_outcomes) for (ii in 1:n_odds){ + + # Skip rows with missing values. 
+ if (missing_idx[ii] == TRUE){ + next + } + probs[ii,] <- inverted_odds[ii,] - ((inverted_odds_sum[ii] - 1) / n_outcomes) } @@ -242,7 +281,9 @@ implied_probabilities <- function(odds, method='basic', normalize=TRUE, grossmar # check if there are any probabilites outside the 0-1 range. problematic <- apply(out$probabilities, MARGIN = 1, FUN=function(x){any(x > 1 | x < 0)}) problematic[is.na(problematic)] <- TRUE - if (any(problematic)){ + problematic[missing_idx] <- NA + + if (any(problematic, na.rm=TRUE)){ warning(sprintf('Probabilities outside the 0-1 range produced at %d instances.\n', sum(problematic))) } @@ -253,7 +294,7 @@ implied_probabilities <- function(odds, method='basic', normalize=TRUE, grossmar if (method %in% c('shin', 'bb')){ negative_z <- out$zvalues < 0 - if (any(negative_z)){ + if (any(negative_z[!missing_idx])){ warning(sprintf('z estimated to be negative: Some results may be unreliable. See the "problematic" vector in the output.', negative_z)) } diff --git a/inst/doc/introduction.html b/inst/doc/introduction.html index eab143e..0a1e0fd 100644 --- a/inst/doc/introduction.html +++ b/inst/doc/introduction.html @@ -12,7 +12,7 @@ - + Introduction to the implied package @@ -70,7 +70,7 @@

Introduction to the implied package

Jonas C. Lindstrøm

-

2020-04-18

+

2020-08-02

diff --git a/tests/testthat/test_1.R b/tests/testthat/test_1.R index e0260e7..5e2d4cc 100644 --- a/tests/testthat/test_1.R +++ b/tests/testthat/test_1.R @@ -10,8 +10,11 @@ my_odds <- rbind(c(4.20, 3.70, 1.95), my_odds2 <- t(matrix(1/c(0.870, 0.2, 0.1, 0.05, 0.02, 0.01))) + context("Implied probabilities") + + iprobs1_basic <- implied_probabilities(my_odds) iprobs1_shin <- implied_probabilities(my_odds, method='shin') iprobs1_shin2 <- implied_probabilities(my_odds, method='shin', grossmargin = 0.01) @@ -35,6 +38,7 @@ iprobs2_or <- implied_probabilities(my_odds2, method='or') iprobs2_power <- implied_probabilities(my_odds2, method='power') + # tolerance for some tests toll <- 0.0001 @@ -68,6 +72,9 @@ test_that("Output", { iprobs2_wpo <- implied_probabilities(my_odds2, method='wpo') ) + # New in version 0.3.1, should give error. + expect_error(implied_probabilities(my_odds[,1:2])) + expect_equal(class(iprobs1_basic), 'list') expect_equal(class(iprobs1_shin), 'list') expect_equal(class(iprobs1_shin2), 'list') @@ -183,4 +190,81 @@ test_that("Non-normalized results", { }) +context("Missing values") + +# some example odds, with missing value +my_odds_na <- rbind(c(4.20, 3.70, 1.95), + c(2.45, NA, 2.90), + c(2.05, 3.20, 3.80)) + +# Test with missing values + +iprobs1na_basic <- implied_probabilities(my_odds_na) +iprobs1na_shin <- implied_probabilities(my_odds_na, method='shin') +iprobs1na_shin2 <- implied_probabilities(my_odds_na, method='shin', grossmargin = 0.01) +iprobs1na_shin3 <- implied_probabilities(my_odds_na, method='shin', shin_method = 'uniroot') +iprobs1na_bb <- implied_probabilities(my_odds_na, method='bb') +iprobs1na_bb2 <- implied_probabilities(my_odds_na, method='bb', grossmargin = 0.01) +iprobs1na_wpo <- implied_probabilities(my_odds_na, method='wpo') +iprobs1na_or <- implied_probabilities(my_odds_na, method='or') +iprobs1na_power <- implied_probabilities(my_odds_na, method='power') +iprobs1na_additive <- implied_probabilities(my_odds_na, 
method='additive') + + +test_that("missing values", { + + expect_true(all(is.na(iprobs1na_basic$probabilities[2,]))) + expect_true(is.na(iprobs1na_basic$problematic[2])) + expect_true(is.na(iprobs1na_basic$margin[2])) + expect_false(is.na(iprobs1na_basic$problematic[1])) + expect_false(is.na(iprobs1na_basic$margin[1])) + + + expect_true(all(is.na(iprobs1na_shin$probabilities[2,]))) + expect_true(is.na(iprobs1na_shin$problematic[2])) + expect_false(is.na(iprobs1na_shin$problematic[1])) + expect_false(is.na(iprobs1na_shin$margin[1])) + + expect_true(all(is.na(iprobs1na_shin2$probabilities[2,]))) + expect_true(is.na(iprobs1na_shin2$problematic[2])) + expect_false(is.na(iprobs1na_shin2$problematic[1])) + expect_false(is.na(iprobs1na_shin2$margin[1])) + + expect_true(all(is.na(iprobs1na_shin3$probabilities[2,]))) + expect_true(is.na(iprobs1na_shin3$problematic[2])) + expect_false(is.na(iprobs1na_shin3$problematic[1])) + expect_false(is.na(iprobs1na_shin3$margin[1])) + + expect_true(all(is.na(iprobs1na_bb$probabilities[2,]))) + expect_true(is.na(iprobs1na_bb$problematic[2])) + expect_false(is.na(iprobs1na_bb$problematic[1])) + expect_false(is.na(iprobs1na_bb$margin[1])) + + expect_true(all(is.na(iprobs1na_bb2$probabilities[2,]))) + expect_true(is.na(iprobs1na_bb2$problematic[2])) + expect_false(is.na(iprobs1na_bb2$problematic[1])) + expect_false(is.na(iprobs1na_bb2$margin[1])) + + expect_true(all(is.na(iprobs1na_wpo$probabilities[2,]))) + expect_true(is.na(iprobs1na_wpo$problematic[2])) + expect_false(is.na(iprobs1na_wpo$problematic[1])) + expect_false(is.na(iprobs1na_wpo$margin[1])) + + expect_true(all(is.na(iprobs1na_or$probabilities[2,]))) + expect_true(is.na(iprobs1na_or$problematic[2])) + expect_false(is.na(iprobs1na_or$problematic[1])) + expect_false(is.na(iprobs1na_or$margin[1])) + + expect_true(all(is.na(iprobs1na_power$probabilities[2,]))) + expect_true(is.na(iprobs1na_power$problematic[2])) + expect_false(is.na(iprobs1na_power$problematic[1])) + 
expect_false(is.na(iprobs1na_power$margin[1])) + + expect_true(all(is.na(iprobs1na_additive$probabilities[2,]))) + expect_true(is.na(iprobs1na_additive$problematic[2])) + expect_false(is.na(iprobs1na_additive$problematic[1])) + expect_false(is.na(iprobs1na_additive$margin[1])) +}) + + From a03db4e4d138036c9fbdea63d7d92cfae8383d6f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Christoffer=20Lindstr=C3=B8m?= Date: Wed, 14 Jul 2021 09:20:06 +0000 Subject: [PATCH 2/4] version 0.4.0 --- DESCRIPTION | 13 +- MD5 | 22 ++-- NAMESPACE | 1 + NEWS.md | 8 ++ R/implied_odds.R | 248 +++++++++++++++++++++++++++++++++++ R/implied_probabilities.R | 90 +++++++++++-- inst/doc/introduction.R | 24 ++++ inst/doc/introduction.Rmd | 69 +++++++++- inst/doc/introduction.html | 117 +++++++++++++---- man/implied_odds.Rd | 33 +++++ man/implied_probabilities.Rd | 15 ++- tests/testthat/test_1.R | 198 +++++++++++++++++++++++++++- vignettes/introduction.Rmd | 69 +++++++++- 13 files changed, 832 insertions(+), 75 deletions(-) create mode 100644 R/implied_odds.R create mode 100644 man/implied_odds.Rd diff --git a/DESCRIPTION b/DESCRIPTION index b654768..2135681 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,19 +1,18 @@ Package: implied Type: Package -Title: Convert Bookmaker Odds to Probabilities -Version: 0.3.1 +Title: Convert Between Bookmaker Odds and Probabilities +Version: 0.4.0 Author: Jonas Christoffer Lindstrøm Maintainer: Jonas Christoffer Lindstrøm -Description: Convert bookmaker odds into proper probabilities. Seven different +Description: Convert between bookmaker odds and probabilities. Eight different algorithms are available, including basic normalization, Shin's method (Hyun Song Shin, (1992) ), and others. 
License: GPL-3 Encoding: UTF-8 -LazyData: true -RoxygenNote: 7.0.2 +RoxygenNote: 7.1.1 Suggests: testthat (>= 2.0.1), knitr, rmarkdown VignetteBuilder: knitr NeedsCompilation: no -Packaged: 2020-08-02 17:43:32 UTC; jonas +Packaged: 2021-07-14 10:11:03 UTC; jonas Repository: CRAN -Date/Publication: 2020-08-02 22:10:05 UTC +Date/Publication: 2021-07-14 10:20:06 UTC diff --git a/MD5 b/MD5 index 6f1f8f3..465492e 100644 --- a/MD5 +++ b/MD5 @@ -1,13 +1,15 @@ -50d25839dec1f2c350722175bd78cb6a *DESCRIPTION -75b5ea970258de895ca96f3181b8d226 *NAMESPACE -5ec759872137c3f7c1c492e3cf60fdb7 *NEWS.md -f33f7af4c08b2c4172b1973ea88e6736 *R/implied_probabilities.R +b13c7584f8ecb8501bc1a84da0ac1bc6 *DESCRIPTION +d32331dc3ccbe103bf204525d1ff82dd *NAMESPACE +514866bab5d2fd3f6cfa059fe84ab07c *NEWS.md +20072e33af45ea50e9186b4e6d95e7ac *R/implied_odds.R +ecafe79c51297b46eb5ce28064cc1a61 *R/implied_probabilities.R c71d10f8db03d579fa1c0c6e76959a7f *R/zzz.R 36fd139695e3baa1e55cba43a4853575 *build/vignette.rds -aa3c1d34d43a0625c11e32d252ef5614 *inst/doc/introduction.R -71fa897a314a5fac9c5cdbe6693f7ab0 *inst/doc/introduction.Rmd -1cdb8370609f1d7633ab37e2f45a44f3 *inst/doc/introduction.html -4bddfd68c8d5de61751fe82910dd1e27 *man/implied_probabilities.Rd +2621ddfa3f34de39f514e2a11f8e8a80 *inst/doc/introduction.R +233f2eab794eec18ddde2c445b0f177a *inst/doc/introduction.Rmd +a0080cb7cb391ac7cee09571b9d1ada0 *inst/doc/introduction.html +32101c917ca72f6d3e0d880a57a802c7 *man/implied_odds.Rd +3af6367c1b5bc9a22f2c8920f4fe26dc *man/implied_probabilities.Rd bc77ebadaa37370915e00adac9036b01 *tests/testthat.R -3ccf5c11bb2d6ef99e883ef97c955f07 *tests/testthat/test_1.R -71fa897a314a5fac9c5cdbe6693f7ab0 *vignettes/introduction.Rmd +8bacdfb42471d07b3180eb210ccb2ca4 *tests/testthat/test_1.R +233f2eab794eec18ddde2c445b0f177a *vignettes/introduction.Rmd diff --git a/NAMESPACE b/NAMESPACE index 8a4e3ab..74581f9 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,3 +1,4 @@ # Generated by roxygen2: do not edit by hand 
+export(implied_odds) export(implied_probabilities) diff --git a/NEWS.md b/NEWS.md index 2b61e86..74f323c 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,11 @@ + +# implied Version 0.4.0 +* New function implied_odds(), that converts probabilities to odds with a given margin. +* New method = 'jsd' in implied_probabilities(). Chekc the introductory vignette for more information. + +# implied Version 0.3.2 +* Fixed wrong formula for the WPO method in the introduction vignette. + # implied Version 0.3.1 * Raises error if the inverse odds sum to less than 1. * NA's will be returned if there are NA's in the input odds. diff --git a/R/implied_odds.R b/R/implied_odds.R new file mode 100644 index 0000000..e42778b --- /dev/null +++ b/R/implied_odds.R @@ -0,0 +1,248 @@ + + + +# The functions xx_func_o(coef, probs) transforms proper probabilities (that sum to 1) +# into improper probabilities as a function of the input coeffient. +# The corresponding functions xx_o_solvefor(coef, probs, margin) are used +# with uniroot to find the coefficient that makes the transformed probabilities +# sum to the desired margin. + +# Transform the probabilities using the Shin's method, +# for a given value of the odds ratio cc. +shin_func_o <- function(zz, probs, grossmargin=NULL){ + + # Eq. 5 in Shin 1993. + yy <- sqrt((zz*probs) + ((1-zz)*probs^2)) + res <- yy * sum(yy) + + if (!is.null(grossmargin)){ + # Eq. 14 in in Fingleton & Waldron 1999 + res <- res / (1 - grossmargin) + } + + return(res) +} + +# the condition that the sum of the probabilites must sum to 1. +# Used with uniroot. +shin_o_solvefor <- function(zz, probs, margin, grossmargin=NULL){ + tmp <- shin_func_o(zz, probs, grossmargin) + sum(tmp) - (1 + margin) +} + + +# Transform the probabilities using the odds ratio method, +# for a given value of the odds ratio cc. 
+or_func_o <- function(cc, probs){ + or_probs <- cc * probs + or_probs / (1 - probs + or_probs) +} + +# The condition that the sum of the transformed probabilites +# must sum to 1 + margin. +or_o_solvefor <- function(cc, probs, margin){ + tmp <- or_func_o(cc, probs) + sum(tmp) - (1 + margin) +} + + +# Transform the probabilities using the power method. +pwr_func_o <- function(nn, probs){ + probs^(nn) +} + +# The condition that the sum of the transformed probabilites +# must sum to 1 + margin. +pwr_o_solvefor <- function(nn, probs, margin){ + tmp <- pwr_func_o(nn, probs) + sum(tmp) - (1 + margin) +} + + + +#' Implied odds with added margin from probabilities. +#' +#' This functions converts probabilities to odds in decimal format, while adding overround. +#' The function does the inverse of what the function \code{\link{implied_probabilities}} does. +#' +#' @param probabilities A matrix or numeric of probabilities, where each column is an outcome. +#' @param method A string giving the method to use. Valid methods are 'basic', 'shin', 'bb', 'wpo', 'or', 'power' or 'additive'. +#' @param margin numeric. How large margin (aka overround) should be added to the probabilities. +#' @param grossmargin Numeric. Must be 0 or greater. See the details. +#' @param normalize Logical. If TRUE (default), scale the input probabilites to sum to 1. +#' +#' @return A named list. The first component is named 'odds' and contain a matrix of +#' implied odds. The second depends on the method used to compute the probabilities. 
+#' +#' @export +implied_odds <- function(probabilities, method = 'basic', margin = 0, + grossmargin = NULL, normalize=TRUE){ + + stopifnot(length(method) == 1, + length(margin) == 1, + tolower(method) %in% c('basic', 'shin', 'bb', 'wpo', 'or', 'power', 'additive'), + all(probabilities >= 0, na.rm=TRUE)) + + + + if (!is.matrix(probabilities)){ + + if ('data.frame' %in% class(probabilities)){ + probabilities <- as.matrix(probabilities) + } else { + probabilities <- matrix(probabilities, nrow=1, + dimnames = list(NULL, names(probabilities))) + } + } + + # Make sure the probabilities sum to exactly 1. + if (normalize){ + probabilities <- probabilities / rowSums(probabilities) + } + + # Prepare the list that will be returned. + out <- vector(mode='list', length=1) + names(out) <- c('odds') + + # Some useful quantities + n_probs <- nrow(probabilities) + n_outcomes <- ncol(probabilities) + + # Missing values + missing_idx <- apply(probabilities, MARGIN = 1, + FUN = function(x) any(is.na(x))) + + # inverted_probs <- 1 / probabilities + + if (method == 'basic'){ + + out$odds <- 1 / (probabilities * (1 + margin)) + + } else if (method == 'shin'){ + + odds <- matrix(nrow=n_probs, ncol=n_outcomes) + zz <- numeric(n_probs) + + for (ii in 1:n_probs){ + + # Skip rows with missing values. 
+ if (missing_idx[ii] == TRUE){ + next + } + + if (margin != 0){ + res <- stats::uniroot(f=shin_o_solvefor, interval = c(0, 0.4), + probs=probabilities[ii,], + margin = margin, grossmargin = grossmargin) + zz[ii] <- res$root + } else { + zz[ii] <- 0 + } + + odds[ii,] <- 1 / shin_func_o(zz=zz[ii], probs = probabilities[ii,], grossmargin = grossmargin) + } + + out$odds <- odds + out$zvalues <- zz + + } else if (method == 'bb'){ + + if (is.null(grossmargin)){ + grossmargin <- 0 + } else { + stopifnot(grossmargin >= 0, + length(grossmargin) == 1) + } + + zz <- (((1-grossmargin)*(1 + margin)) - 1) / (n_outcomes-1) + out$odds <- 1 / ((1+margin) * (((probabilities*(1-zz)) + zz) / ((n_outcomes-1)*zz + 1))) + + out$zvalues <- zz + + } else if (method == 'wpo'){ + # Margin Weights Proportional to the Odds. + # Method from the Wisdom of the Crowds pdf. + invprob <- 1 / probabilities + out$specific_margins <- (margin * invprob) / n_outcomes + out$odds <- invprob / (1 + out$specific_margins) + + } else if (method == 'or'){ + + odds <- matrix(nrow=n_probs, ncol=n_outcomes) + odds_ratios <- numeric(n_probs) + + for (ii in 1:n_probs){ + + # Skip rows with missing values. + if (missing_idx[ii] == TRUE){ + next + } + + if (margin != 0){ + res <- stats::uniroot(f=or_o_solvefor, interval = c(0.05, 5), + probs=probabilities[ii,], margin = margin) + odds_ratios[ii] <- res$root + } else { + odds_ratios[ii] <- 1 + } + + odds[ii,] <- 1 / or_func_o(cc=odds_ratios[ii], probs = probabilities[ii,]) + } + + out$odds <- odds + out$odds_ratios <- odds_ratios + + } else if (method == 'power'){ + + odds <- matrix(nrow=n_probs, ncol=n_outcomes) + exponents <- numeric(n_probs) + + for (ii in 1:n_probs){ + + # Skip rows with missing values. 
+ if (missing_idx[ii] == TRUE){ + next + } + + if (margin != 0){ + res <- stats::uniroot(f=pwr_o_solvefor, interval = c(0.0001, 1.1), + probs=probabilities[ii,], margin = margin) + exponents[ii] <- res$root + } else { + exponents[ii] <- 1 + } + + odds[ii,] <- 1 / pwr_func_o(nn=exponents[ii], probs = probabilities[ii,]) + } + + out$odds <- odds + out$exponents <- exponents + + } else if (method == 'additive'){ + + odds <- matrix(nrow=n_probs, ncol=n_outcomes) + + for (ii in 1:n_probs){ + + # Skip rows with missing values. + if (missing_idx[ii] == TRUE){ + next + } + + odds[ii,] <- 1 / (probabilities[ii,] + (margin / n_outcomes)) + } + + out$odds <- odds + + } + + # Make sure the matrix of implied probabilities has column names. + if (!is.null(colnames(probabilities))){ + colnames(out$odds) <- colnames(probabilities) + } + + + return(out) + + +} diff --git a/R/implied_probabilities.R b/R/implied_probabilities.R index 6ef72f9..8ebb9e6 100644 --- a/R/implied_probabilities.R +++ b/R/implied_probabilities.R @@ -31,11 +31,13 @@ #' For values other than 0, this might sometimes cause some probabilities to not be identifiable. A warning #' will be given if this happens. #' +#' The method 'jsd' was developed by Christopher D. Long, and described in a series of Twitter postings +#' and a python implementation posted on GitHub. #' #' #' @param odds A matrix or numeric of bookmaker odds. The odds must be in the decimal format. #' @param method A string giving the method to use. Valid methods are 'basic', 'shin', 'bb', -#' 'wpo', 'or', 'power' or 'additive'. +#' 'wpo', 'or', 'power', 'additive', and 'jsd'. #' @param normalize Logical. Some of the methods will give small rounding errors. If TRUE (default) #' a final normalization is applied to make absoultely sure the #' probabilities sum to 1. 
@@ -49,10 +51,12 @@ #' \itemize{ #' \item{ zvalues (method = 'shin' and method='bb'): The estimated amount of insider trade.} #' \item{ specific_margins (method = 'wpo'): Matrix of the margins applied to each outcome.} -#' \item{ odds_ratios (method = 'or'): Numeric with the odds ratio that is used to convert true -#' probabilities to bookamker probabilties.} -#' \item{ exponents (method = 'power'): The (inverse) exponents that is used to convert true -#' probabilities to bookamker probabilties.} +#' \item{ odds_ratios (method = 'or'): Numeric with the odds ratio that are used to convert true +#' probabilities to bookmaker probabilties.} +#' \item{ exponents (method = 'power'): The (inverse) exponents that are used to convert true +#' probabilities to bookmaker probabilties.} +#' \item{ distance (method = 'jsd'): The Jensen-Shannon distances that are used to convert true +#' probabilities to bookmaker probabilties.} #' } #' #' The fourth compnent 'problematic' is a logical vector called indicating if any probabilites has fallen @@ -85,7 +89,7 @@ implied_probabilities <- function(odds, method='basic', normalize=TRUE, grossmar shin_method = 'js'){ stopifnot(length(method) == 1, - tolower(method) %in% c('basic', 'shin', 'bb', 'wpo', 'or', 'power', 'additive'), + tolower(method) %in% c('basic', 'shin', 'bb', 'wpo', 'or', 'power', 'additive', 'jsd'), all(odds >= 1, na.rm=TRUE), grossmargin >= 0, shin_method %in% c('js', 'uniroot'), @@ -208,7 +212,7 @@ implied_probabilities <- function(odds, method='basic', normalize=TRUE, grossmar # Method from the Wisdom of the Crowds pdf. 
fair_odds <- (n_outcomes * odds) / (n_outcomes - (out$margin * odds)) out$probabilities <- 1 / fair_odds - out$specific_margins = (out$margin * fair_odds) / n_outcomes + out$specific_margins <- (out$margin * fair_odds) / n_outcomes } else if (method == 'or'){ odds_ratios <- numeric(n_odds) @@ -241,7 +245,7 @@ implied_probabilities <- function(odds, method='basic', normalize=TRUE, grossmar next } - res <- stats::uniroot(f=pwr_solvefor, interval = c(0.001, 1), io=inverted_odds[ii,]) + res <- stats::uniroot(f=pwr_solvefor, interval = c(0.0001, 1), io=inverted_odds[ii,]) exponents[ii] <- res$root probs[ii,] <- pwr_func(nn=res$root, io = inverted_odds[ii,]) } @@ -265,6 +269,27 @@ implied_probabilities <- function(odds, method='basic', normalize=TRUE, grossmar out$probabilities <- probs + } else if (method == 'jsd'){ + + probs <- matrix(nrow=n_odds, ncol=n_outcomes) + jsds <- numeric(n_odds) + + for (ii in 1:n_odds){ + # Skip rows with missing values. + if (missing_idx[ii] == TRUE){ + next + } + # 0.2 seems to be a reasonable upper bound. + res <- stats::uniroot(f=jsd_solvefor, interval = c(0.0000001, 0.2), io=inverted_odds[ii,], + tol=0.0000001) + jsds[ii] <- res$root + probs[ii,] <- jsd_func(jsd=res$root, io = inverted_odds[ii,]) + } + + out$probabilities <- probs + out$distance <- jsds + + } ## do a final normalization to make sure the probabilites @@ -312,7 +337,7 @@ implied_probabilities <- function(odds, method='basic', normalize=TRUE, grossmar # and be used with uniroot. ######################################################### -# Calculate the probabilities usin Shin's formula, for a given value of z. +# Calculate the probabilities using Shin's formula, for a given value of z. # io = inverted odds. shin_func <- function(zz, io){ bb <- sum(io) @@ -320,7 +345,7 @@ shin_func <- function(zz, io){ } -# Calculate the probabilities usin the odds ratio method, +# Calculate the probabilities using the odds ratio method, # for a given value of the odds ratio cc. 
# io = inverted odds. or_func <- function(cc, io){ @@ -328,7 +353,6 @@ or_func <- function(cc, io){ } - # the condition that the sum of the probabilites must sum to 1. # Used with uniroot. shin_solvefor <- function(zz, io){ @@ -357,4 +381,48 @@ pwr_solvefor <- function(nn, io){ sum(tmp) - 1 } +# Simple discrete KL-divergence. +kld <- function(x, y){ + sum(x * log(x/y)) +} + +# The binomial symmetric Jensen–Shannon distance +# assuming p and io have length 1. +binom_jsd <- function(p, io){ + + pvec <- c(p, 1-p) + iovec <- c(io, 1-io) + + mm <- (pvec + iovec) / 2 + sqrt((kld(pvec, mm)/2) + (kld(iovec, mm)/2)) + +} + +# Find the probabilties for a given JS distance and inverted odds. +jsd_func <- function(jsd, io){ + + # The function to be used by uniroot to find p from kl and io. + tosolve <- function(p, io, jsd){ + binom_jsd(p=p, io = io) - jsd + } + + pp <- numeric(length(io)) + for (ii in 1:length(io)){ + # Intervall from approx 0 to io, implying + # That the underlying probability i less than the + # inverse odds. + pp[ii] <- stats::uniroot(f = tosolve, + interval = c(0.00001, io[ii]), + io = io[ii], jsd = jsd)$root + } + return(pp) +} + +# Calculate the probabilities using the Jensen-Shannon distance method, +# for a given value of the odds ratio cc. +# io = inverted odds. +jsd_solvefor <- function(jsd, io){ + sum(jsd_func(jsd=jsd, io = io)) - 1 +} + diff --git a/inst/doc/introduction.R b/inst/doc/introduction.R index f350d7a..ca7bf82 100644 --- a/inst/doc/introduction.R +++ b/inst/doc/introduction.R @@ -79,3 +79,27 @@ res8$probabilities # The estimated proportion of inside traders. 
res8$zvalues +## ----jsd---------------------------------------------------------------------- +# Balanced books +res9 <- implied_probabilities(my_odds, method = 'jsd') + +res9$probabilities + +# The estimated noise (JS distance) +res9$distance + +## ----imp_odds1---------------------------------------------------------------- + +res_odds1 <- implied_odds(res4$probabilities[1,], + margin = res4$margin[1], + method = 'power') + +res_odds1$odds + +# The exponents. +res_odds1$exponents + +# Compare to the exponent from the odds-to-probability conversion. +res4$exponents[1] + + diff --git a/inst/doc/introduction.Rmd b/inst/doc/introduction.Rmd index b20f5d4..d3ea57a 100644 --- a/inst/doc/introduction.Rmd +++ b/inst/doc/introduction.Rmd @@ -16,8 +16,7 @@ knitr::opts_chunk$set( ) ``` - -This package contain a single function, implied_probabilities(), that convert bookmaker odds into proper probabiltiies. Several methods are available, with different assumptions regarding the underlying mechanism the bookmakers convert their probabilities into odds. +This package contains functions that convert between bookmaker odds and probabilities. The function implied_probabilities() convert bookmaker odds into proper probabiltiies. The function implied_odds() does the inverse conversion, it turns proper probabilities into bookmaker odds. Several methods are available, with different assumptions regarding the underlying mechanism the bookmakers convert their probabilities into odds. The main focus of this introduction is present how the package works and the methods that convert bookmaker odds into probabilities and. Towards the end is a small demostration on how to convert probabiliteis to bookmaker odds. A naive conversion of bookmaker odds into probabilities has two main problems. The first is that the probabilities are not proper probabilities, since they sum to more than 1. The excess probability is called the bookmakers margin. 
The second problem is that the probabilities, even if the margin is removed, will be biased in several ways, usually because of what is called the [favorite-longshot bias](https://en.wikipedia.org/wiki/Favourite-longshot_bias). The methods in this package remove the bookmaker margin and some of them also adjust for favorite-longshot bias. @@ -59,7 +58,7 @@ This method is from [Joseph Buchdahl's Wisom of the Crowds document](https://www The probabilities are calculated from the bookmaker odds O using the following formula -pi = n * Oi / (n - M * Oi) +pi = (n - M * Oi) / n * Oi where n is the number of outcomes, and M is the bookmaker margin. @@ -142,6 +141,8 @@ res6$probabilities ``` + + ## Balanced books and Shin's method The two methods referred to as "balanced book" and Shin's method are based on the assumption that there is a small proportion of bettors that actually knows the outcome (called inside traders), and the rest of the bettors reflect the otherwise "true" uncertainty about the outcome. The proportion of inside traders is denoted Z. @@ -172,6 +173,60 @@ res8$zvalues ``` +## The Jensen–Shannon distance method + +This method sees the improper bookmaker probabilties as a noisy version of the true underlying probabilities, and uses the [Jensen–Shannon (JS) distance](https://en.wikipedia.org/wiki/Jensen%E2%80%93Shannon_divergence) as a measure of how noisy the bookmaker probabilities are. + +For the sake of finding the denoised proabilities pi, each outcome i is modelled as a binomial variable, with outcomes i and NOT i. These have probabilities pi and 1-pi, with corresponding improper bookmaker probabilities ri and 1-ri. 
For a given noise-level D, as measued by the symmetric JS distance, the underlying probabilities can be found by solving the JS distance equation for pi: + + +D = 0.5 * BKL(pi, mi) + 0.5 * BKL(ri, mi) + +where mi = (pi + ri) / 2 + +and + +BKL(x, y) = x * log(x/y) + (1-x) * log((1-x)/(1-y))) + y * log(y/x) + (1-y) * log((1-y)/(1-y)) + +is the "binomial" Kullback–Leibler divergence. + +The solution is found numerically by finding the value of of D so that sum(pi) = 1. + +The method was developed by Christopher D. Long (twitter: [\@octonion](https://twitter.com/octonion)), and described in a series of Twitter postings [[1](https://twitter.com/octonion/status/1412847000068952064)]. + +```{r jsd} +# Balanced books +res9 <- implied_probabilities(my_odds, method = 'jsd') + +res9$probabilities + +# The estimated noise (JS distance) +res9$distance +``` + +## Converting probabilities to odds + +There is also a function that can do the opposite what the implied_probabilities() function does, namely the implied_odds() function. This function converts probabilities to odds, for a given margin, the inverse of the methods as described above. Not all methods have been implemented yet. Take a look at the help file for the function for more details. + +In the code example below we use take the results of converting the odds to probabilities using the power method, and convert them back to odds again, with the same margin. We pretty much recover the original odds, except for some small numerical inaccuracy. + + +```{r imp_odds1} + +res_odds1 <- implied_odds(res4$probabilities[1,], + margin = res4$margin[1], + method = 'power') + +res_odds1$odds + +# The exponents. +res_odds1$exponents + +# Compare to the exponent from the odds-to-probability conversion. +res4$exponents[1] + +``` + ## Other packages The [odds.converter](https://cran.r-project.org/package=odds.converter) package can convert between different odds formats, including to decimal odds, that this package requires. 
@@ -182,16 +237,16 @@ Here are some relevant references and links: * Joseph Buchdahl - USING THE WISDOM OF THE CROWD TO FIND VALUE IN A FOOTBALL MATCH BETTING MARKET [Link](https://www.football-data.co.uk/wisdom_of_crowd_bets) -* Keith Cheung (2015) Fixed-odds betting and traditional odds [Link](https://www.sportstradingnetwork.com/article/fixed-odds-betting-traditional-odds) +* Keith Cheung (2015) Fixed-odds betting and traditional odds [Link](https://www.sportstradingnetwork.com/article/fixed-odds-betting-traditional-odds/) * Stephen Clarke, Stephanie Kovalchik & Martin Ingram (2017) Adjusting Bookmaker’s Odds to Allow for Overround [Link](http://www.sciencepublishinggroup.com/journal/paperinfo?journalid=155&doi=10.11648/j.ajss.20170506.12) -* Hyun Song Shin (1992) Prices of State Contingent Claims with Insider Traders, and the Favourite-Longshot Bias [Link](https://www.jstor.org/stable/2234526) +* Hyun Song Shin (1992) Prices of State Contingent Claims with Insider Traders, and the Favourite-Longshot Bias [Link](https://doi.org/10.2307/2234526) -* Hyun Song Shin (1993) Measuring the Incidence of Insider Trading in a Market for State-Contingent Claims [Link](https://www.jstor.org/stable/2234526) +* Hyun Song Shin (1993) Measuring the Incidence of Insider Trading in a Market for State-Contingent Claims [Link](https://doi.org/10.2307/2234526) * Bruno Jullien & Bernard Salanié (1994) Measuring the incidence of insider trading: A comment on Shin [Link](https://doi.org/10.2307/2235458) -* John Fingleton & Patrick Waldron (1999) Optimal Determination of Bookmakers' Betting Odds: Theory and Tests.[Link](https://pdfs.semanticscholar.org/e576/f3b103e0ba041ae072a9201b948059c7806e.pdf) +* John Fingleton & Patrick Waldron (1999) Optimal Determination of Bookmakers' Betting Odds: Theory and Tests.[Link](https://www.semanticscholar.org/paper/Optimal-Determination-of-Bookmakers'-Betting-Odds%3A-Fingleton-Waldron/e576f3b103e0ba041ae072a9201b948059c7806e) diff --git 
a/inst/doc/introduction.html b/inst/doc/introduction.html index 0a1e0fd..99485a8 100644 --- a/inst/doc/introduction.html +++ b/inst/doc/introduction.html @@ -1,25 +1,25 @@ - + - + - + - + Introduction to the implied package - + + + + + - @@ -69,12 +94,12 @@

Introduction to the implied package

-

Jonas C. Lindstrøm

-

2020-08-02

+

Jonas C. Lindstrøm

+

2021-07-14

-

This package contain a single function, implied_probabilities(), that convert bookmaker odds into proper probabiltiies. Several methods are available, with different assumptions regarding the underlying mechanism the bookmakers convert their probabilities into odds.

+

This package contains functions that convert between bookmaker odds and probabilities. The function implied_probabilities() convert bookmaker odds into proper probabiltiies. The function implied_odds() does the inverse conversion, it turns proper probabilities into bookmaker odds. Several methods are available, with different assumptions regarding the underlying mechanism the bookmakers convert their probabilities into odds. The main focus of this introduction is present how the package works and the methods that convert bookmaker odds into probabilities and. Towards the end is a small demostration on how to convert probabiliteis to bookmaker odds.

A naive conversion of bookmaker odds into probabilities has two main problems. The first is that the probabilities are not proper probabilities, since they sum to more than 1. The excess probability is called the bookmakers margin. The second problem is that the probabilities, even if the margin is removed, will be biased in several ways, usually because of what is called the favorite-longshot bias. The methods in this package remove the bookmaker margin and some of them also adjust for favorite-longshot bias.

The basic method

@@ -107,9 +132,9 @@

The basic method

Margin Weights Proportional to the Odds

-

This method is from Joseph Buchdahl’s Wisom of the Crowds document, and assumes that the margin applied by the bookmaker for each of the outcome is proprtional to the probabilitiy of the outcome. In other words, the excessive probabilties are unevenly applied in a way that is reflects the favorite-longshot bias.

+

This method is from Joseph Buchdahl's Wisom of the Crowds document, and assumes that the margin applied by the bookmaker for each of the outcome is proprtional to the probabilitiy of the outcome. In other words, the excessive probabilties are unevenly applied in a way that is reflects the favorite-longshot bias.

The probabilities are calculated from the bookmaker odds O using the following formula

-

pi = n * Oi / (n - M * Oi)

+

pi = (n - M * Oi) / n * Oi

where n is the number of outcomes, and M is the bookmaker margin.

res2 <- implied_probabilities(my_odds, method = 'wpo')
 
@@ -154,19 +179,19 @@ 

The power method

res4$probabilities #> Home Draw Away -#> [1,] 0.2311414 0.2630644 0.5057942 +#> [1,] 0.2311414 0.2630644 0.5057941 #> [2,] 0.4003156 0.2627189 0.3369655 -#> [3,] 0.4667139 0.2908985 0.2423876 +#> [3,] 0.4667137 0.2908986 0.2423877 # The inverse exponents (n) used to convert the proper probablities to bookmaker probabilities. res4$exponents -#> [1] 0.9797664 0.9788115 0.9419744
+#> [1] 0.9797666 0.9788117 0.9419759

The additive method

The additive method removes the margin from the naive probabilites by subtracting an equal amount of of the margin from each outcome. The formula used is

pi = ri - ((sum(r) - 1) / n)

-

If there are only two outcomes, the additive method and Shin’s method are equivalent.

+

If there are only two outcomes, the additive method and Shin's method are equivalent.


 res5 <- implied_probabilities(my_odds, method = 'additive')
 
@@ -188,9 +213,9 @@ 

The additive method

#> [1,] 0.8283333 0.1583333 0.05833333 0.008333333 -0.02166667 -0.03166667
-

Balanced books and Shin’s method

-

The two methods referred to as “balanced book” and Shin’s method are based on the assumption that there is a small proportion of bettors that actually knows the outcome (called inside traders), and the rest of the bettors reflect the otherwise “true” uncertainty about the outcome. The proportion of inside traders is denoted Z.

-

The two methods differ in what assumptions they make about how the bookmakers react to the pressence of inside traders. Shin’s method is derived from the assumption that the bookmakers tries to maximize their profits when there are inside traders. The balanced books method assumes the bookmakers tries to minimize their losses in the worst case scenario if the least likely outcome were to acctually occur.

+

Balanced books and Shin's method

+

The two methods referred to as "balanced book" and Shin's method are based on the assumption that there is a small proportion of bettors that actually knows the outcome (called inside traders), and the rest of the bettors reflect the otherwise "true" uncertainty about the outcome. The proportion of inside traders is denoted Z.

+

The two methods differ in what assumptions they make about how the bookmakers react to the pressence of inside traders. Shin's method is derived from the assumption that the bookmakers tries to maximize their profits when there are inside traders. The balanced books method assumes the bookmakers tries to minimize their losses in the worst case scenario if the least likely outcome were to acctually occur.

We can not know what the insiders know, but both methods gives an estimate of the proportion of insiders.

res7 <- implied_probabilities(my_odds, method = 'shin')
 
@@ -216,6 +241,51 @@ 

Balanced books and Shin’s method

res8$zvalues #> [1] 0.01059301 0.01163056 0.03173139
+
+

The Jensen–Shannon distance method

+

This method sees the improper bookmaker probabilties as a noisy version of the true underlying probabilities, and uses the Jensen–Shannon (JS) distance as a measure of how noisy the bookmaker probabilities are.

+

For the sake of finding the denoised proabilities pi, each outcome i is modelled as a binomial variable, with outcomes i and NOT i. These have probabilities pi and 1-pi, with corresponding improper bookmaker probabilities ri and 1-ri. For a given noise-level D, as measued by the symmetric JS distance, the underlying probabilities can be found by solving the JS distance equation for pi:

+

D = 0.5 * BKL(pi, mi) + 0.5 * BKL(ri, mi)

+

where mi = (pi + ri) / 2

+

and

+

BKL(x, y) = x * log(x/y) + (1-x) * log((1-x)/(1-y))) + y * log(y/x) + (1-y) * log((1-y)/(1-y))

+

is the "binomial" Kullback–Leibler divergence.

+

The solution is found numerically by finding the value of of D so that sum(pi) = 1.

+

The method was developed by Christopher D. Long (twitter: @octonion), and described in a series of Twitter postings [1].

+
# Balanced books
+res9 <- implied_probabilities(my_odds, method = 'jsd')
+
+res9$probabilities
+#>           Home      Draw      Away
+#> [1,] 0.2315189 0.2634117 0.5050694
+#> [2,] 0.4000505 0.2629589 0.3369906
+#> [3,] 0.4650456 0.2915675 0.2433869
+
+# The estimated noise (JS distance)
+res9$distance
+#> [1] 0.005485370 0.005849242 0.016099288
+
+
+

Converting probabilities to odds

+

There is also a function that can do the opposite what the implied_probabilities() function does, namely the implied_odds() function. This function converts probabilities to odds, for a given margin, the inverse of the methods as described above. Not all methods have been implemented yet. Take a look at the help file for the function for more details.

+

In the code example below we use take the results of converting the odds to probabilities using the power method, and convert them back to odds again, with the same margin. We pretty much recover the original odds, except for some small numerical inaccuracy.

+

+res_odds1 <- implied_odds(res4$probabilities[1,], 
+                     margin = res4$margin[1], 
+                     method = 'power')
+
+res_odds1$odds
+#>      Home     Draw     Away
+#> [1,]  4.2 3.700001 1.950005
+
+# The exponents.
+res_odds1$exponents
+#> [1] 0.9797634
+
+# Compare to the exponent from the odds-to-probability conversion.
+res4$exponents[1]
+#> [1] 0.9797666
+

Other packages

The odds.converter package can convert between different odds formats, including to decimal odds, that this package requires.

@@ -225,17 +295,20 @@

Literature

Here are some relevant references and links:

  • Joseph Buchdahl - USING THE WISDOM OF THE CROWD TO FIND VALUE IN A FOOTBALL MATCH BETTING MARKET Link

  • -
  • Keith Cheung (2015) Fixed-odds betting and traditional odds Link

  • +
  • Keith Cheung (2015) Fixed-odds betting and traditional odds Link

  • Stephen Clarke, Stephanie Kovalchik & Martin Ingram (2017) Adjusting Bookmaker’s Odds to Allow for Overround Link

  • -
  • Hyun Song Shin (1992) Prices of State Contingent Claims with Insider Traders, and the Favourite-Longshot Bias Link

  • -
  • Hyun Song Shin (1993) Measuring the Incidence of Insider Trading in a Market for State-Contingent Claims Link

  • +
  • Hyun Song Shin (1992) Prices of State Contingent Claims with Insider Traders, and the Favourite-Longshot Bias Link

  • +
  • Hyun Song Shin (1993) Measuring the Incidence of Insider Trading in a Market for State-Contingent Claims Link

  • Bruno Jullien & Bernard Salanié (1994) Measuring the incidence of insider trading: A comment on Shin Link

  • -
  • John Fingleton & Patrick Waldron (1999) Optimal Determination of Bookmakers’ Betting Odds: Theory and Tests.Link

  • +
  • John Fingleton & Patrick Waldron (1999) Optimal Determination of Bookmakers' Betting Odds: Theory and Tests.Link

+ + + - - - - - - - - - - - - - - - -

Introduction to the implied package

-

Jonas C. Lindstrøm

-

2021-07-14

- - - -

This package contains functions that convert between bookmaker odds and probabilities. The function implied_probabilities() convert bookmaker odds into proper probabiltiies. The function implied_odds() does the inverse conversion, it turns proper probabilities into bookmaker odds. Several methods are available, with different assumptions regarding the underlying mechanism the bookmakers convert their probabilities into odds. The main focus of this introduction is present how the package works and the methods that convert bookmaker odds into probabilities and. Towards the end is a small demostration on how to convert probabiliteis to bookmaker odds.

-

A naive conversion of bookmaker odds into probabilities has two main problems. The first is that the probabilities are not proper probabilities, since they sum to more than 1. The excess probability is called the bookmakers margin. The second problem is that the probabilities, even if the margin is removed, will be biased in several ways, usually because of what is called the favorite-longshot bias. The methods in this package remove the bookmaker margin and some of them also adjust for favorite-longshot bias.

-
-

The basic method

-

The default method used by the function implied_probabilities() is called the basic method. This is the simplest and most common method for converting bookmaker odds into probabilties, and is obtained by dividing the naive probabilities (the inverted odds) by the sum of the inverted odds. If pi is the true underlying probability for outcome i, and ri is the cooresponding inverted odds, then the probabilities are computed as

-

pi = ri / sum(r)

-

This method tend to be the least accurate of the methods in this package. I have also seen this normalization method been referred to as the multiplicative method.

-

The implied_probabilities() function return a list with the proper probabilities (as a matrix) and the bookmaker margins.

-

In the examples below are three sets of bookmaker odds from three football matches.

-

-library(implied)
-#> If you find this package useful, please consider supporting the development at
-#> https://ko-fi.com/opisthokonta
-
-# One column for each outcome, one row for each race or match.
-my_odds <- rbind(c(4.20, 3.70, 1.95),
-                 c(2.45, 3.70, 2.90),
-                 c(2.05, 3.20, 3.80))
-colnames(my_odds) <- c('Home', 'Draw', 'Away')
-
-res1 <- implied_probabilities(my_odds)
-
-res1$probabilities
-#>           Home      Draw      Away
-#> [1,] 0.2331556 0.2646631 0.5021813
-#> [2,] 0.3988848 0.2641264 0.3369888
-#> [3,] 0.4586948 0.2938514 0.2474538
-
-res1$margin
-#> [1] 0.02118602 0.02326112 0.06346277
-
-
-

Margin Weights Proportional to the Odds

-

This method is from Joseph Buchdahl's Wisom of the Crowds document, and assumes that the margin applied by the bookmaker for each of the outcome is proprtional to the probabilitiy of the outcome. In other words, the excessive probabilties are unevenly applied in a way that is reflects the favorite-longshot bias.

-

The probabilities are calculated from the bookmaker odds O using the following formula

-

pi = (n - M * Oi) / n * Oi

-

where n is the number of outcomes, and M is the bookmaker margin.

-
res2 <- implied_probabilities(my_odds, method = 'wpo')
-
-res2$probabilities
-#>           Home      Draw      Away
-#> [1,] 0.2310332 0.2632083 0.5057585
-#> [2,] 0.4004096 0.2625166 0.3370739
-#> [3,] 0.4666506 0.2913457 0.2420036
-
-# The margins applied to each outcome.
-res2$specific_margins
-#>            Home       Draw       Away
-#> [1,] 0.03056706 0.02683049 0.01396320
-#> [2,] 0.01936444 0.02953607 0.02300299
-#> [3,] 0.04533211 0.07260878 0.08741297
-
-
-

The odds ratio method

-

The odds ratio method is also from the Wisdom of the Crowds document, but is originally from an article by Keith Cheung. This method models the relationship between the proper probabilities and the improper bookmaker probabilties using the odds ratio (OR) function:

-

OR = pi (1 - ri) / ri (1 - pi)

-

This gives the probabilities

-

pi = ri / OR + ri - (OR * ri)

-

where the odds ratio OR is selected so that sum(pi) = 1.

-
res3 <- implied_probabilities(my_odds, method = 'or')
-
-res3$probabilities
-#>           Home      Draw      Away
-#> [1,] 0.2320048 0.2636415 0.5043537
-#> [2,] 0.3996912 0.2633869 0.3369219
-#> [3,] 0.4634406 0.2919032 0.2446562
-
-# The odds ratios converting the proper probablities to bookmaker probabilities.
-res3$odds_ratios
-#> [1] 1.034449 1.035805 1.102606
-
-
-

The power method

-

The power method models the bookmaker probabilties as a power function of the proper probabilties. This method is also described in the Wisdom of the Crowds document, where it is referred to as the logarithmic method.

-

pi = ri(1/k)

-

where k is selected so that sum(pi) = 1.

-
res4 <- implied_probabilities(my_odds, method = 'power')
-
-res4$probabilities
-#>           Home      Draw      Away
-#> [1,] 0.2311414 0.2630644 0.5057941
-#> [2,] 0.4003156 0.2627189 0.3369655
-#> [3,] 0.4667137 0.2908986 0.2423877
-
-# The inverse exponents (n) used to convert the proper probablities to bookmaker probabilities.
-res4$exponents
-#> [1] 0.9797666 0.9788117 0.9419759
-
-
-

The additive method

-

The additive method removes the margin from the naive probabilites by subtracting an equal amount of of the margin from each outcome. The formula used is

-

pi = ri - ((sum(r) - 1) / n)

-

If there are only two outcomes, the additive method and Shin's method are equivalent.

-

-res5 <- implied_probabilities(my_odds, method = 'additive')
-
-res5$probabilities
-#>           Home      Draw      Away
-#> [1,] 0.2310332 0.2632083 0.5057585
-#> [2,] 0.4004096 0.2625166 0.3370739
-#> [3,] 0.4666506 0.2913457 0.2420036
-

One problem with the additive method is that it can produce negative probabilities, escpecially for outcomes with low probabilties. This can often be the case when there are many outcomes, for example in racing sports. If this happens, you will be given a warning. Here is an example taken from Clarke et al (2017):

-

-my_odds2 <- t(matrix(1/c(0.870, 0.2, 0.1, 0.05, 0.02, 0.01)))
-colnames(my_odds2) <- paste('X', 1:6, sep='')
-
-res6 <- implied_probabilities(my_odds2, method = 'additive')
-#> Warning in implied_probabilities(my_odds2, method = "additive"): Probabilities outside the 0-1 range produced at 1 instances.
-
-res6$probabilities
-#>             X1        X2         X3          X4          X5          X6
-#> [1,] 0.8283333 0.1583333 0.05833333 0.008333333 -0.02166667 -0.03166667
-
-
-

Balanced books and Shin's method

-

The two methods referred to as "balanced book" and Shin's method are based on the assumption that there is a small proportion of bettors that actually knows the outcome (called inside traders), and the rest of the bettors reflect the otherwise "true" uncertainty about the outcome. The proportion of inside traders is denoted Z.

-

The two methods differ in what assumptions they make about how the bookmakers react to the pressence of inside traders. Shin's method is derived from the assumption that the bookmakers tries to maximize their profits when there are inside traders. The balanced books method assumes the bookmakers tries to minimize their losses in the worst case scenario if the least likely outcome were to acctually occur.

-

We can not know what the insiders know, but both methods gives an estimate of the proportion of insiders.

-
res7 <- implied_probabilities(my_odds, method = 'shin')
-
-res7$probabilities
-#>           Home      Draw      Away
-#> [1,] 0.2315811 0.2635808 0.5048382
-#> [2,] 0.4000160 0.2629336 0.3370505
-#> [3,] 0.4645977 0.2919757 0.2434266
-
-# The estimated proportion of inside traders.
-res7$zvalues
-#> [1] 0.01054734 0.01157314 0.03187455
-
# Balanced books
-res8 <- implied_probabilities(my_odds, method = 'bb')
-
-res8$probabilities
-#>           Home      Draw      Away
-#> [1,] 0.2299380 0.2624575 0.5076046
-#> [2,] 0.4011989 0.2616832 0.3371179
-#> [3,] 0.4710196 0.2899698 0.2390106
-
-# The estimated proportion of inside traders.
-res8$zvalues
-#> [1] 0.01059301 0.01163056 0.03173139
-
-
-

The Jensen–Shannon distance method

-

This method sees the improper bookmaker probabilties as a noisy version of the true underlying probabilities, and uses the Jensen–Shannon (JS) distance as a measure of how noisy the bookmaker probabilities are.

-

For the sake of finding the denoised proabilities pi, each outcome i is modelled as a binomial variable, with outcomes i and NOT i. These have probabilities pi and 1-pi, with corresponding improper bookmaker probabilities ri and 1-ri. For a given noise-level D, as measued by the symmetric JS distance, the underlying probabilities can be found by solving the JS distance equation for pi:

-

D = 0.5 * BKL(pi, mi) + 0.5 * BKL(ri, mi)

-

where mi = (pi + ri) / 2

-

and

-

BKL(x, y) = x * log(x/y) + (1-x) * log((1-x)/(1-y))) + y * log(y/x) + (1-y) * log((1-y)/(1-y))

-

is the "binomial" Kullback–Leibler divergence.

-

The solution is found numerically by finding the value of of D so that sum(pi) = 1.

-

The method was developed by Christopher D. Long (twitter: @octonion), and described in a series of Twitter postings [1].

-
# Balanced books
-res9 <- implied_probabilities(my_odds, method = 'jsd')
-
-res9$probabilities
-#>           Home      Draw      Away
-#> [1,] 0.2315189 0.2634117 0.5050694
-#> [2,] 0.4000505 0.2629589 0.3369906
-#> [3,] 0.4650456 0.2915675 0.2433869
-
-# The estimated noise (JS distance)
-res9$distance
-#> [1] 0.005485370 0.005849242 0.016099288
-
-
-

Converting probabilities to odds

-

There is also a function that can do the opposite what the implied_probabilities() function does, namely the implied_odds() function. This function converts probabilities to odds, for a given margin, the inverse of the methods as described above. Not all methods have been implemented yet. Take a look at the help file for the function for more details.

-

In the code example below we use take the results of converting the odds to probabilities using the power method, and convert them back to odds again, with the same margin. We pretty much recover the original odds, except for some small numerical inaccuracy.

-

-res_odds1 <- implied_odds(res4$probabilities[1,], 
-                     margin = res4$margin[1], 
-                     method = 'power')
-
-res_odds1$odds
-#>      Home     Draw     Away
-#> [1,]  4.2 3.700001 1.950005
-
-# The exponents.
-res_odds1$exponents
-#> [1] 0.9797634
-
-# Compare to the exponent from the odds-to-probability conversion.
-res4$exponents[1]
-#> [1] 0.9797666
-
-
-

Other packages

-

The odds.converter package can convert between different odds formats, including to decimal odds, that this package requires.

-
-
-

Literature

-

Here are some relevant references and links:

-
    -
  • Joseph Buchdahl - USING THE WISDOM OF THE CROWD TO FIND VALUE IN A FOOTBALL MATCH BETTING MARKET Link

  • -
  • Keith Cheung (2015) Fixed-odds betting and traditional odds Link

  • -
  • Stephen Clarke, Stephanie Kovalchik & Martin Ingram (2017) Adjusting Bookmaker’s Odds to Allow for Overround Link

  • -
  • Hyun Song Shin (1992) Prices of State Contingent Claims with Insider Traders, and the Favourite-Longshot Bias Link

  • -
  • Hyun Song Shin (1993) Measuring the Incidence of Insider Trading in a Market for State-Contingent Claims Link

  • -
  • Bruno Jullien & Bernard Salanié (1994) Measuring the incidence of insider trading: A comment on Shin Link

  • -
  • John Fingleton & Patrick Waldron (1999) Optimal Determination of Bookmakers' Betting Odds: Theory and Tests.Link

  • -
-
- - - - - - - - - - - + + + + + + + + + + + + + + + + +Introduction to the implied package + + + + + + + + + + + + + + + + + + + + + + + + + +

Introduction to the implied package

+

Jonas C. Lindstrøm

+

2022-06-08

+ + + +

This package contains functions that convert between bookmaker odds and probabilities. The function implied_probabilities() converts bookmaker odds into proper probabilities. The function implied_odds() does the inverse conversion: it turns proper probabilities into bookmaker odds. Several methods are available, with different assumptions regarding the underlying mechanism the bookmakers use to convert their probabilities into odds. The main focus of this introduction is to present how the package works and the methods that convert bookmaker odds into probabilities. Towards the end is a small demonstration of how to convert probabilities to bookmaker odds.

+

A naive conversion of bookmaker odds into probabilities has two main problems. The first is that the probabilities are not proper probabilities, since they sum to more than 1. The excess probability is called the bookmaker’s margin. The second problem is that the probabilities, even if the margin is removed, will be biased in several ways, usually because of what is called the favorite-longshot bias. The methods in this package remove the bookmaker margin and some of them also adjust for favorite-longshot bias.

+
+

The basic method

+

The default method used by the function implied_probabilities() is called the basic method. This is the simplest and most common method for converting bookmaker odds into probabilities, and is obtained by dividing the naive probabilities (the inverted odds) by the sum of the inverted odds. If pi is the true underlying probability for outcome i, and ri is the corresponding inverted odds, then the probabilities are computed as

+

pi = ri / sum(r)

+

This method tends to be the least accurate of the methods in this package. I have also seen this normalization method being referred to as the multiplicative method.

+

The implied_probabilities() function returns a list with the proper probabilities (as a matrix) and the bookmaker margins.

+

In the examples below are three sets of bookmaker odds from three football matches.

+

+library(implied)
+#> If you find this package useful, please consider supporting the development at
+#> https://ko-fi.com/opisthokonta
+
+# One column for each outcome, one row for each race or match.
+my_odds <- rbind(c(4.20, 3.70, 1.95),
+                 c(2.45, 3.70, 2.90),
+                 c(2.05, 3.20, 3.80))
+colnames(my_odds) <- c('Home', 'Draw', 'Away')
+
+res1 <- implied_probabilities(my_odds)
+
+res1$probabilities
+#>           Home      Draw      Away
+#> [1,] 0.2331556 0.2646631 0.5021813
+#> [2,] 0.3988848 0.2641264 0.3369888
+#> [3,] 0.4586948 0.2938514 0.2474538
+
+res1$margin
+#> [1] 0.02118602 0.02326112 0.06346277
+
+
+

Margin Weights Proportional to the Odds

+

This method is from Joseph Buchdahl’s Wisdom of the Crowds document, and assumes that the margin applied by the bookmaker for each of the outcomes is proportional to the odds of the outcome. In other words, the excess probabilities are unevenly applied in a way that reflects the favorite-longshot bias.

+

The probabilities are calculated from the bookmaker odds O using the following formula

+

pi = (n - M * Oi) / (n * Oi)

+

where n is the number of outcomes, and M is the bookmaker margin.

+
res2 <- implied_probabilities(my_odds, method = 'wpo')
+
+res2$probabilities
+#>           Home      Draw      Away
+#> [1,] 0.2310332 0.2632083 0.5057585
+#> [2,] 0.4004096 0.2625166 0.3370739
+#> [3,] 0.4666506 0.2913457 0.2420036
+
+# The margins applied to each outcome.
+res2$specific_margins
+#>            Home       Draw       Away
+#> [1,] 0.03056706 0.02683049 0.01396320
+#> [2,] 0.01936444 0.02953607 0.02300299
+#> [3,] 0.04533211 0.07260878 0.08741297
+
+
+

The odds ratio method

+

The odds ratio method is also from the Wisdom of the Crowds document, but is originally from an article by Keith Cheung. This method models the relationship between the proper probabilities and the improper bookmaker probabilities using the odds ratio (OR) function:

+

OR = ri (1 - pi) / (pi (1 - ri))

+

This gives the probabilities

+

pi = ri / (OR + ri - (OR * ri))

+

where the odds ratio OR is selected so that sum(pi) = 1.

+
res3 <- implied_probabilities(my_odds, method = 'or')
+
+res3$probabilities
+#>           Home      Draw      Away
+#> [1,] 0.2320048 0.2636415 0.5043537
+#> [2,] 0.3996912 0.2633869 0.3369219
+#> [3,] 0.4634406 0.2919032 0.2446562
+
+# The odds ratios converting the proper probabilities to bookmaker probabilities.
+res3$odds_ratios
+#> [1] 1.034449 1.035805 1.102606
+
+
+

The power method

+

The power method models the bookmaker probabilities as a power function of the proper probabilities. This method is also described in the Wisdom of the Crowds document, where it is referred to as the logarithmic method.

+

pi = ri^(1/k)

+

where k is selected so that sum(pi) = 1.

+
res4 <- implied_probabilities(my_odds, method = 'power')
+
+res4$probabilities
+#>           Home      Draw      Away
+#> [1,] 0.2311414 0.2630644 0.5057941
+#> [2,] 0.4003156 0.2627189 0.3369655
+#> [3,] 0.4667137 0.2908986 0.2423877
+
+# The inverse exponents (k) used to convert the proper probabilities to bookmaker probabilities.
+res4$exponents
+#> [1] 0.9797666 0.9788117 0.9419759
+
+
+

The additive method

+

The additive method removes the margin from the naive probabilities by subtracting an equal amount of the margin from each outcome. The formula used is

+

pi = ri - ((sum(r) - 1) / n)

+

If there are only two outcomes, the additive method and Shin’s method are equivalent.

+

+res5 <- implied_probabilities(my_odds, method = 'additive')
+
+res5$probabilities
+#>           Home      Draw      Away
+#> [1,] 0.2310332 0.2632083 0.5057585
+#> [2,] 0.4004096 0.2625166 0.3370739
+#> [3,] 0.4666506 0.2913457 0.2420036
+

One problem with the additive method is that it can produce negative probabilities, especially for outcomes with low probabilities. This can often be the case when there are many outcomes, for example in racing sports. If this happens, you will be given a warning. Here is an example taken from Clarke et al (2017):

+

+my_odds2 <- t(matrix(1/c(0.870, 0.2, 0.1, 0.05, 0.02, 0.01)))
+colnames(my_odds2) <- paste('X', 1:6, sep='')
+
+res6 <- implied_probabilities(my_odds2, method = 'additive')
+#> Warning in implied_probabilities(my_odds2, method = "additive"): Probabilities outside the 0-1 range produced at 1 instances.
+
+res6$probabilities
+#>             X1        X2         X3          X4          X5          X6
+#> [1,] 0.8283333 0.1583333 0.05833333 0.008333333 -0.02166667 -0.03166667
+
+
+

Balanced books and Shin’s method

+

The two methods referred to as “balanced book” and Shin’s method are based on the assumption that there is a small proportion of bettors that actually knows the outcome (called inside traders), and the rest of the bettors reflect the otherwise “true” uncertainty about the outcome. The proportion of inside traders is denoted Z.

+

The two methods differ in what assumptions they make about how the bookmakers react to the presence of inside traders. Shin’s method is derived from the assumption that the bookmakers try to maximize their profits when there are inside traders. The balanced books method assumes the bookmakers try to minimize their losses in the worst case scenario if the least likely outcome were to actually occur.

+

We cannot know what the insiders know, but both methods give an estimate of the proportion of insiders.

+
res7 <- implied_probabilities(my_odds, method = 'shin')
+
+res7$probabilities
+#>           Home      Draw      Away
+#> [1,] 0.2315811 0.2635808 0.5048382
+#> [2,] 0.4000160 0.2629336 0.3370505
+#> [3,] 0.4645977 0.2919757 0.2434266
+
+# The estimated proportion of inside traders.
+res7$zvalues
+#> [1] 0.01054734 0.01157314 0.03187455
+
# Balanced books
+res8 <- implied_probabilities(my_odds, method = 'bb')
+
+res8$probabilities
+#>           Home      Draw      Away
+#> [1,] 0.2299380 0.2624575 0.5076046
+#> [2,] 0.4011989 0.2616832 0.3371179
+#> [3,] 0.4710196 0.2899698 0.2390106
+
+# The estimated proportion of inside traders.
+res8$zvalues
+#> [1] 0.01059301 0.01163056 0.03173139
+
+
+

The Jensen–Shannon distance method

+

This method sees the improper bookmaker probabilities as a noisy version of the true underlying probabilities, and uses the Jensen–Shannon (JS) distance as a measure of how noisy the bookmaker probabilities are.

+

For the sake of finding the denoised probabilities $p_i$, each outcome i is modelled as a binomial variable, with outcomes i and NOT i. These have probabilities $p_i$ and $1 - p_i$, with corresponding improper bookmaker probabilities $r_i$ and $1 - r_i$. For a given noise-level D, as measured by the symmetric JS distance, the underlying probabilities can be found by solving the JS distance equation for $p_i$:

+

$D = 0.5 \cdot \mathrm{BKL}(p_i, m_i) + 0.5 \cdot \mathrm{BKL}(r_i, m_i)$

+

where $m_i = (p_i + r_i) / 2$

+

and

+

$\mathrm{BKL}(x, y) = x \log\left(\frac{x}{y}\right) + (1-x) \log\left(\frac{1-x}{1-y}\right) + y \log\left(\frac{y}{x}\right) + (1-y) \log\left(\frac{1-y}{1-x}\right)$

+

is the “binomial” Kullback–Leibler divergence.

+

The solution is found numerically by finding the value of D such that $\sum_i p_i = 1$.

+

The method was developed by Christopher D. Long (twitter: @octonion), and described in a series of Twitter postings [1].

+
# Jensen–Shannon distance
+res9 <- implied_probabilities(my_odds, method = 'jsd')
+
+res9$probabilities
+#>           Home      Draw      Away
+#> [1,] 0.2315189 0.2634117 0.5050694
+#> [2,] 0.4000505 0.2629589 0.3369906
+#> [3,] 0.4650456 0.2915675 0.2433869
+
+# The estimated noise (JS distance)
+res9$distance
+#> [1] 0.005485370 0.005849242 0.016099288
+
+
+

Converting probabilities to odds

+

There is also a function that does the opposite of what the implied_probabilities() function does, namely the implied_odds() function. This function converts probabilities to odds, for a given margin, using the inverse of the methods described above. Not all methods have been implemented yet. Take a look at the help file for the function for more details.

+

In the code example below we take the results of converting the odds to probabilities using the power method, and convert them back to odds again, with the same margin. We pretty much recover the original odds, except for some small numerical inaccuracy.

+

+res_odds1 <- implied_odds(res4$probabilities[1,], 
+                     margin = res4$margin[1], 
+                     method = 'power')
+
+res_odds1$odds
+#>      Home     Draw     Away
+#> [1,]  4.2 3.700001 1.950005
+
+# The exponents.
+res_odds1$exponents
+#> [1] 0.9797634
+
+# Compare to the exponent from the odds-to-probability conversion.
+res4$exponents[1]
+#> [1] 0.9797666
+
+
+

Other packages

+

The odds.converter package can convert between different odds formats, including to decimal odds, which is the format this package requires.

+
+
+

Literature

+

Here are some relevant references and links:

+
    +
  • Joseph Buchdahl - USING THE WISDOM OF THE CROWD TO FIND VALUE IN A FOOTBALL MATCH BETTING MARKET Link

  • +
  • Keith Cheung (2015) Fixed-odds betting and traditional odds Link

  • +
  • Stephen Clarke, Stephanie Kovalchik & Martin Ingram (2017) Adjusting Bookmaker’s Odds to Allow for Overround Link

  • +
  • Hyun Song Shin (1992) Prices of State Contingent Claims with Insider Traders, and the Favourite-Longshot Bias Link

  • +
  • Hyun Song Shin (1993) Measuring the Incidence of Insider Trading in a Market for State-Contingent Claims Link

  • +
  • Bruno Jullien & Bernard Salanié (1994) Measuring the incidence of insider trading: A comment on Shin Link

  • +
  • John Fingleton & Patrick Waldron (1999) Optimal Determination of Bookmakers’ Betting Odds: Theory and Tests. Link

  • +
+
+ + + + + + + + + + + diff --git a/tests/testthat/test_1.R b/tests/testthat/test_1.R index b17a68c..5d0b528 100644 --- a/tests/testthat/test_1.R +++ b/tests/testthat/test_1.R @@ -1,456 +1,456 @@ - - -# some example odds -my_odds <- rbind(c(4.20, 3.70, 1.95), - c(2.45, 3.70, 2.90), - c(2.05, 3.20, 3.80), - c(1.595, 3.990, 6.760)) - -# Some odds desinged to be problematic with the additive method. -# It is also problematic with the wpo method. -my_odds2 <- t(matrix(1/c(0.870, 0.2, 0.1, 0.05, 0.02, 0.01))) - - - -# tolerance for some tests -toll <- 0.00005 - - -context("Implied probabilities") - - - -iprobs1_basic <- implied_probabilities(my_odds) -iprobs1_shin <- implied_probabilities(my_odds, method='shin') -iprobs1_shin2 <- implied_probabilities(my_odds, method='shin', grossmargin = 0.01) -iprobs1_shin3 <- implied_probabilities(my_odds, method='shin', shin_method = 'uniroot') -iprobs1_bb <- implied_probabilities(my_odds, method='bb') -iprobs1_bb2 <- implied_probabilities(my_odds, method='bb', grossmargin = 0.01) -iprobs1_wpo <- implied_probabilities(my_odds, method='wpo') -iprobs1_or <- implied_probabilities(my_odds, method='or') -iprobs1_power <- implied_probabilities(my_odds, method='power') -iprobs1_additive <- implied_probabilities(my_odds, method='additive') -iprobs1_jsd <- implied_probabilities(my_odds, method='jsd') - -# Shin method uniroot, with grossmargin != 0 should switch to shin_method = 'js' -# Make sure the output is the same. -iprobs1_shin4 <- implied_probabilities(my_odds, method='shin', shin_method = 'uniroot', grossmargin = 0.01) - -iprobs2_basic <- implied_probabilities(my_odds2) -iprobs2_shin <- implied_probabilities(my_odds2, method='shin') -iprobs2_shin3 <- implied_probabilities(my_odds2, method='shin', shin_method = 'uniroot') -iprobs2_or <- implied_probabilities(my_odds2, method='or') -iprobs2_power <- implied_probabilities(my_odds2, method='power') - -# The KL method does not work with my_odds2. 
-#iprobs2_kl <- implied_probabilities(my_odds2, method='kl') - - - - -test_that("Output", { - - expect_silent( - iprobs2_shin <- implied_probabilities(my_odds2, method='shin') - ) - - expect_silent( - iprobs2_shin2 <- implied_probabilities(my_odds2, method='shin', grossmargin = 0.01) - ) - - expect_silent( - iprobs2_basic <- implied_probabilities(my_odds2) - ) - expect_silent( - iprobs2_shin <- implied_probabilities(my_odds2, method='shin') - ) - expect_silent( - iprobs2_or <- implied_probabilities(my_odds2, method='or') - ) - expect_silent( - iprobs2_power <- implied_probabilities(my_odds2, method='power') - ) - - expect_warning( - iprobs2_additive <- implied_probabilities(my_odds2, method='additive') - ) - expect_warning( - iprobs2_wpo <- implied_probabilities(my_odds2, method='wpo') - ) - - # New in version 0.3.1, should give error. - expect_error(implied_probabilities(my_odds[,1:2])) - - expect_equal(class(iprobs1_basic), 'list') - expect_equal(class(iprobs1_shin), 'list') - expect_equal(class(iprobs1_shin2), 'list') - expect_equal(class(iprobs1_shin3), 'list') - expect_equal(class(iprobs1_bb), 'list') - expect_equal(class(iprobs1_bb2), 'list') - expect_equal(class(iprobs1_wpo), 'list') - expect_equal(class(iprobs1_or), 'list') - expect_equal(class(iprobs1_additive), 'list') - expect_equal(class(iprobs1_jsd), 'list') - - expect_equal(all(abs(rowSums(iprobs1_basic$probabilities) - 1) < toll), TRUE) - expect_equal(all(abs(rowSums(iprobs1_shin$probabilities) - 1) < toll), TRUE) - expect_equal(all(abs(rowSums(iprobs1_shin2$probabilities) - 1) < toll), TRUE) - expect_equal(all(abs(rowSums(iprobs1_shin3$probabilities) - 1) < toll), TRUE) - expect_equal(all(abs(rowSums(iprobs1_bb$probabilities) - 1) < toll), TRUE) - expect_equal(all(abs(rowSums(iprobs1_bb2$probabilities) - 1) < toll), TRUE) - expect_equal(all(abs(rowSums(iprobs1_wpo$probabilities) - 1) < toll), TRUE) - expect_equal(all(abs(rowSums(iprobs1_or$probabilities) - 1) < toll), TRUE) - 
expect_equal(all(abs(rowSums(iprobs1_power$probabilities) - 1) < toll), TRUE) - expect_equal(all(abs(rowSums(iprobs1_additive$probabilities) - 1) < toll), TRUE) - expect_equal(all(abs(rowSums(iprobs1_jsd$probabilities) - 1) < toll), TRUE) - - expect_equal(all(iprobs1_basic$margin > 0), TRUE) - expect_equal(all(iprobs1_shin$margin > 0), TRUE) - expect_equal(all(iprobs1_shin2$margin > 0), TRUE) - expect_equal(all(iprobs1_shin3$margin > 0), TRUE) - expect_equal(all(iprobs1_bb$margin > 0), TRUE) - expect_equal(all(iprobs1_bb2$margin > 0), TRUE) - expect_equal(all(iprobs1_wpo$margin > 0), TRUE) - expect_equal(all(iprobs1_or$margin > 0), TRUE) - expect_equal(all(iprobs1_power$margin > 0), TRUE) - expect_equal(all(iprobs1_additive$margin > 0), TRUE) - expect_equal(all(iprobs1_jsd$margin > 0), TRUE) - - expect_equal(is.null(iprobs1_shin$zvalues), FALSE) - expect_equal(is.null(iprobs1_shin2$zvalues), FALSE) - expect_equal(is.null(iprobs1_shin3$zvalues), FALSE) - expect_equal(is.null(iprobs1_bb$zvalues), FALSE) - expect_equal(is.null(iprobs1_bb2$zvalues), FALSE) - expect_equal(is.null(iprobs1_wpo$specific_margins), FALSE) - expect_equal(is.null(iprobs1_or$odds_ratios), FALSE) - expect_equal(is.null(iprobs1_power$exponents), FALSE) - expect_equal(is.null(iprobs1_jsd$distance), FALSE) - - - expect_equal(class(iprobs2_basic), 'list') - expect_equal(class(iprobs2_shin), 'list') - expect_equal(class(iprobs2_shin2), 'list') - expect_equal(class(iprobs2_shin3), 'list') - expect_equal(class(iprobs2_wpo), 'list') - expect_equal(class(iprobs2_or), 'list') - expect_equal(class(iprobs2_additive), 'list') - - expect_equal(all(abs(rowSums(iprobs2_basic$probabilities) - 1) < toll), TRUE) - expect_equal(all(abs(rowSums(iprobs2_shin$probabilities) - 1) < toll), TRUE) - expect_equal(all(abs(rowSums(iprobs2_shin3$probabilities) - 1) < toll), TRUE) - expect_equal(all(abs(rowSums(iprobs2_wpo$probabilities) - 1) < toll), TRUE) - expect_equal(all(abs(rowSums(iprobs2_or$probabilities) - 1) < toll), 
TRUE) - expect_equal(all(abs(rowSums(iprobs2_power$probabilities) - 1) < toll), TRUE) - expect_equal(all(abs(rowSums(iprobs2_additive$probabilities) - 1) < toll), TRUE) - - expect_equal(all(iprobs2_basic$margin > 0), TRUE) - expect_equal(all(iprobs2_shin$margin > 0), TRUE) - expect_equal(all(iprobs2_shin3$margin > 0), TRUE) - expect_equal(all(iprobs2_wpo$margin > 0), TRUE) - expect_equal(all(iprobs2_or$margin > 0), TRUE) - expect_equal(all(iprobs2_power$margin > 0), TRUE) - expect_equal(all(iprobs2_additive$margin > 0), TRUE) - - expect_equal(iprobs2_basic$problematic, FALSE) - expect_equal(iprobs2_shin$problematic, FALSE) - expect_equal(iprobs2_shin3$problematic, FALSE) - expect_equal(iprobs2_wpo$problematic, TRUE) - expect_equal(iprobs2_power$problematic, FALSE) - expect_equal(iprobs2_additive$problematic, TRUE) - - expect_equal(is.null(iprobs2_shin$zvalues), FALSE) - expect_equal(is.null(iprobs2_shin3$zvalues), FALSE) - expect_equal(is.null(iprobs2_wpo$specific_margins), FALSE) - expect_equal(is.null(iprobs2_or$odds_ratios), FALSE) - expect_equal(is.null(iprobs2_power$exponents), FALSE) - - expect_true(all(iprobs1_shin4$probabilities == iprobs1_shin2$probabilities)) - expect_message(implied_probabilities(my_odds, method='shin', shin_method = 'uniroot', grossmargin = 0.01)) - -}) - - -context("Non-normalized results") - -iprobs1_basic_nn <- implied_probabilities(my_odds, normalize = FALSE) -iprobs1_shin_nn <- implied_probabilities(my_odds, method='shin', normalize = FALSE) -iprobs1_shin2_nn <- implied_probabilities(my_odds, method='shin', grossmargin = 0.01, normalize = FALSE) -iprobs1_shin3_nn <- implied_probabilities(my_odds, method='shin', shin_method = 'uniroot', normalize = FALSE) -iprobs1_bb_nn <- implied_probabilities(my_odds, method='bb', normalize = FALSE) -iprobs1_bb2_nn <- implied_probabilities(my_odds, method='bb', grossmargin = 0.01, normalize = FALSE) -iprobs1_wpo_nn <- implied_probabilities(my_odds, method='wpo', normalize = FALSE) -iprobs1_or_nn 
<- implied_probabilities(my_odds, method='or', normalize = FALSE) -iprobs1_power_nn <- implied_probabilities(my_odds, method='power', normalize = FALSE) -iprobs1_additive_nn <- implied_probabilities(my_odds, method='additive', normalize = FALSE) -iprobs1_jsd_nn <- implied_probabilities(my_odds, method='jsd', normalize = FALSE) - -# They should all be reasonably close to 1. -test_that("Non-normalized results", { - expect_true(all(abs((rowSums(iprobs1_basic_nn$probabilities) - 1)) < 0.01)) - expect_true(all(abs((rowSums(iprobs1_shin_nn$probabilities) - 1)) < 0.01)) - expect_true(all(abs((rowSums(iprobs1_shin2_nn$probabilities) - 1)) < 0.01)) - expect_true(all(abs((rowSums(iprobs1_shin3_nn$probabilities) - 1)) < 0.01)) - expect_true(all(abs((rowSums(iprobs1_bb_nn$probabilities) - 1)) < 0.01)) - expect_true(all(abs((rowSums(iprobs1_bb2_nn$probabilities) - 1)) < 0.01)) - expect_true(all(abs((rowSums(iprobs1_wpo_nn$probabilities) - 1)) < 0.01)) - expect_true(all(abs((rowSums(iprobs1_or_nn$probabilities) - 1)) < 0.01)) - expect_true(all(abs((rowSums(iprobs1_power_nn$probabilities) - 1)) < 0.01)) - expect_true(all(abs((rowSums(iprobs1_additive_nn$probabilities) - 1)) < 0.01)) - expect_true(all(abs((rowSums(iprobs1_jsd_nn$probabilities) - 1)) < 0.01)) - -}) - - -context("Missing values") - -# some example odds, with missing value -my_odds_na <- rbind(c(4.20, 3.70, 1.95), - c(2.45, NA, 2.90), - c(2.05, 3.20, 3.80)) - -# Test with missing values - -iprobs1na_basic <- implied_probabilities(my_odds_na) -iprobs1na_shin <- implied_probabilities(my_odds_na, method='shin') -iprobs1na_shin2 <- implied_probabilities(my_odds_na, method='shin', grossmargin = 0.01) -iprobs1na_shin3 <- implied_probabilities(my_odds_na, method='shin', shin_method = 'uniroot') -iprobs1na_bb <- implied_probabilities(my_odds_na, method='bb') -iprobs1na_bb2 <- implied_probabilities(my_odds_na, method='bb', grossmargin = 0.01) -iprobs1na_wpo <- implied_probabilities(my_odds_na, method='wpo') -iprobs1na_or <- 
implied_probabilities(my_odds_na, method='or') -iprobs1na_power <- implied_probabilities(my_odds_na, method='power') -iprobs1na_additive <- implied_probabilities(my_odds_na, method='additive') -iprobs1na_jsd <- implied_probabilities(my_odds_na, method='jsd') - - -test_that("missing values", { - - expect_true(all(is.na(iprobs1na_basic$probabilities[2,]))) - expect_true(is.na(iprobs1na_basic$problematic[2])) - expect_true(is.na(iprobs1na_basic$margin[2])) - expect_false(is.na(iprobs1na_basic$problematic[1])) - expect_false(is.na(iprobs1na_basic$margin[1])) - - - expect_true(all(is.na(iprobs1na_shin$probabilities[2,]))) - expect_true(is.na(iprobs1na_shin$problematic[2])) - expect_false(is.na(iprobs1na_shin$problematic[1])) - expect_false(is.na(iprobs1na_shin$margin[1])) - - expect_true(all(is.na(iprobs1na_shin2$probabilities[2,]))) - expect_true(is.na(iprobs1na_shin2$problematic[2])) - expect_false(is.na(iprobs1na_shin2$problematic[1])) - expect_false(is.na(iprobs1na_shin2$margin[1])) - - expect_true(all(is.na(iprobs1na_shin3$probabilities[2,]))) - expect_true(is.na(iprobs1na_shin3$problematic[2])) - expect_false(is.na(iprobs1na_shin3$problematic[1])) - expect_false(is.na(iprobs1na_shin3$margin[1])) - - expect_true(all(is.na(iprobs1na_bb$probabilities[2,]))) - expect_true(is.na(iprobs1na_bb$problematic[2])) - expect_false(is.na(iprobs1na_bb$problematic[1])) - expect_false(is.na(iprobs1na_bb$margin[1])) - - expect_true(all(is.na(iprobs1na_bb2$probabilities[2,]))) - expect_true(is.na(iprobs1na_bb2$problematic[2])) - expect_false(is.na(iprobs1na_bb2$problematic[1])) - expect_false(is.na(iprobs1na_bb2$margin[1])) - - expect_true(all(is.na(iprobs1na_wpo$probabilities[2,]))) - expect_true(is.na(iprobs1na_wpo$problematic[2])) - expect_false(is.na(iprobs1na_wpo$problematic[1])) - expect_false(is.na(iprobs1na_wpo$margin[1])) - - expect_true(all(is.na(iprobs1na_or$probabilities[2,]))) - expect_true(is.na(iprobs1na_or$problematic[2])) - 
expect_false(is.na(iprobs1na_or$problematic[1])) - expect_false(is.na(iprobs1na_or$margin[1])) - - expect_true(all(is.na(iprobs1na_power$probabilities[2,]))) - expect_true(is.na(iprobs1na_power$problematic[2])) - expect_false(is.na(iprobs1na_power$problematic[1])) - expect_false(is.na(iprobs1na_power$margin[1])) - - expect_true(all(is.na(iprobs1na_additive$probabilities[2,]))) - expect_true(is.na(iprobs1na_additive$problematic[2])) - expect_false(is.na(iprobs1na_additive$problematic[1])) - expect_false(is.na(iprobs1na_additive$margin[1])) - - expect_true(all(is.na(iprobs1na_jsd$probabilities[2,]))) - expect_true(is.na(iprobs1na_jsd$problematic[2])) - expect_false(is.na(iprobs1na_jsd$problematic[1])) - expect_false(is.na(iprobs1na_jsd$margin[1])) - - -}) - - - -context("Implied odds") - - -my_probs <- rbind((1/c(1.5, 5, 7.5)), - c(0.1, 0.2, 0.7), - c(0.01, 0.3, 0.69)) - -my_margin <- 0.022 - - -iodds1_basic <- implied_odds(my_probs, method='basic', margin = my_margin) -iodds1_shin <- implied_odds(my_probs, method='shin', margin = my_margin) -iodds1_shin2 <- implied_odds(my_probs, method='shin', margin = my_margin, grossmargin = 0.01) -iodds1_bb <- implied_odds(my_probs, method='bb', margin = my_margin) -iodds1_bb2 <- implied_odds(my_probs, method='bb', margin = my_margin, grossmargin = 0.01) -iodds1_wpo <- implied_odds(my_probs, method='wpo', margin = my_margin) -iodds1_or <- implied_odds(my_probs, method='or', margin = my_margin) -iodds1_power <- implied_odds(my_probs, method='power', margin = my_margin) -iodds1_additive <- implied_odds(my_probs, method='additive', margin = my_margin) - - -iodds1_basic0 <- implied_odds(my_probs, method='basic', margin = 0) -iodds1_bb0 <- implied_odds(my_probs, method='bb', margin = 0) -iodds1_wpo0 <- implied_odds(my_probs, method='wpo', margin = 0) -iodds1_or0 <- implied_odds(my_probs, method='or', margin = 0) -iodds1_power0 <- implied_odds(my_probs, method='power', margin = 0) -iodds1_additive0 <- implied_odds(my_probs, 
method='additive', margin = 0) - - - -test_that("Output", { - - expect_equal(class(iodds1_basic), 'list') - expect_equal(class(iodds1_shin), 'list') - expect_equal(class(iodds1_shin2), 'list') - expect_equal(class(iodds1_bb), 'list') - expect_equal(class(iodds1_bb0), 'list') - expect_equal(class(iodds1_bb2), 'list') - expect_equal(class(iodds1_wpo), 'list') - expect_equal(class(iodds1_wpo0), 'list') - expect_equal(class(iodds1_or), 'list') - expect_equal(class(iodds1_or0), 'list') - expect_equal(class(iodds1_power), 'list') - expect_equal(class(iodds1_power0), 'list') - expect_equal(class(iodds1_additive), 'list') - expect_equal(class(iodds1_additive0), 'list') - - - # Sum of improper probabilties sum to 1 + margin - expect_true(all(abs(rowSums(1 / iodds1_basic$odds) - (1 + my_margin)) <= toll)) - expect_true(all(abs(rowSums(1 / iodds1_shin$odds) - (1 + my_margin)) <= toll)) - expect_true(all(abs(rowSums(1 / iodds1_shin2$odds) - (1 + my_margin)) <= toll)) - expect_true(all(abs(rowSums(1 / iodds1_bb$odds) - (1 + my_margin)) <= toll)) - expect_true(all(abs(rowSums(1 / iodds1_bb2$odds) - (1 + my_margin)) <= toll)) - expect_true(all(abs(rowSums(1 / iodds1_wpo$odds) - (1 + my_margin)) <= toll)) - expect_true(all(abs(rowSums(1 / iodds1_or$odds) - (1 + my_margin)) <= toll)) - expect_true(all(abs(rowSums(1 / iodds1_power$odds) - (1 + my_margin)) <= toll)) - expect_true(all(abs(rowSums(1 / iodds1_additive$odds) - (1 + my_margin)) <= toll)) - - # When theres no margin, probabilities should sum to 1. - expect_true(all(abs(rowSums(1 / iodds1_basic0$odds) - 1) <= toll)) - expect_true(all(abs(rowSums(1 / iodds1_bb0$odds) - 1) <= toll)) - expect_true(all(abs(rowSums(1 / iodds1_wpo0$odds) - 1) <= toll)) - expect_true(all(abs(rowSums(1 / iodds1_or0$odds) - 1) <= toll)) - expect_true(all(abs(rowSums(1 / iodds1_power0$odds) - 1) <= toll)) - expect_true(all(abs(rowSums(1 / iodds1_additive0$odds) - 1) <= toll)) - - # Check the coefficients for being alright. 
- expect_true(all(iodds1_shin$zvalues > 0)) - expect_true(all(iodds1_shin2$zvalues > 0)) - - expect_true(all(iodds1_bb0$zvalues >= 0)) - expect_true(all(iodds1_bb$zvalues > 0)) - expect_true(all(iodds1_bb2$zvalues > 0)) - - expect_true(all(iodds1_or$odds_ratios > 1)) - expect_true(all(iodds1_or0$odds_ratios == 1)) - - expect_true(all(iodds1_power0$exponents == 1)) - expect_true(all(iodds1_power$exponents < 1)) - - expect_true(all(iodds1_additive$odds > 1)) - expect_true(all(iodds1_additive0$odds > 1)) - - - # Check the odds. - expect_true(all(iodds1_basic$odds > 1)) - expect_true(all(iodds1_shin$odds > 1)) - expect_true(all(iodds1_shin2$odds > 1)) - expect_true(all(iodds1_bb$odds > 1)) - expect_true(all(iodds1_bb2$odds > 1)) - expect_true(all(iodds1_wpo$odds > 1)) - expect_true(all(iodds1_or$odds > 1)) - expect_true(all(iodds1_power$odds > 1)) - expect_true(all(iodds1_additive$odds > 1)) - - expect_true(all(iodds1_basic0$odds > 1)) - expect_true(all(iodds1_bb0$odds > 1)) - expect_true(all(iodds1_wpo0$odds > 1)) - expect_true(all(iodds1_or0$odds > 1)) - expect_true(all(iodds1_power0$odds > 1)) - expect_true(all(iodds1_additive0$odds > 1)) - -}) - - -context("Converting between odds and probabilities") - - -# Re-compute odds. - -idx <- 3 # The row in my_odds to check. 
- -iodds1_basic_r <- implied_odds(iprobs1_basic$probabilities[idx,], - method='basic', margin = iprobs1_basic$margin[idx]) - -iodds1_shin_r <- implied_odds(iprobs1_shin$probabilities[idx,], - method='shin', margin = iprobs1_shin$margin[idx]) - -iodds1_shin2_r <- implied_odds(iprobs1_shin2$probabilities[idx,], - method='shin', margin = iprobs1_shin$margin[idx], grossmargin = 0.01) - -iodds1_bb_r <- implied_odds(iprobs1_bb$probabilities[idx,], - method='bb', margin = iprobs1_bb$margin[idx]) - -iodds1_bb2_r <- implied_odds(iprobs1_bb2$probabilities[idx,], - method='bb', margin = iprobs1_bb2$margin[idx], grossmargin = 0.01) - -iodds1_wpo_r <- implied_odds(iprobs1_wpo$probabilities[idx,], - method='wpo', margin = iprobs1_wpo$margin[idx]) - -iodds1_or_r <- implied_odds(iprobs1_or$probabilities[idx,], - method='or', margin = iprobs1_or$margin[idx]) - -iodds1_power_r <- implied_odds(iprobs1_power$probabilities[idx,], - method='power', margin = iprobs1_power$margin[idx]) - -iodds1_additive_r <- implied_odds(iprobs1_additive$probabilities[idx,], - method='additive', margin = iprobs1_additive$margin[idx]) - - -test_that("Results", { - - # Check that we can recover the original odds. - expect_true(all(abs(iodds1_basic_r$odds - my_odds[idx,]) <= toll)) - expect_true(all(abs(iodds1_shin_r$odds - my_odds[idx,]) <= 0.001)) - expect_true(all(abs(iodds1_shin2_r$odds - my_odds[idx,]) <= 0.00015)) - - expect_true(all(abs(iodds1_bb_r$odds - my_odds[idx,]) <= toll)) - expect_true(all(abs(iodds1_bb2_r$odds - my_odds[idx,]) <= toll)) - expect_true(all(abs(iodds1_wpo_r$odds - my_odds[idx,]) <= toll)) - expect_true(all(abs(iodds1_or_r$odds - my_odds[idx,]) <= toll)) - expect_true(all(abs(iodds1_power_r$odds - my_odds[idx,]) <= toll)) - expect_true(all(abs(iodds1_additive_r$odds - my_odds[idx,]) <= toll)) - - # Check that the coefficients are the same. 
- expect_true(all(abs(iodds1_shin_r$zvalues - iprobs1_shin$zvalues[idx]) <= 0.0001)) - expect_true(all(abs(iodds1_bb_r$zvalues - iprobs1_bb$zvalues[idx]) <= toll)) - expect_true(all(abs(iodds1_bb2_r$zvalues - iprobs1_bb2$zvalues[idx]) <= toll)) - expect_true(all(abs(iodds1_wpo_r$specific_margins - iprobs1_wpo$specific_margins[idx,]) <= toll)) - expect_true(abs(iodds1_or_r$odds_ratios - iprobs1_or$odds_ratios[idx]) <= toll) - expect_true(abs(iodds1_power_r$exponents - iprobs1_power$exponents[idx]) <= toll) - -}) - - - + + +# some example odds +my_odds <- rbind(c(4.20, 3.70, 1.95), + c(2.45, 3.70, 2.90), + c(2.05, 3.20, 3.80), + c(1.595, 3.990, 6.760), + c(1.19 ,7.0 , 14.0)) + +# Some odds desinged to be problematic with the additive method. +# It is also problematic with the wpo method. +my_odds2 <- t(matrix(1/c(0.870, 0.2, 0.1, 0.05, 0.02, 0.01))) + + +# tolerance for some tests +toll <- 0.00005 + + +context("Implied probabilities") + + + +iprobs1_basic <- implied_probabilities(my_odds) +iprobs1_shin <- implied_probabilities(my_odds, method='shin') +iprobs1_shin2 <- implied_probabilities(my_odds, method='shin', grossmargin = 0.01) +iprobs1_shin3 <- implied_probabilities(my_odds, method='shin', shin_method = 'uniroot') +iprobs1_bb <- implied_probabilities(my_odds, method='bb') +iprobs1_bb2 <- implied_probabilities(my_odds, method='bb', grossmargin = 0.01) +iprobs1_wpo <- implied_probabilities(my_odds, method='wpo') +iprobs1_or <- implied_probabilities(my_odds, method='or') +iprobs1_power <- implied_probabilities(my_odds, method='power') +iprobs1_additive <- implied_probabilities(my_odds, method='additive') +iprobs1_jsd <- implied_probabilities(my_odds, method='jsd') + +# Shin method uniroot, with grossmargin != 0 should switch to shin_method = 'js' +# Make sure the output is the same. 
+iprobs1_shin4 <- implied_probabilities(my_odds, method='shin', shin_method = 'uniroot', grossmargin = 0.01) + +iprobs2_basic <- implied_probabilities(my_odds2) +iprobs2_shin <- implied_probabilities(my_odds2, method='shin') +iprobs2_shin3 <- implied_probabilities(my_odds2, method='shin', shin_method = 'uniroot') +iprobs2_or <- implied_probabilities(my_odds2, method='or') +iprobs2_power <- implied_probabilities(my_odds2, method='power') + +# The KL method does not work with my_odds2. +#iprobs2_kl <- implied_probabilities(my_odds2, method='kl') + + + + +test_that("Output", { + + expect_silent( + iprobs2_shin <- implied_probabilities(my_odds2, method='shin') + ) + + expect_silent( + iprobs2_shin2 <- implied_probabilities(my_odds2, method='shin', grossmargin = 0.01) + ) + + expect_silent( + iprobs2_basic <- implied_probabilities(my_odds2) + ) + expect_silent( + iprobs2_shin <- implied_probabilities(my_odds2, method='shin') + ) + expect_silent( + iprobs2_or <- implied_probabilities(my_odds2, method='or') + ) + expect_silent( + iprobs2_power <- implied_probabilities(my_odds2, method='power') + ) + + expect_warning( + iprobs2_additive <- implied_probabilities(my_odds2, method='additive') + ) + expect_warning( + iprobs2_wpo <- implied_probabilities(my_odds2, method='wpo') + ) + + # New in version 0.3.1, should give error. 
+ expect_error(implied_probabilities(my_odds[,1:2])) + + expect_equal(class(iprobs1_basic), 'list') + expect_equal(class(iprobs1_shin), 'list') + expect_equal(class(iprobs1_shin2), 'list') + expect_equal(class(iprobs1_shin3), 'list') + expect_equal(class(iprobs1_bb), 'list') + expect_equal(class(iprobs1_bb2), 'list') + expect_equal(class(iprobs1_wpo), 'list') + expect_equal(class(iprobs1_or), 'list') + expect_equal(class(iprobs1_additive), 'list') + expect_equal(class(iprobs1_jsd), 'list') + + expect_equal(all(abs(rowSums(iprobs1_basic$probabilities) - 1) < toll), TRUE) + expect_equal(all(abs(rowSums(iprobs1_shin$probabilities) - 1) < toll), TRUE) + expect_equal(all(abs(rowSums(iprobs1_shin2$probabilities) - 1) < toll), TRUE) + expect_equal(all(abs(rowSums(iprobs1_shin3$probabilities) - 1) < toll), TRUE) + expect_equal(all(abs(rowSums(iprobs1_bb$probabilities) - 1) < toll), TRUE) + expect_equal(all(abs(rowSums(iprobs1_bb2$probabilities) - 1) < toll), TRUE) + expect_equal(all(abs(rowSums(iprobs1_wpo$probabilities) - 1) < toll), TRUE) + expect_equal(all(abs(rowSums(iprobs1_or$probabilities) - 1) < toll), TRUE) + expect_equal(all(abs(rowSums(iprobs1_power$probabilities) - 1) < toll), TRUE) + expect_equal(all(abs(rowSums(iprobs1_additive$probabilities) - 1) < toll), TRUE) + expect_equal(all(abs(rowSums(iprobs1_jsd$probabilities) - 1) < toll), TRUE) + + expect_equal(all(iprobs1_basic$margin > 0), TRUE) + expect_equal(all(iprobs1_shin$margin > 0), TRUE) + expect_equal(all(iprobs1_shin2$margin > 0), TRUE) + expect_equal(all(iprobs1_shin3$margin > 0), TRUE) + expect_equal(all(iprobs1_bb$margin > 0), TRUE) + expect_equal(all(iprobs1_bb2$margin > 0), TRUE) + expect_equal(all(iprobs1_wpo$margin > 0), TRUE) + expect_equal(all(iprobs1_or$margin > 0), TRUE) + expect_equal(all(iprobs1_power$margin > 0), TRUE) + expect_equal(all(iprobs1_additive$margin > 0), TRUE) + expect_equal(all(iprobs1_jsd$margin > 0), TRUE) + + expect_equal(is.null(iprobs1_shin$zvalues), FALSE) + 
expect_equal(is.null(iprobs1_shin2$zvalues), FALSE) + expect_equal(is.null(iprobs1_shin3$zvalues), FALSE) + expect_equal(is.null(iprobs1_bb$zvalues), FALSE) + expect_equal(is.null(iprobs1_bb2$zvalues), FALSE) + expect_equal(is.null(iprobs1_wpo$specific_margins), FALSE) + expect_equal(is.null(iprobs1_or$odds_ratios), FALSE) + expect_equal(is.null(iprobs1_power$exponents), FALSE) + expect_equal(is.null(iprobs1_jsd$distance), FALSE) + + + expect_equal(class(iprobs2_basic), 'list') + expect_equal(class(iprobs2_shin), 'list') + expect_equal(class(iprobs2_shin2), 'list') + expect_equal(class(iprobs2_shin3), 'list') + expect_equal(class(iprobs2_wpo), 'list') + expect_equal(class(iprobs2_or), 'list') + expect_equal(class(iprobs2_additive), 'list') + + expect_equal(all(abs(rowSums(iprobs2_basic$probabilities) - 1) < toll), TRUE) + expect_equal(all(abs(rowSums(iprobs2_shin$probabilities) - 1) < toll), TRUE) + expect_equal(all(abs(rowSums(iprobs2_shin3$probabilities) - 1) < toll), TRUE) + expect_equal(all(abs(rowSums(iprobs2_wpo$probabilities) - 1) < toll), TRUE) + expect_equal(all(abs(rowSums(iprobs2_or$probabilities) - 1) < toll), TRUE) + expect_equal(all(abs(rowSums(iprobs2_power$probabilities) - 1) < toll), TRUE) + expect_equal(all(abs(rowSums(iprobs2_additive$probabilities) - 1) < toll), TRUE) + + expect_equal(all(iprobs2_basic$margin > 0), TRUE) + expect_equal(all(iprobs2_shin$margin > 0), TRUE) + expect_equal(all(iprobs2_shin3$margin > 0), TRUE) + expect_equal(all(iprobs2_wpo$margin > 0), TRUE) + expect_equal(all(iprobs2_or$margin > 0), TRUE) + expect_equal(all(iprobs2_power$margin > 0), TRUE) + expect_equal(all(iprobs2_additive$margin > 0), TRUE) + + expect_equal(iprobs2_basic$problematic, FALSE) + expect_equal(iprobs2_shin$problematic, FALSE) + expect_equal(iprobs2_shin3$problematic, FALSE) + expect_equal(iprobs2_wpo$problematic, TRUE) + expect_equal(iprobs2_power$problematic, FALSE) + expect_equal(iprobs2_additive$problematic, TRUE) + + 
expect_equal(is.null(iprobs2_shin$zvalues), FALSE) + expect_equal(is.null(iprobs2_shin3$zvalues), FALSE) + expect_equal(is.null(iprobs2_wpo$specific_margins), FALSE) + expect_equal(is.null(iprobs2_or$odds_ratios), FALSE) + expect_equal(is.null(iprobs2_power$exponents), FALSE) + + expect_true(all(iprobs1_shin4$probabilities == iprobs1_shin2$probabilities)) + expect_message(implied_probabilities(my_odds, method='shin', shin_method = 'uniroot', grossmargin = 0.01)) + +}) + + +context("Non-normalized results") + +iprobs1_basic_nn <- implied_probabilities(my_odds, normalize = FALSE) +iprobs1_shin_nn <- implied_probabilities(my_odds, method='shin', normalize = FALSE) +iprobs1_shin2_nn <- implied_probabilities(my_odds, method='shin', grossmargin = 0.01, normalize = FALSE) +iprobs1_shin3_nn <- implied_probabilities(my_odds, method='shin', shin_method = 'uniroot', normalize = FALSE) +iprobs1_bb_nn <- implied_probabilities(my_odds, method='bb', normalize = FALSE) +iprobs1_bb2_nn <- implied_probabilities(my_odds, method='bb', grossmargin = 0.01, normalize = FALSE) +iprobs1_wpo_nn <- implied_probabilities(my_odds, method='wpo', normalize = FALSE) +iprobs1_or_nn <- implied_probabilities(my_odds, method='or', normalize = FALSE) +iprobs1_power_nn <- implied_probabilities(my_odds, method='power', normalize = FALSE) +iprobs1_additive_nn <- implied_probabilities(my_odds, method='additive', normalize = FALSE) +iprobs1_jsd_nn <- implied_probabilities(my_odds, method='jsd', normalize = FALSE) + +# They should all be reasonably close to 1. 
+test_that("Non-normalized results", { + expect_true(all(abs((rowSums(iprobs1_basic_nn$probabilities) - 1)) < 0.01)) + expect_true(all(abs((rowSums(iprobs1_shin_nn$probabilities) - 1)) < 0.01)) + expect_true(all(abs((rowSums(iprobs1_shin2_nn$probabilities) - 1)) < 0.01)) + expect_true(all(abs((rowSums(iprobs1_shin3_nn$probabilities) - 1)) < 0.01)) + expect_true(all(abs((rowSums(iprobs1_bb_nn$probabilities) - 1)) < 0.01)) + expect_true(all(abs((rowSums(iprobs1_bb2_nn$probabilities) - 1)) < 0.01)) + expect_true(all(abs((rowSums(iprobs1_wpo_nn$probabilities) - 1)) < 0.01)) + expect_true(all(abs((rowSums(iprobs1_or_nn$probabilities) - 1)) < 0.01)) + expect_true(all(abs((rowSums(iprobs1_power_nn$probabilities) - 1)) < 0.01)) + expect_true(all(abs((rowSums(iprobs1_additive_nn$probabilities) - 1)) < 0.01)) + expect_true(all(abs((rowSums(iprobs1_jsd_nn$probabilities) - 1)) < 0.01)) + +}) + + +context("Missing values") + +# some example odds, with missing value +my_odds_na <- rbind(c(4.20, 3.70, 1.95), + c(2.45, NA, 2.90), + c(2.05, 3.20, 3.80)) + +# Test with missing values + +iprobs1na_basic <- implied_probabilities(my_odds_na) +iprobs1na_shin <- implied_probabilities(my_odds_na, method='shin') +iprobs1na_shin2 <- implied_probabilities(my_odds_na, method='shin', grossmargin = 0.01) +iprobs1na_shin3 <- implied_probabilities(my_odds_na, method='shin', shin_method = 'uniroot') +iprobs1na_bb <- implied_probabilities(my_odds_na, method='bb') +iprobs1na_bb2 <- implied_probabilities(my_odds_na, method='bb', grossmargin = 0.01) +iprobs1na_wpo <- implied_probabilities(my_odds_na, method='wpo') +iprobs1na_or <- implied_probabilities(my_odds_na, method='or') +iprobs1na_power <- implied_probabilities(my_odds_na, method='power') +iprobs1na_additive <- implied_probabilities(my_odds_na, method='additive') +iprobs1na_jsd <- implied_probabilities(my_odds_na, method='jsd') + + +test_that("missing values", { + + expect_true(all(is.na(iprobs1na_basic$probabilities[2,]))) + 
expect_true(is.na(iprobs1na_basic$problematic[2])) + expect_true(is.na(iprobs1na_basic$margin[2])) + expect_false(is.na(iprobs1na_basic$problematic[1])) + expect_false(is.na(iprobs1na_basic$margin[1])) + + + expect_true(all(is.na(iprobs1na_shin$probabilities[2,]))) + expect_true(is.na(iprobs1na_shin$problematic[2])) + expect_false(is.na(iprobs1na_shin$problematic[1])) + expect_false(is.na(iprobs1na_shin$margin[1])) + + expect_true(all(is.na(iprobs1na_shin2$probabilities[2,]))) + expect_true(is.na(iprobs1na_shin2$problematic[2])) + expect_false(is.na(iprobs1na_shin2$problematic[1])) + expect_false(is.na(iprobs1na_shin2$margin[1])) + + expect_true(all(is.na(iprobs1na_shin3$probabilities[2,]))) + expect_true(is.na(iprobs1na_shin3$problematic[2])) + expect_false(is.na(iprobs1na_shin3$problematic[1])) + expect_false(is.na(iprobs1na_shin3$margin[1])) + + expect_true(all(is.na(iprobs1na_bb$probabilities[2,]))) + expect_true(is.na(iprobs1na_bb$problematic[2])) + expect_false(is.na(iprobs1na_bb$problematic[1])) + expect_false(is.na(iprobs1na_bb$margin[1])) + + expect_true(all(is.na(iprobs1na_bb2$probabilities[2,]))) + expect_true(is.na(iprobs1na_bb2$problematic[2])) + expect_false(is.na(iprobs1na_bb2$problematic[1])) + expect_false(is.na(iprobs1na_bb2$margin[1])) + + expect_true(all(is.na(iprobs1na_wpo$probabilities[2,]))) + expect_true(is.na(iprobs1na_wpo$problematic[2])) + expect_false(is.na(iprobs1na_wpo$problematic[1])) + expect_false(is.na(iprobs1na_wpo$margin[1])) + + expect_true(all(is.na(iprobs1na_or$probabilities[2,]))) + expect_true(is.na(iprobs1na_or$problematic[2])) + expect_false(is.na(iprobs1na_or$problematic[1])) + expect_false(is.na(iprobs1na_or$margin[1])) + + expect_true(all(is.na(iprobs1na_power$probabilities[2,]))) + expect_true(is.na(iprobs1na_power$problematic[2])) + expect_false(is.na(iprobs1na_power$problematic[1])) + expect_false(is.na(iprobs1na_power$margin[1])) + + expect_true(all(is.na(iprobs1na_additive$probabilities[2,]))) + 
expect_true(is.na(iprobs1na_additive$problematic[2])) + expect_false(is.na(iprobs1na_additive$problematic[1])) + expect_false(is.na(iprobs1na_additive$margin[1])) + + expect_true(all(is.na(iprobs1na_jsd$probabilities[2,]))) + expect_true(is.na(iprobs1na_jsd$problematic[2])) + expect_false(is.na(iprobs1na_jsd$problematic[1])) + expect_false(is.na(iprobs1na_jsd$margin[1])) + + +}) + + + +context("Implied odds") + + +my_probs <- rbind((1/c(1.5, 5, 7.5)), + c(0.1, 0.2, 0.7), + c(0.01, 0.3, 0.69)) + +my_margin <- 0.022 + + +iodds1_basic <- implied_odds(my_probs, method='basic', margin = my_margin) +iodds1_shin <- implied_odds(my_probs, method='shin', margin = my_margin) +iodds1_shin2 <- implied_odds(my_probs, method='shin', margin = my_margin, grossmargin = 0.01) +iodds1_bb <- implied_odds(my_probs, method='bb', margin = my_margin) +iodds1_bb2 <- implied_odds(my_probs, method='bb', margin = my_margin, grossmargin = 0.01) +iodds1_wpo <- implied_odds(my_probs, method='wpo', margin = my_margin) +iodds1_or <- implied_odds(my_probs, method='or', margin = my_margin) +iodds1_power <- implied_odds(my_probs, method='power', margin = my_margin) +iodds1_additive <- implied_odds(my_probs, method='additive', margin = my_margin) + + +iodds1_basic0 <- implied_odds(my_probs, method='basic', margin = 0) +iodds1_bb0 <- implied_odds(my_probs, method='bb', margin = 0) +iodds1_wpo0 <- implied_odds(my_probs, method='wpo', margin = 0) +iodds1_or0 <- implied_odds(my_probs, method='or', margin = 0) +iodds1_power0 <- implied_odds(my_probs, method='power', margin = 0) +iodds1_additive0 <- implied_odds(my_probs, method='additive', margin = 0) + + + +test_that("Output", { + + expect_equal(class(iodds1_basic), 'list') + expect_equal(class(iodds1_shin), 'list') + expect_equal(class(iodds1_shin2), 'list') + expect_equal(class(iodds1_bb), 'list') + expect_equal(class(iodds1_bb0), 'list') + expect_equal(class(iodds1_bb2), 'list') + expect_equal(class(iodds1_wpo), 'list') + 
expect_equal(class(iodds1_wpo0), 'list') + expect_equal(class(iodds1_or), 'list') + expect_equal(class(iodds1_or0), 'list') + expect_equal(class(iodds1_power), 'list') + expect_equal(class(iodds1_power0), 'list') + expect_equal(class(iodds1_additive), 'list') + expect_equal(class(iodds1_additive0), 'list') + + + # Sum of improper probabilties sum to 1 + margin + expect_true(all(abs(rowSums(1 / iodds1_basic$odds) - (1 + my_margin)) <= toll)) + expect_true(all(abs(rowSums(1 / iodds1_shin$odds) - (1 + my_margin)) <= toll)) + expect_true(all(abs(rowSums(1 / iodds1_shin2$odds) - (1 + my_margin)) <= toll)) + expect_true(all(abs(rowSums(1 / iodds1_bb$odds) - (1 + my_margin)) <= toll)) + expect_true(all(abs(rowSums(1 / iodds1_bb2$odds) - (1 + my_margin)) <= toll)) + expect_true(all(abs(rowSums(1 / iodds1_wpo$odds) - (1 + my_margin)) <= toll)) + expect_true(all(abs(rowSums(1 / iodds1_or$odds) - (1 + my_margin)) <= toll)) + expect_true(all(abs(rowSums(1 / iodds1_power$odds) - (1 + my_margin)) <= toll)) + expect_true(all(abs(rowSums(1 / iodds1_additive$odds) - (1 + my_margin)) <= toll)) + + # When theres no margin, probabilities should sum to 1. + expect_true(all(abs(rowSums(1 / iodds1_basic0$odds) - 1) <= toll)) + expect_true(all(abs(rowSums(1 / iodds1_bb0$odds) - 1) <= toll)) + expect_true(all(abs(rowSums(1 / iodds1_wpo0$odds) - 1) <= toll)) + expect_true(all(abs(rowSums(1 / iodds1_or0$odds) - 1) <= toll)) + expect_true(all(abs(rowSums(1 / iodds1_power0$odds) - 1) <= toll)) + expect_true(all(abs(rowSums(1 / iodds1_additive0$odds) - 1) <= toll)) + + # Check the coefficients for being alright. 
+ expect_true(all(iodds1_shin$zvalues > 0)) + expect_true(all(iodds1_shin2$zvalues > 0)) + + expect_true(all(iodds1_bb0$zvalues >= 0)) + expect_true(all(iodds1_bb$zvalues > 0)) + expect_true(all(iodds1_bb2$zvalues > 0)) + + expect_true(all(iodds1_or$odds_ratios > 1)) + expect_true(all(iodds1_or0$odds_ratios == 1)) + + expect_true(all(iodds1_power0$exponents == 1)) + expect_true(all(iodds1_power$exponents < 1)) + + expect_true(all(iodds1_additive$odds > 1)) + expect_true(all(iodds1_additive0$odds > 1)) + + + # Check the odds. + expect_true(all(iodds1_basic$odds > 1)) + expect_true(all(iodds1_shin$odds > 1)) + expect_true(all(iodds1_shin2$odds > 1)) + expect_true(all(iodds1_bb$odds > 1)) + expect_true(all(iodds1_bb2$odds > 1)) + expect_true(all(iodds1_wpo$odds > 1)) + expect_true(all(iodds1_or$odds > 1)) + expect_true(all(iodds1_power$odds > 1)) + expect_true(all(iodds1_additive$odds > 1)) + + expect_true(all(iodds1_basic0$odds > 1)) + expect_true(all(iodds1_bb0$odds > 1)) + expect_true(all(iodds1_wpo0$odds > 1)) + expect_true(all(iodds1_or0$odds > 1)) + expect_true(all(iodds1_power0$odds > 1)) + expect_true(all(iodds1_additive0$odds > 1)) + +}) + + +context("Converting between odds and probabilities") + + +# Re-compute odds. + +idx <- 3 # The row in my_odds to check. 
+ +iodds1_basic_r <- implied_odds(iprobs1_basic$probabilities[idx,], + method='basic', margin = iprobs1_basic$margin[idx]) + +iodds1_shin_r <- implied_odds(iprobs1_shin$probabilities[idx,], + method='shin', margin = iprobs1_shin$margin[idx]) + +iodds1_shin2_r <- implied_odds(iprobs1_shin2$probabilities[idx,], + method='shin', margin = iprobs1_shin$margin[idx], grossmargin = 0.01) + +iodds1_bb_r <- implied_odds(iprobs1_bb$probabilities[idx,], + method='bb', margin = iprobs1_bb$margin[idx]) + +iodds1_bb2_r <- implied_odds(iprobs1_bb2$probabilities[idx,], + method='bb', margin = iprobs1_bb2$margin[idx], grossmargin = 0.01) + +iodds1_wpo_r <- implied_odds(iprobs1_wpo$probabilities[idx,], + method='wpo', margin = iprobs1_wpo$margin[idx]) + +iodds1_or_r <- implied_odds(iprobs1_or$probabilities[idx,], + method='or', margin = iprobs1_or$margin[idx]) + +iodds1_power_r <- implied_odds(iprobs1_power$probabilities[idx,], + method='power', margin = iprobs1_power$margin[idx]) + +iodds1_additive_r <- implied_odds(iprobs1_additive$probabilities[idx,], + method='additive', margin = iprobs1_additive$margin[idx]) + + +test_that("Results", { + + # Check that we can recover the original odds. + expect_true(all(abs(iodds1_basic_r$odds - my_odds[idx,]) <= toll)) + expect_true(all(abs(iodds1_shin_r$odds - my_odds[idx,]) <= 0.001)) + expect_true(all(abs(iodds1_shin2_r$odds - my_odds[idx,]) <= 0.00015)) + + expect_true(all(abs(iodds1_bb_r$odds - my_odds[idx,]) <= toll)) + expect_true(all(abs(iodds1_bb2_r$odds - my_odds[idx,]) <= toll)) + expect_true(all(abs(iodds1_wpo_r$odds - my_odds[idx,]) <= toll)) + expect_true(all(abs(iodds1_or_r$odds - my_odds[idx,]) <= toll)) + expect_true(all(abs(iodds1_power_r$odds - my_odds[idx,]) <= toll)) + expect_true(all(abs(iodds1_additive_r$odds - my_odds[idx,]) <= toll)) + + # Check that the coefficients are the same. 
+ expect_true(all(abs(iodds1_shin_r$zvalues - iprobs1_shin$zvalues[idx]) <= 0.0001)) + expect_true(all(abs(iodds1_bb_r$zvalues - iprobs1_bb$zvalues[idx]) <= toll)) + expect_true(all(abs(iodds1_bb2_r$zvalues - iprobs1_bb2$zvalues[idx]) <= toll)) + expect_true(all(abs(iodds1_wpo_r$specific_margins - iprobs1_wpo$specific_margins[idx,]) <= toll)) + expect_true(abs(iodds1_or_r$odds_ratios - iprobs1_or$odds_ratios[idx]) <= toll) + expect_true(abs(iodds1_power_r$exponents - iprobs1_power$exponents[idx]) <= toll) + +}) + + + From 1e6d38e007ca67c0a40a4793ca8a536a7526d5dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Christoffer=20Lindstr=C3=B8m?= Date: Sun, 11 Jun 2023 18:50:02 +0000 Subject: [PATCH 4/4] version 0.5 --- DESCRIPTION | 6 +- MD5 | 26 +- NEWS.md | 9 + R/implied_odds.R | 496 +++++++++++++++---------------- R/implied_probabilities.R | 215 +++++++++++--- build/vignette.rds | Bin 221 -> 247 bytes inst/doc/Troubleshooting.R | 26 ++ inst/doc/Troubleshooting.Rmd | 162 +++++++++++ inst/doc/Troubleshooting.html | 237 +++++++++++++++ inst/doc/introduction.R | 11 + inst/doc/introduction.Rmd | 529 ++++++++++++++++++---------------- inst/doc/introduction.html | 146 ++++++---- man/implied_probabilities.Rd | 42 ++- tests/testthat/test_1.R | 98 ++++++- vignettes/Troubleshooting.Rmd | 162 +++++++++++ vignettes/introduction.Rmd | 529 ++++++++++++++++++---------------- 16 files changed, 1812 insertions(+), 882 deletions(-) create mode 100644 inst/doc/Troubleshooting.R create mode 100644 inst/doc/Troubleshooting.Rmd create mode 100644 inst/doc/Troubleshooting.html create mode 100644 vignettes/Troubleshooting.Rmd diff --git a/DESCRIPTION b/DESCRIPTION index 7da233f..83f03bd 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: implied Type: Package Title: Convert Between Bookmaker Odds and Probabilities -Version: 0.4.1 +Version: 0.5 Author: Jonas Christoffer Lindstrøm Maintainer: Jonas Christoffer Lindstrøm Description: Convert between bookmaker odds and 
probabilities. Eight different @@ -13,6 +13,6 @@ RoxygenNote: 7.1.2 Suggests: testthat (>= 2.0.1), knitr, rmarkdown VignetteBuilder: knitr NeedsCompilation: no -Packaged: 2022-06-08 19:26:29 UTC; Jonas Christoffer +Packaged: 2023-06-11 18:14:04 UTC; Jonas Christoffer Repository: CRAN -Date/Publication: 2022-06-08 19:40:02 UTC +Date/Publication: 2023-06-11 19:50:02 UTC diff --git a/MD5 b/MD5 index ce37a31..c46c8a9 100644 --- a/MD5 +++ b/MD5 @@ -1,15 +1,19 @@ -4c24cd9373649dc27b767d4e18df6e2e *DESCRIPTION +ed8a77ffdb40f140f6980f99e45a2f4c *DESCRIPTION d32331dc3ccbe103bf204525d1ff82dd *NAMESPACE -ba113a7e4e7107dcf21763cadb00b515 *NEWS.md -20072e33af45ea50e9186b4e6d95e7ac *R/implied_odds.R -2db701891bf7b24143c51b12b5aa5440 *R/implied_probabilities.R +5175f26d9c3bc4f740a5496d4660b7dc *NEWS.md +8a73301aac80eaec88540c50efe4ee62 *R/implied_odds.R +078d65c0870d882ebf7e890a836385cb *R/implied_probabilities.R c71d10f8db03d579fa1c0c6e76959a7f *R/zzz.R -98b688bea94e81f1d052255a3ed20dca *build/vignette.rds -7532bb544e1b432d4598329e513eca30 *inst/doc/introduction.R -233f2eab794eec18ddde2c445b0f177a *inst/doc/introduction.Rmd -d9aa018d3e35e98ee04eb169492869f0 *inst/doc/introduction.html +e8bc7681acd8e612eaab83ac4fc77076 *build/vignette.rds +9f3d124e6bbd9e052fef01e69d83ddba *inst/doc/Troubleshooting.R +111e67fb6ca9478877c5ab056380f1e0 *inst/doc/Troubleshooting.Rmd +cce10b1137bac370165e60c135599a9e *inst/doc/Troubleshooting.html +b95af5b478898d4e0b06423d0f827ad9 *inst/doc/introduction.R +8d8c5b2afdcae2048b42605d42ef2a70 *inst/doc/introduction.Rmd +f697ee1b3f19b8d8b4a085a46d84f176 *inst/doc/introduction.html 32101c917ca72f6d3e0d880a57a802c7 *man/implied_odds.Rd -3af6367c1b5bc9a22f2c8920f4fe26dc *man/implied_probabilities.Rd +2f28bbc547f1a4eb90cbfa92ac2a2cff *man/implied_probabilities.Rd bc77ebadaa37370915e00adac9036b01 *tests/testthat.R -0762263852cfc75a8a9a29e33b155b1b *tests/testthat/test_1.R -233f2eab794eec18ddde2c445b0f177a *vignettes/introduction.Rmd 
+60e9727bd36a19cbb31f8324f0ea3ac1 *tests/testthat/test_1.R +111e67fb6ca9478877c5ab056380f1e0 *vignettes/Troubleshooting.Rmd +8d8c5b2afdcae2048b42605d42ef2a70 *vignettes/introduction.Rmd diff --git a/NEWS.md b/NEWS.md index 0ea57d2..6eee94f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,13 @@ + +# implied Version 0.5 +* Can now convert odds to probabilities that should sum to other values than 1, using the target_probability argument in implied_probabilities(). +* New vignette 'Troubleshooting'. +* Introduction vignette updated with how to use the new target_probability option. +* New option 'uniroot_options' in implied_probabilities, to better control the uniroot solver. +* Fixed many spelling errors in the documentation. + + # implied Version 0.4.1 * Small change to how the 'jsd' method in implied_probabilities() works, so that it works in some cases where it used to fail. * Fixed a link in the Introduction vignette. diff --git a/R/implied_odds.R b/R/implied_odds.R index e42778b..35f191d 100644 --- a/R/implied_odds.R +++ b/R/implied_odds.R @@ -1,248 +1,248 @@ - - - -# The functions xx_func_o(coef, probs) transforms proper probabilities (that sum to 1) -# into improper probabilities as a function of the input coeffient. -# The corresponding functions xx_o_solvefor(coef, probs, margin) are used -# with uniroot to find the coefficient that makes the transformed probabilities -# sum to the desired margin. - -# Transform the probabilities using the Shin's method, -# for a given value of the odds ratio cc. -shin_func_o <- function(zz, probs, grossmargin=NULL){ - - # Eq. 5 in Shin 1993. - yy <- sqrt((zz*probs) + ((1-zz)*probs^2)) - res <- yy * sum(yy) - - if (!is.null(grossmargin)){ - # Eq. 14 in in Fingleton & Waldron 1999 - res <- res / (1 - grossmargin) - } - - return(res) -} - -# the condition that the sum of the probabilites must sum to 1. -# Used with uniroot. 
-shin_o_solvefor <- function(zz, probs, margin, grossmargin=NULL){ - tmp <- shin_func_o(zz, probs, grossmargin) - sum(tmp) - (1 + margin) -} - - -# Transform the probabilities using the odds ratio method, -# for a given value of the odds ratio cc. -or_func_o <- function(cc, probs){ - or_probs <- cc * probs - or_probs / (1 - probs + or_probs) -} - -# The condition that the sum of the transformed probabilites -# must sum to 1 + margin. -or_o_solvefor <- function(cc, probs, margin){ - tmp <- or_func_o(cc, probs) - sum(tmp) - (1 + margin) -} - - -# Transform the probabilities using the power method. -pwr_func_o <- function(nn, probs){ - probs^(nn) -} - -# The condition that the sum of the transformed probabilites -# must sum to 1 + margin. -pwr_o_solvefor <- function(nn, probs, margin){ - tmp <- pwr_func_o(nn, probs) - sum(tmp) - (1 + margin) -} - - - -#' Implied odds with added margin from probabilities. -#' -#' This functions converts probabilities to odds in decimal format, while adding overround. -#' The function does the inverse of what the function \code{\link{implied_probabilities}} does. -#' -#' @param probabilities A matrix or numeric of probabilities, where each column is an outcome. -#' @param method A string giving the method to use. Valid methods are 'basic', 'shin', 'bb', 'wpo', 'or', 'power' or 'additive'. -#' @param margin numeric. How large margin (aka overround) should be added to the probabilities. -#' @param grossmargin Numeric. Must be 0 or greater. See the details. -#' @param normalize Logical. If TRUE (default), scale the input probabilites to sum to 1. -#' -#' @return A named list. The first component is named 'odds' and contain a matrix of -#' implied odds. The second depends on the method used to compute the probabilities. 
-#' -#' @export -implied_odds <- function(probabilities, method = 'basic', margin = 0, - grossmargin = NULL, normalize=TRUE){ - - stopifnot(length(method) == 1, - length(margin) == 1, - tolower(method) %in% c('basic', 'shin', 'bb', 'wpo', 'or', 'power', 'additive'), - all(probabilities >= 0, na.rm=TRUE)) - - - - if (!is.matrix(probabilities)){ - - if ('data.frame' %in% class(probabilities)){ - probabilities <- as.matrix(probabilities) - } else { - probabilities <- matrix(probabilities, nrow=1, - dimnames = list(NULL, names(probabilities))) - } - } - - # Make sure the probabilities sum to exactly 1. - if (normalize){ - probabilities <- probabilities / rowSums(probabilities) - } - - # Prepare the list that will be returned. - out <- vector(mode='list', length=1) - names(out) <- c('odds') - - # Some useful quantities - n_probs <- nrow(probabilities) - n_outcomes <- ncol(probabilities) - - # Missing values - missing_idx <- apply(probabilities, MARGIN = 1, - FUN = function(x) any(is.na(x))) - - # inverted_probs <- 1 / probabilities - - if (method == 'basic'){ - - out$odds <- 1 / (probabilities * (1 + margin)) - - } else if (method == 'shin'){ - - odds <- matrix(nrow=n_probs, ncol=n_outcomes) - zz <- numeric(n_probs) - - for (ii in 1:n_probs){ - - # Skip rows with missing values. 
- if (missing_idx[ii] == TRUE){ - next - } - - if (margin != 0){ - res <- stats::uniroot(f=shin_o_solvefor, interval = c(0, 0.4), - probs=probabilities[ii,], - margin = margin, grossmargin = grossmargin) - zz[ii] <- res$root - } else { - zz[ii] <- 0 - } - - odds[ii,] <- 1 / shin_func_o(zz=zz[ii], probs = probabilities[ii,], grossmargin = grossmargin) - } - - out$odds <- odds - out$zvalues <- zz - - } else if (method == 'bb'){ - - if (is.null(grossmargin)){ - grossmargin <- 0 - } else { - stopifnot(grossmargin >= 0, - length(grossmargin) == 1) - } - - zz <- (((1-grossmargin)*(1 + margin)) - 1) / (n_outcomes-1) - out$odds <- 1 / ((1+margin) * (((probabilities*(1-zz)) + zz) / ((n_outcomes-1)*zz + 1))) - - out$zvalues <- zz - - } else if (method == 'wpo'){ - # Margin Weights Proportional to the Odds. - # Method from the Wisdom of the Crowds pdf. - invprob <- 1 / probabilities - out$specific_margins <- (margin * invprob) / n_outcomes - out$odds <- invprob / (1 + out$specific_margins) - - } else if (method == 'or'){ - - odds <- matrix(nrow=n_probs, ncol=n_outcomes) - odds_ratios <- numeric(n_probs) - - for (ii in 1:n_probs){ - - # Skip rows with missing values. - if (missing_idx[ii] == TRUE){ - next - } - - if (margin != 0){ - res <- stats::uniroot(f=or_o_solvefor, interval = c(0.05, 5), - probs=probabilities[ii,], margin = margin) - odds_ratios[ii] <- res$root - } else { - odds_ratios[ii] <- 1 - } - - odds[ii,] <- 1 / or_func_o(cc=odds_ratios[ii], probs = probabilities[ii,]) - } - - out$odds <- odds - out$odds_ratios <- odds_ratios - - } else if (method == 'power'){ - - odds <- matrix(nrow=n_probs, ncol=n_outcomes) - exponents <- numeric(n_probs) - - for (ii in 1:n_probs){ - - # Skip rows with missing values. 
- if (missing_idx[ii] == TRUE){ - next - } - - if (margin != 0){ - res <- stats::uniroot(f=pwr_o_solvefor, interval = c(0.0001, 1.1), - probs=probabilities[ii,], margin = margin) - exponents[ii] <- res$root - } else { - exponents[ii] <- 1 - } - - odds[ii,] <- 1 / pwr_func_o(nn=exponents[ii], probs = probabilities[ii,]) - } - - out$odds <- odds - out$exponents <- exponents - - } else if (method == 'additive'){ - - odds <- matrix(nrow=n_probs, ncol=n_outcomes) - - for (ii in 1:n_probs){ - - # Skip rows with missing values. - if (missing_idx[ii] == TRUE){ - next - } - - odds[ii,] <- 1 / (probabilities[ii,] + (margin / n_outcomes)) - } - - out$odds <- odds - - } - - # Make sure the matrix of implied probabilities has column names. - if (!is.null(colnames(probabilities))){ - colnames(out$odds) <- colnames(probabilities) - } - - - return(out) - - -} + + + +# The functions xx_func_o(coef, probs) transforms proper probabilities (that sum to 1) +# into improper probabilities as a function of the input coefficient. +# The corresponding functions xx_o_solvefor(coef, probs, margin) are used +# with uniroot to find the coefficient that makes the transformed probabilities +# sum to the desired margin. + +# Transform the probabilities using the Shin's method, +# for a given value of the odds ratio cc. +shin_func_o <- function(zz, probs, grossmargin=NULL){ + + # Eq. 5 in Shin 1993. + yy <- sqrt((zz*probs) + ((1-zz)*probs^2)) + res <- yy * sum(yy) + + if (!is.null(grossmargin)){ + # Eq. 14 in in Fingleton & Waldron 1999 + res <- res / (1 - grossmargin) + } + + return(res) +} + +# the condition that the sum of the probabilites must sum to 1. +# Used with uniroot. +shin_o_solvefor <- function(zz, probs, margin, grossmargin=NULL){ + tmp <- shin_func_o(zz, probs, grossmargin) + sum(tmp) - (1 + margin) +} + + +# Transform the probabilities using the odds ratio method, +# for a given value of the odds ratio cc. 
+or_func_o <- function(cc, probs){ + or_probs <- cc * probs + or_probs / (1 - probs + or_probs) +} + +# The condition that the sum of the transformed probabilites +# must sum to 1 + margin. +or_o_solvefor <- function(cc, probs, margin){ + tmp <- or_func_o(cc, probs) + sum(tmp) - (1 + margin) +} + + +# Transform the probabilities using the power method. +pwr_func_o <- function(nn, probs){ + probs^(nn) +} + +# The condition that the sum of the transformed probabilites +# must sum to 1 + margin. +pwr_o_solvefor <- function(nn, probs, margin){ + tmp <- pwr_func_o(nn, probs) + sum(tmp) - (1 + margin) +} + + + +#' Implied odds with added margin from probabilities. +#' +#' This functions converts probabilities to odds in decimal format, while adding overround. +#' The function does the inverse of what the function \code{\link{implied_probabilities}} does. +#' +#' @param probabilities A matrix or numeric of probabilities, where each column is an outcome. +#' @param method A string giving the method to use. Valid methods are 'basic', 'shin', 'bb', 'wpo', 'or', 'power' or 'additive'. +#' @param margin numeric. How large margin (aka overround) should be added to the probabilities. +#' @param grossmargin Numeric. Must be 0 or greater. See the details. +#' @param normalize Logical. If TRUE (default), scale the input probabilites to sum to 1. +#' +#' @return A named list. The first component is named 'odds' and contain a matrix of +#' implied odds. The second depends on the method used to compute the probabilities. 
+#' +#' @export +implied_odds <- function(probabilities, method = 'basic', margin = 0, + grossmargin = NULL, normalize=TRUE){ + + stopifnot(length(method) == 1, + length(margin) == 1, + tolower(method) %in% c('basic', 'shin', 'bb', 'wpo', 'or', 'power', 'additive'), + all(probabilities >= 0, na.rm=TRUE)) + + + + if (!is.matrix(probabilities)){ + + if ('data.frame' %in% class(probabilities)){ + probabilities <- as.matrix(probabilities) + } else { + probabilities <- matrix(probabilities, nrow=1, + dimnames = list(NULL, names(probabilities))) + } + } + + # Make sure the probabilities sum to exactly 1. + if (normalize){ + probabilities <- probabilities / rowSums(probabilities) + } + + # Prepare the list that will be returned. + out <- vector(mode='list', length=1) + names(out) <- c('odds') + + # Some useful quantities + n_probs <- nrow(probabilities) + n_outcomes <- ncol(probabilities) + + # Missing values + missing_idx <- apply(probabilities, MARGIN = 1, + FUN = function(x) any(is.na(x))) + + # inverted_probs <- 1 / probabilities + + if (method == 'basic'){ + + out$odds <- 1 / (probabilities * (1 + margin)) + + } else if (method == 'shin'){ + + odds <- matrix(nrow=n_probs, ncol=n_outcomes) + zz <- numeric(n_probs) + + for (ii in 1:n_probs){ + + # Skip rows with missing values. 
+ if (missing_idx[ii] == TRUE){ + next + } + + if (margin != 0){ + res <- stats::uniroot(f=shin_o_solvefor, interval = c(0, 0.4), + probs=probabilities[ii,], + margin = margin, grossmargin = grossmargin) + zz[ii] <- res$root + } else { + zz[ii] <- 0 + } + + odds[ii,] <- 1 / shin_func_o(zz=zz[ii], probs = probabilities[ii,], grossmargin = grossmargin) + } + + out$odds <- odds + out$zvalues <- zz + + } else if (method == 'bb'){ + + if (is.null(grossmargin)){ + grossmargin <- 0 + } else { + stopifnot(grossmargin >= 0, + length(grossmargin) == 1) + } + + zz <- (((1-grossmargin)*(1 + margin)) - 1) / (n_outcomes-1) + out$odds <- 1 / ((1+margin) * (((probabilities*(1-zz)) + zz) / ((n_outcomes-1)*zz + 1))) + + out$zvalues <- zz + + } else if (method == 'wpo'){ + # Margin Weights Proportional to the Odds. + # Method from the Wisdom of the Crowds pdf. + invprob <- 1 / probabilities + out$specific_margins <- (margin * invprob) / n_outcomes + out$odds <- invprob / (1 + out$specific_margins) + + } else if (method == 'or'){ + + odds <- matrix(nrow=n_probs, ncol=n_outcomes) + odds_ratios <- numeric(n_probs) + + for (ii in 1:n_probs){ + + # Skip rows with missing values. + if (missing_idx[ii] == TRUE){ + next + } + + if (margin != 0){ + res <- stats::uniroot(f=or_o_solvefor, interval = c(0.05, 5), + probs=probabilities[ii,], margin = margin) + odds_ratios[ii] <- res$root + } else { + odds_ratios[ii] <- 1 + } + + odds[ii,] <- 1 / or_func_o(cc=odds_ratios[ii], probs = probabilities[ii,]) + } + + out$odds <- odds + out$odds_ratios <- odds_ratios + + } else if (method == 'power'){ + + odds <- matrix(nrow=n_probs, ncol=n_outcomes) + exponents <- numeric(n_probs) + + for (ii in 1:n_probs){ + + # Skip rows with missing values. 
+ if (missing_idx[ii] == TRUE){ + next + } + + if (margin != 0){ + res <- stats::uniroot(f=pwr_o_solvefor, interval = c(0.0001, 1.1), + probs=probabilities[ii,], margin = margin) + exponents[ii] <- res$root + } else { + exponents[ii] <- 1 + } + + odds[ii,] <- 1 / pwr_func_o(nn=exponents[ii], probs = probabilities[ii,]) + } + + out$odds <- odds + out$exponents <- exponents + + } else if (method == 'additive'){ + + odds <- matrix(nrow=n_probs, ncol=n_outcomes) + + for (ii in 1:n_probs){ + + # Skip rows with missing values. + if (missing_idx[ii] == TRUE){ + next + } + + odds[ii,] <- 1 / (probabilities[ii,] + (margin / n_outcomes)) + } + + out$odds <- odds + + } + + # Make sure the matrix of implied probabilities has column names. + if (!is.null(colnames(probabilities))){ + colnames(out$odds) <- colnames(probabilities) + } + + + return(out) + + +} diff --git a/R/implied_probabilities.R b/R/implied_probabilities.R index ea5454d..e558a6e 100644 --- a/R/implied_probabilities.R +++ b/R/implied_probabilities.R @@ -2,23 +2,23 @@ #' Implied probabilities from bookmaker odds. #' -#' This function calculate the implied probabilties from bookmaker odds in decimal format, while -#' accounting for overround in the odds. +#' This function calculate the implied probabilities from bookmaker odds in decimal format, while +#' accounting for over-round in the odds. #' #' The method 'basic' is the simplest method, and computes the implied probabilities by #' dividing the inverted odds by the sum of the inverted odds. #' #' The methods 'wpo' (Weights Proportional to the Odds), 'or' (Odds Ratio) and 'power' are form the Wisdom of the Crowds document (the updated version) by -#' Joseph Buchdahl. The method 'or' is origianlly by Cheung (2015), and the method 'power' is there referred +#' Joseph Buchdahl. The method 'or' is originally by Cheung (2015), and the method 'power' is there referred #' to as the logarithmic method. #' #' The method 'shin' uses the method by Shin (1992, 1993). 
This model assumes that there is a fraction of #' insider trading, and that the bookmakers tries to maximize their profits. In addition to providing -#' implied probabilties, the method also gives an estimate of the proportion if inside trade, denoted z. Two algorithms -#' are implemented for finding the probabilities and z. Which algorithm to use is chosen via the shin_mehod argument. +#' implied probabilities, the method also gives an estimate of the proportion if inside trade, denoted z. Two algorithms +#' are implemented for finding the probabilities and z. Which algorithm to use is chosen via the shin_method argument. #' The default method (shin_method = 'js') is based on the algorithm in Jullien & Salanié (1994). The 'uniroot' #' method uses R's built in equation solver to find the probabilities. The uniroot approach is also used for the -#' 'pwr' and 'or' methods. The two methods might give slightly different answers, especially when the bookamer margin +#' 'pwr' and 'or' methods. The two methods might give slightly different answers, especially when the bookmaker margin #' (and z) is small. #' #' The 'bb' (short for "balanced books") method is from Fingleton & Waldron (1999), and is a variant of Shin's method. It too assume @@ -27,22 +27,30 @@ #' #' Both the 'shin' and 'bb' methods can be used together with the 'grossmargin' argument. This is also #' from the Fingleton & Waldron (1999) paper, and adds some further assumption to the calculations, -#' related to opperating costs. grossmargin should be 0 (default) or greater, typical range is 0 to 0.05. +#' related to operating costs. grossmargin should be 0 (default) or greater, typical range is 0 to 0.05. #' For values other than 0, this might sometimes cause some probabilities to not be identifiable. A warning #' will be given if this happens. #' #' The method 'jsd' was developed by Christopher D. Long, and described in a series of Twitter postings #' and a python implementation posted on GitHub. 
#' +#' Methods 'shin', 'or', 'power', and 'jsd' use the uniroot solver to find the correct probabilities. Sometimes it will fail +#' to find a solution, but it can be made to work by tuning some setting. The uniroot_options argument accepts a list with +#' options that are passed on to the uniroot function. Currently the interval, maxit, tol and extendInt argument of +#' uniroot can be changed. See the Troubleshooting vignette for more details. +#' #' #' @param odds A matrix or numeric of bookmaker odds. The odds must be in the decimal format. #' @param method A string giving the method to use. Valid methods are 'basic', 'shin', 'bb', #' 'wpo', 'or', 'power', 'additive', and 'jsd'. #' @param normalize Logical. Some of the methods will give small rounding errors. If TRUE (default) -#' a final normalization is applied to make absoultely sure the +#' a final normalization is applied to make absolutely sure the #' probabilities sum to 1. +#' @param target_probability Numeric. The value the probabilities should sum to. Default is 1. #' @param grossmargin Numeric. Must be 0 or greater. See the details. -#' @param shin_method Character. Either 'js' (defeault) or 'uniroot'. See the details. +#' @param shin_method Character. Either 'js' (default) or 'uniroot'. See the details. +#' @param shin_maxiter numeric. Max number of iterations for shin method 'js'. +#' @param uniroot_options list. Option passed on to the uniroot solver, for those methods where it is applicable. See 'details'. #' #' #' @return A named list. 
The first component is named 'probabilities' and contain a matrix of @@ -52,14 +60,14 @@ #' \item{ zvalues (method = 'shin' and method='bb'): The estimated amount of insider trade.} #' \item{ specific_margins (method = 'wpo'): Matrix of the margins applied to each outcome.} #' \item{ odds_ratios (method = 'or'): Numeric with the odds ratio that are used to convert true -#' probabilities to bookmaker probabilties.} +#' probabilities to bookmaker probabilities.} #' \item{ exponents (method = 'power'): The (inverse) exponents that are used to convert true -#' probabilities to bookmaker probabilties.} +#' probabilities to bookmaker probabilities.} #' \item{ distance (method = 'jsd'): The Jensen-Shannon distances that are used to convert true -#' probabilities to bookmaker probabilties.} +#' probabilities to bookmaker probabilities.} #' } #' -#' The fourth compnent 'problematic' is a logical vector called indicating if any probabilites has fallen +#' The fourth component 'problematic' is a logical vector called indicating if any probabilities has fallen #' outside the 0-1 range, or if there were some other problem computing the probabilities. 
#' #' @@ -85,21 +93,35 @@ #'converted_odds$probabilities #' #' @export -implied_probabilities <- function(odds, method='basic', normalize=TRUE, grossmargin = 0, - shin_method = 'js'){ +implied_probabilities <- function(odds, method='basic', normalize=TRUE, target_probability = 1, + grossmargin = 0, shin_method = 'js', shin_maxiter = 1000, + uniroot_options = NULL){ stopifnot(length(method) == 1, tolower(method) %in% c('basic', 'shin', 'bb', 'wpo', 'or', 'power', 'additive', 'jsd'), all(odds >= 1, na.rm=TRUE), + length(target_probability) == 1, + target_probability > 0, grossmargin >= 0, shin_method %in% c('js', 'uniroot'), - length(shin_method) == 1) + length(shin_method) == 1, + length(shin_maxiter) == 1, + shin_maxiter > 1, + is.null(uniroot_options) | is.list(uniroot_options)) + if (method == 'shin' & shin_method == 'uniroot' & grossmargin != 0){ shin_method <- 'js' message('shin_method uniroot does not work when grossmargin is not 0. Method js will be used.') } + if (method == 'shin' & shin_method == 'js' & target_probability != 1){ + shin_method <- 'uniroot' + grossmargin <- 0 + message('shin_method js does not work when target_probability is not 1. Method uniroot will be used with grossmargin = 0.') + } + + if (!is.matrix(odds)){ if ('data.frame' %in% class(odds)){ @@ -110,6 +132,16 @@ implied_probabilities <- function(odds, method='basic', normalize=TRUE, grossmar } } + if (method %in% c('shin', 'or', 'power', 'jsd')){ + uniroot_opts <- default_uniroot_opts(method = method) + + if (is.list(uniroot_options)){ + uniroot_opts <- utils::modifyList(uniroot_opts, uniroot_options) + } + + } + + # Prepare the list that will be returned. 
out <- vector(mode='list', length=2) names(out) <- c('probabilities', 'margin') @@ -121,7 +153,7 @@ implied_probabilities <- function(odds, method='basic', normalize=TRUE, grossmar # Inverted odds and margins inverted_odds <- 1 / odds inverted_odds_sum <- rowSums(inverted_odds) - out$margin <- inverted_odds_sum - 1 + out$margin <- inverted_odds_sum - target_probability # Missing values missing_idx <- apply(odds, MARGIN = 1, @@ -132,9 +164,15 @@ implied_probabilities <- function(odds, method='basic', normalize=TRUE, grossmar stop('Some inverse odds sum to less than 1.') } + # Vector to keep track of uniroot problems. + use_uniroot <- method %in% c('or', 'power', 'jsd') | (method %in% 'shin' & shin_method == 'uniroot') + if (use_uniroot){ + problematic_uniroot <- logical(n_odds) + problematic_uniroot_messages <- character(n_odds) + } if (method == 'basic'){ - out$probabilities <- inverted_odds / inverted_odds_sum + out$probabilities <- (target_probability * inverted_odds) / inverted_odds_sum } else if (method == 'shin'){ @@ -144,7 +182,6 @@ implied_probabilities <- function(odds, method='basic', normalize=TRUE, grossmar problematic_shin <- logical(n_odds) if (shin_method == 'js'){ - #if (shin_method == 'js' | grossmargin != 0){ for (ii in 1:n_odds){ # Skip rows with missing values. 
@@ -155,7 +192,7 @@ implied_probabilities <- function(odds, method='basic', normalize=TRUE, grossmar # initialize zz at 0 zz_tmp <- 0 - for (jj in 1:1000){ + for (jj in 1:shin_maxiter){ zz_prev <- zz_tmp if (grossmargin != 0){ @@ -166,7 +203,7 @@ implied_probabilities <- function(odds, method='basic', normalize=TRUE, grossmar if (abs(zz_tmp - zz_prev) <= .Machine$double.eps^0.25){ break - } else if (jj >= 1000){ + } else if (jj >= shin_maxiter){ problematic_shin[ii] <- TRUE } @@ -174,7 +211,7 @@ implied_probabilities <- function(odds, method='basic', normalize=TRUE, grossmar probs[ii,] <- shin_func(zz=zz_tmp, io = inverted_odds[ii,]) } } - } else { + } else if (shin_method == 'uniroot'){ for (ii in 1:n_odds){ # Skip rows with missing values. @@ -182,11 +219,19 @@ implied_probabilities <- function(odds, method='basic', normalize=TRUE, grossmar next } - res <- stats::uniroot(f=shin_solvefor, interval = c(0,0.4), io=inverted_odds[ii,]) + + res <- uniroot2(f=shin_solvefor, io=inverted_odds[ii,], trgtprob = target_probability, + interval = uniroot_opts$interval, extendInt = uniroot_opts$extendInt, + tol = uniroot_opts$tol, maxiter = uniroot_opts$maxiter) zvalues[ii] <- res$root probs[ii,] <- shin_func(zz=res$root, io = inverted_odds[ii,]) + if (!is.null(res$message)){ + problematic_uniroot[ii] <- TRUE + problematic_uniroot_messages[ii] <- res$message + } + } } @@ -194,13 +239,14 @@ implied_probabilities <- function(odds, method='basic', normalize=TRUE, grossmar out$zvalues <- zvalues if (any(problematic_shin[!missing_idx])){ - warning(sprintf('Could not find z: Did not converge in %d instances. Some results may be unreliable. See the "problematic" vector in the output.', + warning(sprintf('Could not find z: Did not converge in %d instances. 
Some results may be unreliable.', sum(problematic_shin))) } } else if (method == 'bb'){ - zz <- (((1-grossmargin)*inverted_odds_sum) - 1) / (n_outcomes-1) + # zz <- (((1-grossmargin) * inverted_odds_sum) - 1) / (n_outcomes-1) + zz <- (((1-grossmargin) * inverted_odds_sum) - target_probability) / (n_outcomes-target_probability) probs <- (((1-grossmargin) * inverted_odds) - zz) / (1-zz) out$probabilities <- probs @@ -225,9 +271,18 @@ implied_probabilities <- function(odds, method='basic', normalize=TRUE, grossmar next } - res <- stats::uniroot(f=or_solvefor, interval = c(0.05, 5), io=inverted_odds[ii,]) + res <- uniroot2(f=or_solvefor, io=inverted_odds[ii,], trgtprob = target_probability, + interval = uniroot_opts$interval, extendInt = uniroot_opts$extendInt, + tol = uniroot_opts$tol, maxiter = uniroot_opts$maxiter) + odds_ratios[ii] <- res$root probs[ii,] <- or_func(cc=res$root, io = inverted_odds[ii,]) + + if (!is.null(res$message)){ + problematic_uniroot[ii] <- TRUE + problematic_uniroot_messages[ii] <- res$message + } + } out$probabilities <- probs @@ -245,9 +300,16 @@ implied_probabilities <- function(odds, method='basic', normalize=TRUE, grossmar next } - res <- stats::uniroot(f=pwr_solvefor, interval = c(0.0001, 1), io=inverted_odds[ii,]) + res <- uniroot2(f=pwr_solvefor, io=inverted_odds[ii,], trgtprob = target_probability, + interval = uniroot_opts$interval, extendInt = uniroot_opts$extendInt, + tol = uniroot_opts$tol, maxiter = uniroot_opts$maxiter) exponents[ii] <- res$root probs[ii,] <- pwr_func(nn=res$root, io = inverted_odds[ii,]) + + if (!is.null(res$message)){ + problematic_uniroot[ii] <- TRUE + problematic_uniroot_messages[ii] <- res$message + } } out$probabilities <- probs @@ -264,7 +326,7 @@ implied_probabilities <- function(odds, method='basic', normalize=TRUE, grossmar next } - probs[ii,] <- inverted_odds[ii,] - ((inverted_odds_sum[ii] - 1) / n_outcomes) + probs[ii,] <- inverted_odds[ii,] - ((inverted_odds_sum[ii] - target_probability) / 
n_outcomes) } out$probabilities <- probs @@ -280,25 +342,28 @@ implied_probabilities <- function(odds, method='basic', normalize=TRUE, grossmar next } - # 0.1 seems to be a reasonable upper bound, with possibility of extending. - res <- stats::uniroot(f=jsd_solvefor, interval = c(0.0000001, 0.1), - io=inverted_odds[ii,], - tol=0.0000001) + res <- uniroot2(f=jsd_solvefor, io=inverted_odds[ii,], trgtprob = target_probability, + interval = uniroot_opts$interval, extendInt = uniroot_opts$extendInt, + tol = uniroot_opts$tol, maxiter = uniroot_opts$maxiter) jsds[ii] <- res$root probs[ii,] <- jsd_func(jsd=res$root, io = inverted_odds[ii,]) + + if (!is.null(res$message)){ + problematic_uniroot[ii] <- TRUE + problematic_uniroot_messages[ii] <- res$message + } } out$probabilities <- probs out$distance <- jsds - } - ## do a final normalization to make sure the probabilites + ## do a final normalization to make sure the probabilities ## sum to 1 without rounding errors. if (normalize){ - out$probabilities <- out$probabilities / rowSums(out$probabilities) + out$probabilities <- (target_probability * out$probabilities) / rowSums(out$probabilities) } # Make sure the matrix of implied probabilities has column names. @@ -306,7 +371,7 @@ implied_probabilities <- function(odds, method='basic', normalize=TRUE, grossmar colnames(out$probabilities) <- colnames(odds) } - # check if there are any probabilites outside the 0-1 range. + # check if there are any probabilities outside the 0-1 range. problematic <- apply(out$probabilities, MARGIN = 1, FUN=function(x){any(x > 1 | x < 0)}) problematic[is.na(problematic)] <- TRUE problematic[missing_idx] <- NA @@ -316,6 +381,20 @@ implied_probabilities <- function(odds, method='basic', normalize=TRUE, grossmar sum(problematic))) } + # Give warnings for problems when uniroot was used. 
+ if (use_uniroot){ + + if (any(problematic_uniroot)){ + problematic[problematic_uniroot] <- TRUE + + problematic_uniroot_messages <- unique(problematic_uniroot_messages) + for (ww in 1:length(problematic_uniroot_messages)){ + warning(problematic_uniroot_messages[ww]) + } + } + } + + if (method == 'shin'){ problematic <- problematic | problematic_shin } @@ -323,7 +402,7 @@ implied_probabilities <- function(odds, method='basic', normalize=TRUE, grossmar if (method %in% c('shin', 'bb')){ negative_z <- out$zvalues < 0 if (any(negative_z[!missing_idx])){ - warning(sprintf('z estimated to be negative: Some results may be unreliable. See the "problematic" vector in the output.', + warning(sprintf('z estimated to be negative: Some results may be unreliable.', negative_z)) } } @@ -340,6 +419,48 @@ implied_probabilities <- function(odds, method='basic', normalize=TRUE, grossmar # and be used with uniroot. ######################################################### + +default_uniroot_opts <- function(method){ + opts <- list(extendInt = 'yes', + maxiter = 1000, + tol = .Machine$double.eps^0.25) + + if (method == 'shin'){ + opts$interval <- c(0, 0.4) + } else if (method == 'or'){ + opts$interval <- c(0.95, 5) + } else if (method == 'power'){ + opts$interval <- c(0.0001, 1) + } else if (method == 'jsd'){ + opts$interval <- c(0.0000001, 0.1) + opts$tol = 0.000001 + } + + return(opts) + +} + + + +# Wrapper around uniroot, but returns a list with NA results +# if the solver fails. +uniroot2 <- function(f, interval, ..., + extendInt = 'no', tol = .Machine$double.eps^0.25, maxiter = 1000){ + + + res <- tryCatch({ + stats::uniroot(f = f, ..., interval = interval, extendInt = extendInt, tol = tol, maxiter = maxiter) + }, error = function(e){ + list(root = NA, message = as.character(e)) + }) + + return(res) + +} + + + + # Calculate the probabilities using Shin's formula, for a given value of z. # io = inverted odds. 
shin_func <- function(zz, io){ @@ -358,17 +479,17 @@ or_func <- function(cc, io){ # the condition that the sum of the probabilites must sum to 1. # Used with uniroot. -shin_solvefor <- function(zz, io){ +shin_solvefor <- function(zz, io, trgtprob){ tmp <- shin_func(zz, io) - 1 - sum(tmp) # 0 when the condition is satisfied. + sum(tmp) - trgtprob # 0 when the condition is satisfied. } # The condition that the sum of the probabilites must sum to 1. # This function calulates the true probability, given bookmaker # probabilites xx, and the odds ratio cc. -or_solvefor <- function(cc, io){ +or_solvefor <- function(cc, io, trgtprob){ tmp <- or_func(cc, io) - sum(tmp) - 1 + sum(tmp) - trgtprob } # power function. @@ -379,9 +500,9 @@ pwr_func <- function(nn, io){ # The condition that the sum of the probabilites must sum to 1. # This function calulates the true probability, given bookmaker # probabilites xx, and the inverse exponent. nn. -pwr_solvefor <- function(nn, io){ +pwr_solvefor <- function(nn, io, trgtprob){ tmp <- pwr_func(nn, io) - sum(tmp) - 1 + sum(tmp) - trgtprob } # Simple discrete KL-divergence. @@ -415,8 +536,10 @@ jsd_func <- function(jsd, io){ # That the underlying probability i less than the # inverse odds. pp[ii] <- stats::uniroot(f = tosolve, - interval = c(0.00001, io[ii]), + interval = c(0.000001, io[ii]), + extendInt = 'yes', io = io[ii], jsd = jsd)$root + } return(pp) } @@ -424,8 +547,8 @@ jsd_func <- function(jsd, io){ # Calculate the probabilities using the Jensen-Shannon distance method, # for a given value of the odds ratio cc. # io = inverted odds. 
-jsd_solvefor <- function(jsd, io){ - sum(jsd_func(jsd=jsd, io = io)) - 1 +jsd_solvefor <- function(jsd, io, trgtprob){ + sum(jsd_func(jsd=jsd, io = io)) - trgtprob } diff --git a/build/vignette.rds b/build/vignette.rds index a1a2d393ec7f7efa30f71e4c59bbd5b76964dac0..f231d03231f13921b9864a1d5dd3f5bb3d785c23 100644 GIT binary patch literal 247 zcmV%71Lvo|gc2mHupMmR xc2(N;{-U8)*VHW=H_q-h)OG#nI?4pCoFfO07&SCKYOA;1;|Z-o_2&cv0031sbmss7 literal 221 zcmV<303!b%iwFP!0000025nJM3xY5Z-qb9GqVNw4diPg|5WR%-(Cct3N9trQq_=)~ zc9w>&4(_NrFnT*VIksE;3bSH=m-rWxh%&|u3SxF0C8zbfsuXxG2=jVSu zZVk#}&+m;?180J9dyEr{oDX}CLVpYU5XF6p6H~*_{E2=~$tE0bSax6XfY5@Oho(8L X%|m@gH4V=S_?_+-8mLON)Bykh25x0_ diff --git a/inst/doc/Troubleshooting.R b/inst/doc/Troubleshooting.R new file mode 100644 index 0000000..b5bfb71 --- /dev/null +++ b/inst/doc/Troubleshooting.R @@ -0,0 +1,26 @@ +## ----setup, include = FALSE--------------------------------------------------- +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) + +## ----example1_1--------------------------------------------------------------- +require(implied) + +my_odds <- rbind(c(1.15, 5, 10, 25), + c(4.1, 4.2, 8.2, 2.1), + c(3.8, 4.7, 5.9, 2.3)) + +my_probs <- implied_probabilities(my_odds, method = 'additive') + + +## ----example1_2--------------------------------------------------------------- +my_probs$problematic + +# Can also just list the line numbers +which(my_probs$problematic) + + +## ----example1_3--------------------------------------------------------------- +my_probs$probabilities + diff --git a/inst/doc/Troubleshooting.Rmd b/inst/doc/Troubleshooting.Rmd new file mode 100644 index 0000000..265c22e --- /dev/null +++ b/inst/doc/Troubleshooting.Rmd @@ -0,0 +1,162 @@ +--- +title: "Troubleshooting" +author: "Jonas C. 
Lindstrøm" +date: "`r Sys.Date()`" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Troubleshooting} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r setup, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + + +In this document you can find some guidance on what to do if you get errors or weird results. + +Errors and warning messages will typically occur when the algorithms that convert +odds to probabilities fail to give proper results. This does not necessarily +mean that there is a bug in the algorithms; it is just as likely that the mathematical +relationship between the odds and the underlying probabilities does not conform to +the assumptions needed for the different methods to work. + +In my experience, the following scenarios can often cause problems for at least +some of the methods: + +- Extremely large odds can often create problems. +- Many outcomes, such as win-odds for competitions with many contestants. +- Very large bookmaker margins. + +When the conversion from odds to probabilities fails, or gives inappropriate results, +a warning is shown that indicates what the problem might be. In addition, the output +from the implied_probabilities function will contain a vector that indicates which +lines have problems. + +## Identifying problematic results + +Here is an example. In the code below there are 3 lines of 4-way odds that are converted +to probabilities using the 'additive' method. The first of these odds lines fails to be +properly converted and you get a warning saying 'Probabilities outside the 0-1 +range produced at 1 instances'. + +The point of this example is not to 'fix' the results or tweak the algorithm to +work. The algorithm works as it should; it is just that the mathematical relationship +between the odds and the underlying probabilities does not work well with the additive +method.
The point is just to show how to find the problematic results, if they occur. + +The methods 'wpo' and 'bb' don't work with this set of odds either, but the +rest do. + +```{r example1_1} +require(implied) + +my_odds <- rbind(c(1.15, 5, 10, 25), + c(4.1, 4.2, 8.2, 2.1), + c(3.8, 4.7, 5.9, 2.3)) + +my_probs <- implied_probabilities(my_odds, method = 'additive') + +``` + + +The vector named "problematic" indicates that there is a problem in the first odds-line +in the input. + +```{r example1_2} +my_probs$problematic + +# Can also just list the line numbers +which(my_probs$problematic) + +``` + +And if we look at the probabilities, you will see that the 4th probability +in the first line is negative, which isn't a valid probability. + +```{r example1_3} +my_probs$probabilities +``` + + + + +## Warning: Probabilities outside the 0-1 range + +This warning means that some of the converted probabilities are outside the valid +range of probabilities, which is between 0 and 1. Most likely a negative probability. + +Unless there is an accompanying warning about uniroot-problems, there isn't much +to do about this, and you should conclude that the conversion method you have used +is incompatible with the odds you have. Try another method. + + + +## Warning: Error in stats::uniroot: f() values at end points not of opposite sign + +This error can happen when using the methods 'shin', 'or', 'power', or 'jsd'. These +methods convert the odds to probabilities using an equation solver called uniroot. +Uniroot does a search of possible values of the factor used in the methods, and +finds the factor that gives correct probabilities (i.e. they sum to 1). Sometimes +the solver can't find the probabilities. + +There are two possible reasons for why the solver can't find the correct factor and +the correct probabilities. The first reason (and the most likely) is that the method +you have chosen simply does not have a valid solution.
Unfortunately, there is not +really much to do about it, other than using a different method. + +The second reason could be that some of the settings used in the solver do not +allow the algorithm to find the solution. You can change some of the settings in +the uniroot solver via the uniroot_options argument in implied_probabilities(). + +The following uniroot settings can be changed: interval, maxiter, tol and extendInt. +Take a look at the help page for the uniroot function for more information about +the different settings. + + + +## Warning in log(x/y) : NaNs produced + +This warning sometimes occurs with method 'jsd', when the odds are extreme or otherwise +difficult to convert. This does not get flagged as problematic, because it might +not actually be a problem. But you should check if the probabilities in question +seem reasonable. + + +## Error: Some inverse odds sum to less than 1. + +This error occurs when the naive implied probabilities sum to less than 1. The whole +point of the conversion methods in this package is to convert odds to proper probabilities +where the odds imply a total probability greater than 1, which gives the bookmaker +an advantage. If they sum to less than one it means that the bookmaker's odds are advantageous +for the bettor. This is a very unlikely scenario, and it is most likely due to an +error in your data processing pipeline. + +The conversion methods might be made to work in this case, but I haven't tried or +tested it. This might change in the future. + + +## Warning: Could not find z: Did not converge in x instances. Some results may be unreliable. + +This warning can happen with method 'shin', and with shin_method = 'js'. There are +two possible fixes: + + - Try to increase shin_maxiter from the default 1000 to something larger, like 2000. + - Change shin_method to 'uniroot'. + + +## Warning: z estimated to be negative: Some results may be unreliable. + +This warning can happen with methods 'shin' and 'bb'.
I am actually not sure if +the results should be considered unreliable, or if they can be useful. These are +not flagged as problematic, and you need to look at the 'zvalues' in the output +to see which ones are negative. + + + + + diff --git a/inst/doc/Troubleshooting.html b/inst/doc/Troubleshooting.html new file mode 100644 index 0000000..a5b5ef9 --- /dev/null +++ b/inst/doc/Troubleshooting.html @@ -0,0 +1,237 @@ + + + + + + + + + + + + + + + + +Troubleshooting + + + + + + + + + + + + + + + + + + + + + + + + + +

Troubleshooting

+

Jonas C. Lindstrøm

+

2023-06-11

+ + + +

In this document you can find some guidance on what to do if you get errors or weird results.

+

Errors and warning messages will typically occur when the algorithms that convert odds to probabilities fail to give proper results. This does not necessarily mean that there is a bug in the algorithms; it is just as likely that the mathematical relationship between the odds and the underlying probabilities does not conform to the assumptions needed for the different methods to work.

+

In my experience, the following scenarios can often cause problems for at least some of the methods:

+
    +
  • Extremely large odds can often create problems.
  • +
  • Many outcomes, such as win-odds for competitions with many contestants.
  • +
  • Very large bookmaker margins.
  • +
+

When the conversion from odds to probabilities fails, or gives inappropriate results, a warning is shown that indicates what the problem might be. In addition, the output from the implied_probabilities function will contain a vector that indicates which lines have problems.

+
+

Identifying problematic results

+

Here is an example. In the code below there are 3 lines of 4-way odds that are converted to probabilities using the ‘additive’ method. The first of these odds lines fails to be properly converted and you get a warning saying ‘Probabilities outside the 0-1 range produced at 1 instances’.

+

The point of this example is not to ‘fix’ the results or tweak the algorithm to work. The algorithm works as it should; it is just that the mathematical relationship between the odds and the underlying probabilities does not work well with the additive method. The point is just to show how to find the problematic results, if they occur.

+

The methods ‘wpo’ and ‘bb’ don’t work with this set of odds either, but the rest do.

+
require(implied)
+#> Loading required package: implied
+#> If you find this package useful, please consider supporting the development at
+#> https://ko-fi.com/opisthokonta
+
+my_odds <- rbind(c(1.15, 5, 10, 25),
+                c(4.1, 4.2, 8.2, 2.1),
+                c(3.8, 4.7, 5.9, 2.3))
+
+my_probs <- implied_probabilities(my_odds, method = 'additive')
+#> Warning in implied_probabilities(my_odds, method = "additive"): Probabilities outside the 0-1 range produced at 1 instances.
+

The vector named “problematic” indicates that there is a problem in the first odds-line in the input.

+
my_probs$problematic
+#> [1]  TRUE FALSE FALSE
+
+# Can also just list the line numbers
+which(my_probs$problematic)
+#> [1] 1
+

And if we look at the probabilities, you will see that the 4th probability in the first line is negative, which isn’t a valid probability.

+
my_probs$probabilities
+#>           [,1]      [,2]      [,3]       [,4]
+#> [1,] 0.8171739 0.1476087 0.0476087 -0.0123913
+#> [2,] 0.2238676 0.2180604 0.1019164  0.4561556
+#> [3,] 0.2431084 0.1927165 0.1494420  0.4147331
+
+
+

Warning: Probabilities outside the 0-1 range

+

This warning means that some of the converted probabilities are outside the valid range of probabilities, which is between 0 and 1. Most likely a negative probability.

+

Unless there is an accompanying warning about uniroot-problems, there isn’t much to do about this, and you should conclude that the conversion method you have used is incompatible with the odds you have. Try another method.

+
+
+

Warning: Error in stats::uniroot: f() values at end points not of opposite sign

+

This error can happen when using the methods ‘shin’, ‘or’, ‘power’, or ‘jsd’. These methods convert the odds to probabilities using an equation solver called uniroot. Uniroot does a search of possible values of the factor used in the methods, and finds the factor that gives correct probabilities (i.e. they sum to 1). Sometimes the solver can’t find the probabilities.

+

There are two possible reasons for why the solver can’t find the correct factor and the correct probabilities. The first reason (and the most likely) is that the method you have chosen simply does not have a valid solution. Unfortunately, there is not really much to do about it, other than using a different method.

+

The second reason could be that some of the settings used in the solver do not allow the algorithm to find the solution. You can change some of the settings in the uniroot solver via the uniroot_options argument in implied_probabilities().

+

The following uniroot settings can be changed: interval, maxiter, tol and extendInt. Take a look at the help page for the uniroot function for more information about the different settings.

+
+
+

Warning in log(x/y) : NaNs produced

+

This warning sometimes occurs with method ‘jsd’, when the odds are extreme or otherwise difficult to convert. This does not get flagged as problematic, because it might not actually be a problem. But you should check if the probabilities in question seem reasonable.

+
+
+

Error: Some inverse odds sum to less than 1.

+

This error occurs when the naive implied probabilities sum to less than 1. The whole point of the conversion methods in this package is to convert odds to proper probabilities where the odds imply a total probability greater than 1, which gives the bookmaker an advantage. If they sum to less than one it means that the bookmaker’s odds are advantageous for the bettor. This is a very unlikely scenario, and it is most likely due to an error in your data processing pipeline.

+

The conversion methods might be made to work in this case, but I haven’t tried or tested it. This might change in the future.

+
+
+

Warning: Could not find z: Did not converge in x instances. Some results may be unreliable.

+

This warning can happen with method ‘shin’, and with shin_method = ‘js’. There are two possible fixes:

+
    +
  • Try to increase shin_maxiter from the default 1000 to something larger, like 2000.
  • +
  • Change shin_method to ‘uniroot’.
  • +
+
+
+

Warning: z estimated to be negative: Some results may be unreliable.

+

This warning can happen with methods ‘shin’ and ‘bb’. I am actually not sure if the results should be considered unreliable, or if they can be useful. These are not flagged as problematic, and you need to look at the ‘zvalues’ in the output to see which ones are negative.

+
+ + + + + + + + + + + diff --git a/inst/doc/introduction.R b/inst/doc/introduction.R index cec0bbc..3488a58 100644 --- a/inst/doc/introduction.R +++ b/inst/doc/introduction.R @@ -88,6 +88,17 @@ res9$probabilities # The estimated noise (JS distance) res9$distance +## ----target_prob-------------------------------------------------------------- +# Example odds. +odds_reach_final <- c(1.6, 2.63, 3.3, 3.7, 5.6, 7.1, 12.5, 16.5, 25) + +res10 <- implied_probabilities(odds_reach_final, method = 'or', target_probability = 2) + +res10$probabilities + +sum(res10$probabilities) + + ## ----imp_odds1---------------------------------------------------------------- res_odds1 <- implied_odds(res4$probabilities[1,], diff --git a/inst/doc/introduction.Rmd b/inst/doc/introduction.Rmd index d3ea57a..45ec38a 100644 --- a/inst/doc/introduction.Rmd +++ b/inst/doc/introduction.Rmd @@ -1,252 +1,277 @@ ---- -title: "Introduction to the implied package" -author: "Jonas C. Lindstrøm" -date: "`r Sys.Date()`" -output: rmarkdown::html_vignette -vignette: > - %\VignetteIndexEntry{Introduction to the implied package} - %\VignetteEngine{knitr::rmarkdown} - %\VignetteEncoding{UTF-8} ---- - -```{r setup, include = FALSE} -knitr::opts_chunk$set( - collapse = TRUE, - comment = "#>" -) -``` - -This package contains functions that convert between bookmaker odds and probabilities. The function implied_probabilities() convert bookmaker odds into proper probabiltiies. The function implied_odds() does the inverse conversion, it turns proper probabilities into bookmaker odds. Several methods are available, with different assumptions regarding the underlying mechanism the bookmakers convert their probabilities into odds. The main focus of this introduction is present how the package works and the methods that convert bookmaker odds into probabilities and. Towards the end is a small demostration on how to convert probabiliteis to bookmaker odds. 
- -A naive conversion of bookmaker odds into probabilities has two main problems. The first is that the probabilities are not proper probabilities, since they sum to more than 1. The excess probability is called the bookmakers margin. The second problem is that the probabilities, even if the margin is removed, will be biased in several ways, usually because of what is called the [favorite-longshot bias](https://en.wikipedia.org/wiki/Favourite-longshot_bias). The methods in this package remove the bookmaker margin and some of them also adjust for favorite-longshot bias. - - -## The basic method - -The default method used by the function implied_probabilities() is called the basic method. This is the simplest and most common method for converting bookmaker odds into probabilties, and is obtained by dividing the naive probabilities (the inverted odds) by the sum of the inverted odds. If pi is the true underlying probability for outcome i, and ri is the cooresponding inverted odds, then the probabilities are computed as - -pi = ri / sum(r) - -This method tend to be the least accurate of the methods in this package. I have also seen this normalization method been referred to as the multiplicative method. - -The implied_probabilities() function return a list with the proper probabilities (as a matrix) and the bookmaker margins. - -In the examples below are three sets of bookmaker odds from three football matches. - -```{r basic} - -library(implied) - -# One column for each outcome, one row for each race or match. 
-my_odds <- rbind(c(4.20, 3.70, 1.95), - c(2.45, 3.70, 2.90), - c(2.05, 3.20, 3.80)) -colnames(my_odds) <- c('Home', 'Draw', 'Away') - -res1 <- implied_probabilities(my_odds) - -res1$probabilities - -res1$margin - -``` - - -## Margin Weights Proportional to the Odds - -This method is from [Joseph Buchdahl's Wisom of the Crowds document](https://www.football-data.co.uk/wisdom_of_crowd_bets), and assumes that the margin applied by the bookmaker for each of the outcome is proprtional to the probabilitiy of the outcome. In other words, the excessive probabilties are unevenly applied in a way that is reflects the favorite-longshot bias. - -The probabilities are calculated from the bookmaker odds O using the following formula - -pi = (n - M * Oi) / n * Oi - -where n is the number of outcomes, and M is the bookmaker margin. - -```{r wpo} -res2 <- implied_probabilities(my_odds, method = 'wpo') - -res2$probabilities - -# The margins applied to each outcome. -res2$specific_margins -``` - -## The odds ratio method - -The odds ratio method is also from the Wisdom of the Crowds document, but is originally from an [article by Keith Cheung](https://www.sportstradingnetwork.com/article/fixed-odds-betting-traditional-odds/). This method models the relationship between the proper probabilities and the improper bookmaker probabilties using the odds ratio (OR) function: - -OR = pi (1 - ri) / ri (1 - pi) - -This gives the probabilities - -pi = ri / OR + ri - (OR * ri) - -where the odds ratio OR is selected so that sum(pi) = 1. - - -```{r or} -res3 <- implied_probabilities(my_odds, method = 'or') - -res3$probabilities - -# The odds ratios converting the proper probablities to bookmaker probabilities. -res3$odds_ratios -``` - -## The power method - -The power method models the bookmaker probabilties as a power function of the proper probabilties. This method is also described in the Wisdom of the Crowds document, where it is referred to as the logarithmic method. 
- -pi = ri(1/k) - -where k is selected so that sum(pi) = 1. - -```{r power} -res4 <- implied_probabilities(my_odds, method = 'power') - -res4$probabilities - -# The inverse exponents (n) used to convert the proper probablities to bookmaker probabilities. -res4$exponents -``` - -## The additive method - -The additive method removes the margin from the naive probabilites by subtracting an equal amount of of the margin from each outcome. The formula used is - -pi = ri - ((sum(r) - 1) / n) - -If there are only two outcomes, the additive method and Shin's method are equivalent. - - -```{r additive1} - -res5 <- implied_probabilities(my_odds, method = 'additive') - -res5$probabilities - -``` - -One problem with the additive method is that it can produce negative probabilities, escpecially for outcomes with low probabilties. This can often be the case when there are many outcomes, for example in racing sports. If this happens, you will be given a warning. Here is an example taken from Clarke et al (2017): - -```{r additive2} - -my_odds2 <- t(matrix(1/c(0.870, 0.2, 0.1, 0.05, 0.02, 0.01))) -colnames(my_odds2) <- paste('X', 1:6, sep='') - -res6 <- implied_probabilities(my_odds2, method = 'additive') - -res6$probabilities - -``` - - - - -## Balanced books and Shin's method - -The two methods referred to as "balanced book" and Shin's method are based on the assumption that there is a small proportion of bettors that actually knows the outcome (called inside traders), and the rest of the bettors reflect the otherwise "true" uncertainty about the outcome. The proportion of inside traders is denoted Z. - -The two methods differ in what assumptions they make about how the bookmakers react to the pressence of inside traders. Shin's method is derived from the assumption that the bookmakers tries to maximize their profits when there are inside traders. 
The balanced books method assumes the bookmakers tries to minimize their losses in the worst case scenario if the least likely outcome were to acctually occur. - -We can not know what the insiders know, but both methods gives an estimate of the proportion of insiders. - - -```{r shin} -res7 <- implied_probabilities(my_odds, method = 'shin') - -res7$probabilities - -# The estimated proportion of inside traders. -res7$zvalues -``` - - -```{r bb} -# Balanced books -res8 <- implied_probabilities(my_odds, method = 'bb') - -res8$probabilities - -# The estimated proportion of inside traders. -res8$zvalues -``` - - -## The Jensen–Shannon distance method - -This method sees the improper bookmaker probabilties as a noisy version of the true underlying probabilities, and uses the [Jensen–Shannon (JS) distance](https://en.wikipedia.org/wiki/Jensen%E2%80%93Shannon_divergence) as a measure of how noisy the bookmaker probabilities are. - -For the sake of finding the denoised proabilities pi, each outcome i is modelled as a binomial variable, with outcomes i and NOT i. These have probabilities pi and 1-pi, with corresponding improper bookmaker probabilities ri and 1-ri. For a given noise-level D, as measued by the symmetric JS distance, the underlying probabilities can be found by solving the JS distance equation for pi: - - -D = 0.5 * BKL(pi, mi) + 0.5 * BKL(ri, mi) - -where mi = (pi + ri) / 2 - -and - -BKL(x, y) = x * log(x/y) + (1-x) * log((1-x)/(1-y))) + y * log(y/x) + (1-y) * log((1-y)/(1-y)) - -is the "binomial" Kullback–Leibler divergence. - -The solution is found numerically by finding the value of of D so that sum(pi) = 1. - -The method was developed by Christopher D. Long (twitter: [\@octonion](https://twitter.com/octonion)), and described in a series of Twitter postings [[1](https://twitter.com/octonion/status/1412847000068952064)]. 
- -```{r jsd} -# Balanced books -res9 <- implied_probabilities(my_odds, method = 'jsd') - -res9$probabilities - -# The estimated noise (JS distance) -res9$distance -``` - -## Converting probabilities to odds - -There is also a function that can do the opposite what the implied_probabilities() function does, namely the implied_odds() function. This function converts probabilities to odds, for a given margin, the inverse of the methods as described above. Not all methods have been implemented yet. Take a look at the help file for the function for more details. - -In the code example below we use take the results of converting the odds to probabilities using the power method, and convert them back to odds again, with the same margin. We pretty much recover the original odds, except for some small numerical inaccuracy. - - -```{r imp_odds1} - -res_odds1 <- implied_odds(res4$probabilities[1,], - margin = res4$margin[1], - method = 'power') - -res_odds1$odds - -# The exponents. -res_odds1$exponents - -# Compare to the exponent from the odds-to-probability conversion. -res4$exponents[1] - -``` - - -## Other packages -The [odds.converter](https://cran.r-project.org/package=odds.converter) package can convert between different odds formats, including to decimal odds, that this package requires. 
- - -## Literature -Here are some relevant references and links: - -* Joseph Buchdahl - USING THE WISDOM OF THE CROWD TO FIND VALUE IN A FOOTBALL MATCH BETTING MARKET [Link](https://www.football-data.co.uk/wisdom_of_crowd_bets) - -* Keith Cheung (2015) Fixed-odds betting and traditional odds [Link](https://www.sportstradingnetwork.com/article/fixed-odds-betting-traditional-odds/) - -* Stephen Clarke, Stephanie Kovalchik & Martin Ingram (2017) Adjusting Bookmaker’s Odds to Allow for Overround [Link](http://www.sciencepublishinggroup.com/journal/paperinfo?journalid=155&doi=10.11648/j.ajss.20170506.12) - -* Hyun Song Shin (1992) Prices of State Contingent Claims with Insider Traders, and the Favourite-Longshot Bias [Link](https://doi.org/10.2307/2234526) - -* Hyun Song Shin (1993) Measuring the Incidence of Insider Trading in a Market for State-Contingent Claims [Link](https://doi.org/10.2307/2234526) - -* Bruno Jullien & Bernard Salanié (1994) Measuring the incidence of insider trading: A comment on Shin [Link](https://doi.org/10.2307/2235458) - -* John Fingleton & Patrick Waldron (1999) Optimal Determination of Bookmakers' Betting Odds: Theory and Tests.[Link](https://www.semanticscholar.org/paper/Optimal-Determination-of-Bookmakers'-Betting-Odds%3A-Fingleton-Waldron/e576f3b103e0ba041ae072a9201b948059c7806e) - - +--- +title: "Introduction to the implied package" +author: "Jonas C. Lindstrøm" +date: "`r Sys.Date()`" +output: + rmarkdown::html_vignette: + toc: true +vignette: > + %\VignetteIndexEntry{Introduction to the implied package} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r setup, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +This package contains functions that convert between bookmaker odds and probabilities. The function implied_probabilities() convert bookmaker odds into proper probabilities. 
The function implied_odds() does the inverse conversion: it turns proper probabilities into bookmaker odds. Several methods are available, with different assumptions regarding the underlying mechanism by which the bookmakers convert their probabilities into odds. The main focus of this introduction is to present how the package works and the methods that convert bookmaker odds into probabilities. Towards the end is a small demonstration on how to convert probabilities to bookmaker odds. + +A naive conversion of bookmaker odds into probabilities has two main problems. The first is that the probabilities are not proper probabilities, since they sum to more than 1. The excess probability is called the bookmaker's margin. The second problem is that the probabilities, even if the margin is removed, will be biased in several ways, usually because of what is called the [favorite-longshot bias](https://en.wikipedia.org/wiki/Favourite-longshot_bias). The methods in this package remove the bookmaker margin and some of them also adjust for favorite-longshot bias. + +In version 0.5 a new feature was introduced. It is now possible to convert odds to probabilities with multiple winners, which means that the probabilities should sum to something greater than 1. One example of this is when you have odds for different teams/players to finish top 3 in a league, in which case the probabilities should sum to 3 instead of 1. The details are explained towards the end of this document. + + +# Methods +## The basic method + +The default method used by the function implied_probabilities() is called the basic method. This is the simplest and most common method for converting bookmaker odds into probabilities, and is obtained by dividing the naive probabilities (the inverted odds) by the sum of the inverted odds. 
If pi is the true underlying probability for outcome i, and ri is the corresponding inverted odds, then the probabilities are computed as + +pi = ri / sum(r) + +This method tends to be the least accurate of the methods in this package. I have also seen this normalization method being referred to as the multiplicative method. + +The implied_probabilities() function returns a list with the proper probabilities (as a matrix) and the bookmaker margins. + +In the examples below are three sets of bookmaker odds from three football matches. + +```{r basic} + +library(implied) + +# One column for each outcome, one row for each race or match. +my_odds <- rbind(c(4.20, 3.70, 1.95), + c(2.45, 3.70, 2.90), + c(2.05, 3.20, 3.80)) +colnames(my_odds) <- c('Home', 'Draw', 'Away') + +res1 <- implied_probabilities(my_odds) + +res1$probabilities + +res1$margin + +``` + + +## Margin Weights Proportional to the Odds + +This method is from [Joseph Buchdahl's Wisdom of the Crowds document](https://www.football-data.co.uk/wisdom_of_crowd_bets), and assumes that the margin applied by the bookmaker for each of the outcomes is proportional to the probability of the outcome. In other words, the excessive probabilities are unevenly applied in a way that reflects the favorite-longshot bias. + +The probabilities are calculated from the bookmaker odds O using the following formula + +pi = (n - M * Oi) / (n * Oi) + +where n is the number of outcomes, and M is the bookmaker margin. + +```{r wpo} +res2 <- implied_probabilities(my_odds, method = 'wpo') + +res2$probabilities + +# The margins applied to each outcome. +res2$specific_margins +``` + +## The odds ratio method + +The odds ratio method is also from the Wisdom of the Crowds document, but is originally from an [article by Keith Cheung](https://www.sportstradingnetwork.com/article/fixed-odds-betting-traditional-odds/). 
This method models the relationship between the proper probabilities and the improper bookmaker probabilities using the odds ratio (OR) function: + +OR = ri (1 - pi) / (pi (1 - ri)) + +This gives the probabilities + +pi = ri / (OR + ri - (OR * ri)) + +where the odds ratio OR is selected so that sum(pi) = 1. + + +```{r or} +res3 <- implied_probabilities(my_odds, method = 'or') + +res3$probabilities + +# The odds ratios converting the proper probabilities to bookmaker probabilities. +res3$odds_ratios +``` + +## The power method + +The power method models the bookmaker probabilities as a power function of the proper probabilities. This method is also described in the Wisdom of the Crowds document, where it is referred to as the logarithmic method. + +pi = ri^(1/k) + +where k is selected so that sum(pi) = 1. + +```{r power} +res4 <- implied_probabilities(my_odds, method = 'power') + +res4$probabilities + +# The inverse exponents (k) used to convert the proper probabilities to bookmaker probabilities. +res4$exponents +``` + +## The additive method + +The additive method removes the margin from the naive probabilities by subtracting an equal amount of the margin from each outcome. The formula used is + +pi = ri - ((sum(r) - 1) / n) + +If there are only two outcomes, the additive method and Shin's method are equivalent. + + +```{r additive1} + +res5 <- implied_probabilities(my_odds, method = 'additive') + +res5$probabilities + +``` + +One problem with the additive method is that it can produce negative probabilities, especially for outcomes with low probabilities. This can often be the case when there are many outcomes, for example in racing sports. If this happens, you will be given a warning. 
Here is an example taken from Clarke et al (2017): + +```{r additive2} + +my_odds2 <- t(matrix(1/c(0.870, 0.2, 0.1, 0.05, 0.02, 0.01))) +colnames(my_odds2) <- paste('X', 1:6, sep='') + +res6 <- implied_probabilities(my_odds2, method = 'additive') + +res6$probabilities + +``` + + + + +## Balanced books and Shin's method + +The two methods referred to as "balanced book" and Shin's method are based on the assumption that there is a small proportion of bettors that actually know the outcome (called inside traders), and the rest of the bettors reflect the otherwise "true" uncertainty about the outcome. The proportion of inside traders is denoted Z. + +The two methods differ in what assumptions they make about how the bookmakers react to the presence of inside traders. Shin's method is derived from the assumption that the bookmakers try to maximize their profits when there are inside traders. The balanced books method assumes the bookmakers try to minimize their losses in the worst case scenario if the least likely outcome were to actually occur. + +We cannot know what the insiders know, but both methods give an estimate of the proportion of insiders. + + +```{r shin} +res7 <- implied_probabilities(my_odds, method = 'shin') + +res7$probabilities + +# The estimated proportion of inside traders. +res7$zvalues +``` + + +```{r bb} +# Balanced books +res8 <- implied_probabilities(my_odds, method = 'bb') + +res8$probabilities + +# The estimated proportion of inside traders. +res8$zvalues +``` + + +## The Jensen–Shannon distance method + +This method sees the improper bookmaker probabilities as a noisy version of the true underlying probabilities, and uses the [Jensen–Shannon (JS) distance](https://en.wikipedia.org/wiki/Jensen%E2%80%93Shannon_divergence) as a measure of how noisy the bookmaker probabilities are. + +For the sake of finding the denoised probabilities pi, each outcome i is modeled as a binomial variable, with outcomes i and NOT i. 
These have probabilities pi and 1-pi, with corresponding improper bookmaker probabilities ri and 1-ri. For a given noise-level D, as measured by the symmetric JS distance, the underlying probabilities can be found by solving the JS distance equation for pi: + + +D = 0.5 * BKL(pi, mi) + 0.5 * BKL(ri, mi) + +where mi = (pi + ri) / 2 + +and + +BKL(x, y) = x * log(x/y) + (1-x) * log((1-x)/(1-y)) + y * log(y/x) + (1-y) * log((1-y)/(1-x)) + +is the "binomial" Kullback–Leibler divergence. + +The solution is found numerically by finding the value of D so that sum(pi) = 1. + +The method was developed by Christopher D. Long (twitter: @octonion), and described in a series of Twitter postings. + +```{r jsd} +# Jensen–Shannon distance method +res9 <- implied_probabilities(my_odds, method = 'jsd') + +res9$probabilities + +# The estimated noise (JS distance) +res9$distance +``` + + +# Multiple winning outcomes + +In the examples above it has been assumed that the probabilities should sum to 1. This is the correct approach when only 1 of the possible outcomes occurs, but this is not correct when multiple outcomes occur. One example of this is odds for players/teams to reach the final in a tournament. In this case the probabilities should sum to 2, as two of the outcomes will be considered a win. Another example is placing in the top 5 in a league, in which case the probabilities should sum to 5. + +You can change the target_probability to something other than 1, and this works for most methods. + +```{r target_prob} +# Example odds. +odds_reach_final <- c(1.6, 2.63, 3.3, 3.7, 5.6, 7.1, 12.5, 16.5, 25) + +res10 <- implied_probabilities(odds_reach_final, method = 'or', target_probability = 2) + +res10$probabilities + +sum(res10$probabilities) + +``` + + +# Converting probabilities to odds + +There is also a function that can do the opposite of what the implied_probabilities() function does, namely the implied_odds() function. 
This function converts probabilities to odds, for a given margin, the inverse of the methods as described above. Not all methods have been implemented yet. Take a look at the help file for the function for more details. + +In the code example below we use take the results of converting the odds to probabilities using the power method, and convert them back to odds again, with the same margin. We pretty much recover the original odds, except for some small numerical inaccuracy. + + +```{r imp_odds1} + +res_odds1 <- implied_odds(res4$probabilities[1,], + margin = res4$margin[1], + method = 'power') + +res_odds1$odds + +# The exponents. +res_odds1$exponents + +# Compare to the exponent from the odds-to-probability conversion. +res4$exponents[1] + +``` + + +# Other packages +The [odds.converter](https://cran.r-project.org/package=odds.converter) package can convert between different odds formats, including to decimal odds, that this package requires. + + +# Literature +Here are some relevant references and links: + +* Joseph Buchdahl - USING THE WISDOM OF THE CROWD TO FIND VALUE IN A FOOTBALL MATCH BETTING MARKET [Link](https://www.football-data.co.uk/wisdom_of_crowd_bets) + +* Keith Cheung (2015) Fixed-odds betting and traditional odds [Link](https://www.sportstradingnetwork.com/article/fixed-odds-betting-traditional-odds/) + +* Stephen Clarke, Stephanie Kovalchik & Martin Ingram (2017) Adjusting Bookmaker’s Odds to Allow for Overround [Link](http://www.sciencepublishinggroup.com/journal/paperinfo?journalid=155&doi=10.11648/j.ajss.20170506.12) + +* Hyun Song Shin (1992) Prices of State Contingent Claims with Insider Traders, and the Favourite-Longshot Bias [Link](https://doi.org/10.2307/2234526) + +* Hyun Song Shin (1993) Measuring the Incidence of Insider Trading in a Market for State-Contingent Claims [Link](https://doi.org/10.2307/2234526) + +* Bruno Jullien & Bernard Salanié (1994) Measuring the incidence of insider trading: A comment on Shin 
[Link](https://doi.org/10.2307/2235458) + +* John Fingleton & Patrick Waldron (1999) Optimal Determination of Bookmakers' Betting Odds: Theory and Tests.[Link](https://www.semanticscholar.org/paper/Optimal-Determination-of-Bookmakers'-Betting-Odds%3A-Fingleton-Waldron/e576f3b103e0ba041ae072a9201b948059c7806e) + + diff --git a/inst/doc/introduction.html b/inst/doc/introduction.html index 7e4d666..3cb4028 100644 --- a/inst/doc/introduction.html +++ b/inst/doc/introduction.html @@ -12,7 +12,7 @@ - + Introduction to the implied package @@ -141,40 +141,59 @@

Introduction to the implied package

Jonas C. Lindstrøm

-

2022-06-08

+

2023-06-11

+ -

This package contains functions that convert between bookmaker odds and probabilities. The function implied_probabilities() convert bookmaker odds into proper probabiltiies. The function implied_odds() does the inverse conversion, it turns proper probabilities into bookmaker odds. Several methods are available, with different assumptions regarding the underlying mechanism the bookmakers convert their probabilities into odds. The main focus of this introduction is present how the package works and the methods that convert bookmaker odds into probabilities and. Towards the end is a small demostration on how to convert probabiliteis to bookmaker odds.

+

This package contains functions that convert between bookmaker odds and probabilities. The function implied_probabilities() converts bookmaker odds into proper probabilities. The function implied_odds() does the inverse conversion: it turns proper probabilities into bookmaker odds. Several methods are available, with different assumptions regarding the underlying mechanism by which the bookmakers convert their probabilities into odds. The main focus of this introduction is to present how the package works and the methods that convert bookmaker odds into probabilities. Towards the end is a small demonstration on how to convert probabilities to bookmaker odds.

A naive conversion of bookmaker odds into probabilities has two main problems. The first is that the probabilities are not proper probabilities, since they sum to more than 1. The excess probability is called the bookmaker's margin. The second problem is that the probabilities, even if the margin is removed, will be biased in several ways, usually because of what is called the favorite-longshot bias. The methods in this package remove the bookmaker margin and some of them also adjust for favorite-longshot bias.

+

In version 0.5 a new feature was introduced. It is now possible to convert odds to probabilities with multiple winners, which means that the probabilities should sum to something greater than 1. One example of this is when you have odds for different teams/players to finish top 3 in a league, in which case the probabilities should sum to 3 instead of 1. The details are explained towards the end of this document.

+
+

Methods

The basic method

-

The default method used by the function implied_probabilities() is called the basic method. This is the simplest and most common method for converting bookmaker odds into probabilties, and is obtained by dividing the naive probabilities (the inverted odds) by the sum of the inverted odds. If pi is the true underlying probability for outcome i, and ri is the cooresponding inverted odds, then the probabilities are computed as

+

The default method used by the function implied_probabilities() is called the basic method. This is the simplest and most common method for converting bookmaker odds into probabilities, and is obtained by dividing the naive probabilities (the inverted odds) by the sum of the inverted odds. If pi is the true underlying probability for outcome i, and ri is the corresponding inverted odds, then the probabilities are computed as

pi = ri / sum(r)

This method tends to be the least accurate of the methods in this package. I have also seen this normalization method being referred to as the multiplicative method.

The implied_probabilities() function returns a list with the proper probabilities (as a matrix) and the bookmaker margins.

In the examples below are three sets of bookmaker odds from three football matches.


 library(implied)
-#> If you find this package useful, please consider supporting the development at
-#> https://ko-fi.com/opisthokonta
-
-# One column for each outcome, one row for each race or match.
-my_odds <- rbind(c(4.20, 3.70, 1.95),
-                 c(2.45, 3.70, 2.90),
-                 c(2.05, 3.20, 3.80))
-colnames(my_odds) <- c('Home', 'Draw', 'Away')
+
+# One column for each outcome, one row for each race or match.
+my_odds <- rbind(c(4.20, 3.70, 1.95),
+                 c(2.45, 3.70, 2.90),
+                 c(2.05, 3.20, 3.80))
+colnames(my_odds) <- c('Home', 'Draw', 'Away')
+
+res1 <- implied_probabilities(my_odds)
 
-res1 <- implied_probabilities(my_odds)
-
-res1$probabilities
-#>           Home      Draw      Away
-#> [1,] 0.2331556 0.2646631 0.5021813
-#> [2,] 0.3988848 0.2641264 0.3369888
-#> [3,] 0.4586948 0.2938514 0.2474538
-
-res1$margin
-#> [1] 0.02118602 0.02326112 0.06346277
+res1$probabilities +#> Home Draw Away +#> [1,] 0.2331556 0.2646631 0.5021813 +#> [2,] 0.3988848 0.2641264 0.3369888 +#> [3,] 0.4586948 0.2938514 0.2474538 + +res1$margin +#> [1] 0.02118602 0.02326112 0.06346277

Margin Weights Proportional to the Odds

@@ -208,17 +227,17 @@

The odds ratio method

res3$probabilities #> Home Draw Away -#> [1,] 0.2320048 0.2636415 0.5043537 -#> [2,] 0.3996912 0.2633869 0.3369219 -#> [3,] 0.4634406 0.2919032 0.2446562 +#> [1,] 0.2320045 0.2636413 0.5043542 +#> [2,] 0.3996913 0.2633868 0.3369219 +#> [3,] 0.4634417 0.2919028 0.2446556 # The odds ratios converting the proper probablities to bookmaker probabilities. res3$odds_ratios -#> [1] 1.034449 1.035805 1.102606
+#> [1] 1.034456 1.035814 1.102631

The power method

-

The power method models the bookmaker probabilties as a power function of the proper probabilties. This method is also described in the Wisdom of the Crowds document, where it is referred to as the logarithmic method.

+

The power method models the bookmaker probabilities as a power function of the proper probabilities. This method is also described in the Wisdom of the Crowds document, where it is referred to as the logarithmic method.

pi = ri^(1/k)

where k is selected so that sum(pi) = 1.

res4 <- implied_probabilities(my_odds, method = 'power')
@@ -235,7 +254,7 @@ 

The power method

The additive method

-

The additive method removes the margin from the naive probabilites by subtracting an equal amount of of the margin from each outcome. The formula used is

+

The additive method removes the margin from the naive probabilities by subtracting an equal amount of the margin from each outcome. The formula used is

pi = ri - ((sum(r) - 1) / n)

If there are only two outcomes, the additive method and Shin’s method are equivalent.


@@ -289,15 +308,15 @@ 

Balanced books and Shin’s method

The Jensen–Shannon distance method

-

This method sees the improper bookmaker probabilties as a noisy version of the true underlying probabilities, and uses the Jensen–Shannon (JS) distance as a measure of how noisy the bookmaker probabilities are.

-

For the sake of finding the denoised proabilities pi, each outcome i is modelled as a binomial variable, with outcomes i and NOT i. These have probabilities pi and 1-pi, with corresponding improper bookmaker probabilities ri and 1-ri. For a given noise-level D, as measued by the symmetric JS distance, the underlying probabilities can be found by solving the JS distance equation for pi:

+

This method sees the improper bookmaker probabilities as a noisy version of the true underlying probabilities, and uses the Jensen–Shannon (JS) distance as a measure of how noisy the bookmaker probabilities are.

+

For the sake of finding the denoised probabilities pi, each outcome i is modeled as a binomial variable, with outcomes i and NOT i. These have probabilities pi and 1-pi, with corresponding improper bookmaker probabilities ri and 1-ri. For a given noise-level D, as measured by the symmetric JS distance, the underlying probabilities can be found by solving the JS distance equation for pi:

D = 0.5 * BKL(pi, mi) + 0.5 * BKL(ri, mi)

where mi = (pi + ri) / 2

and

BKL(x, y) = x * log(x/y) + (1-x) * log((1-x)/(1-y)) + y * log(y/x) + (1-y) * log((1-y)/(1-x))

is the “binomial” Kullback–Leibler divergence.

The solution is found numerically by finding the value of D so that sum(pi) = 1.

-

The method was developed by Christopher D. Long (twitter: @octonion), and described in a series of Twitter postings [1].

+

The method was developed by Christopher D. Long (twitter: @octonion), and described in a series of Twitter postings.

# Jensen–Shannon distance method
 res9 <- implied_probabilities(my_odds, method = 'jsd')
 
@@ -309,35 +328,54 @@ 

The Jensen–Shannon distance method

# The estimated noise (JS distance) res9$distance -#> [1] 0.005485370 0.005849242 0.016099288
+#> [1] 0.005485371 0.005849245 0.016099204
-
-

Converting probabilities to odds

+
+
+

Multiple winning outcomes

+

In the examples above it has been assumed that the probabilities should sum to 1. This is the correct approach when only 1 of the possible outcomes occurs, but this is not correct when multiple outcomes occur. One example of this is odds for players/teams to reach the final in a tournament. In this case the probabilities should sum to 2, as two of the outcomes will be considered a win. Another example is placing in the top 5 in a league, in which case the probabilities should sum to 5.

+

You can change the target_probability to something other than 1, and this works for most methods.

+
# Example odds.
+odds_reach_final <- c(1.6, 2.63, 3.3, 3.7, 5.6, 7.1, 12.5, 16.5, 25)
+
+res10 <- implied_probabilities(odds_reach_final, method = 'or', target_probability = 2)
+
+res10$probabilities
+#>           [,1]      [,2]      [,3]     [,4]     [,5]      [,6]       [,7]
+#> [1,] 0.6107334 0.3660944 0.2904215 0.258519 0.169879 0.1336902 0.07566384
+#>            [,8]       [,9]
+#> [1,] 0.05725567 0.03774298
+
+sum(res10$probabilities)
+#> [1] 2
+
+
+

Converting probabilities to odds

There is also a function that can do the opposite of what the implied_probabilities() function does, namely the implied_odds() function. This function converts probabilities to odds, for a given margin, the inverse of the methods as described above. Not all methods have been implemented yet. Take a look at the help file for the function for more details.

In the code example below we take the results of converting the odds to probabilities using the power method, and convert them back to odds again, with the same margin. We pretty much recover the original odds, except for some small numerical inaccuracy.

-

-res_odds1 <- implied_odds(res4$probabilities[1,], 
-                     margin = res4$margin[1], 
-                     method = 'power')
-
-res_odds1$odds
-#>      Home     Draw     Away
-#> [1,]  4.2 3.700001 1.950005
-
-# The exponents.
-res_odds1$exponents
-#> [1] 0.9797634
-
-# Compare to the exponent from the odds-to-probability conversion.
-res4$exponents[1]
-#> [1] 0.9797666
+

+res_odds1 <- implied_odds(res4$probabilities[1,], 
+                     margin = res4$margin[1], 
+                     method = 'power')
+
+res_odds1$odds
+#>      Home     Draw     Away
+#> [1,]  4.2 3.700001 1.950005
+
+# The exponents.
+res_odds1$exponents
+#> [1] 0.9797634
+
+# Compare to the exponent from the odds-to-probability conversion.
+res4$exponents[1]
+#> [1] 0.9797666
-
-

Other packages

+
+

Other packages

The odds.converter package can convert between different odds formats, including to decimal odds, that this package requires.

-
-

Literature

+
+

Literature

Here are some relevant references and links:

  • Joseph Buchdahl - USING THE WISDOM OF THE CROWD TO FIND VALUE IN A FOOTBALL MATCH BETTING MARKET Link

  • diff --git a/man/implied_probabilities.Rd b/man/implied_probabilities.Rd index 894d008..6a71895 100644 --- a/man/implied_probabilities.Rd +++ b/man/implied_probabilities.Rd @@ -8,8 +8,11 @@ implied_probabilities( odds, method = "basic", normalize = TRUE, + target_probability = 1, grossmargin = 0, - shin_method = "js" + shin_method = "js", + shin_maxiter = 1000, + uniroot_options = NULL ) } \arguments{ @@ -19,12 +22,18 @@ implied_probabilities( 'wpo', 'or', 'power', 'additive', and 'jsd'.} \item{normalize}{Logical. Some of the methods will give small rounding errors. If TRUE (default) -a final normalization is applied to make absoultely sure the +a final normalization is applied to make absolutely sure the probabilities sum to 1.} +\item{target_probability}{Numeric. The value the probabilities should sum to. Default is 1.} + \item{grossmargin}{Numeric. Must be 0 or greater. See the details.} -\item{shin_method}{Character. Either 'js' (defeault) or 'uniroot'. See the details.} +\item{shin_method}{Character. Either 'js' (default) or 'uniroot'. See the details.} + +\item{shin_maxiter}{numeric. Max number of iterations for shin method 'js'.} + +\item{uniroot_options}{list. Option passed on to the uniroot solver, for those methods where it is applicable. See 'details'.} } \value{ A named list. 
The first component is named 'probabilities' and contain a matrix of @@ -34,35 +43,35 @@ depends on the method used to compute the probabilities: \item{ zvalues (method = 'shin' and method='bb'): The estimated amount of insider trade.} \item{ specific_margins (method = 'wpo'): Matrix of the margins applied to each outcome.} \item{ odds_ratios (method = 'or'): Numeric with the odds ratio that are used to convert true - probabilities to bookmaker probabilties.} + probabilities to bookmaker probabilities.} \item{ exponents (method = 'power'): The (inverse) exponents that are used to convert true - probabilities to bookmaker probabilties.} + probabilities to bookmaker probabilities.} \item{ distance (method = 'jsd'): The Jensen-Shannon distances that are used to convert true - probabilities to bookmaker probabilties.} + probabilities to bookmaker probabilities.} } -The fourth compnent 'problematic' is a logical vector called indicating if any probabilites has fallen +The fourth component 'problematic' is a logical vector called indicating if any probabilities has fallen outside the 0-1 range, or if there were some other problem computing the probabilities. } \description{ -This function calculate the implied probabilties from bookmaker odds in decimal format, while -accounting for overround in the odds. +This function calculate the implied probabilities from bookmaker odds in decimal format, while +accounting for over-round in the odds. } \details{ The method 'basic' is the simplest method, and computes the implied probabilities by dividing the inverted odds by the sum of the inverted odds. The methods 'wpo' (Weights Proportional to the Odds), 'or' (Odds Ratio) and 'power' are form the Wisdom of the Crowds document (the updated version) by -Joseph Buchdahl. The method 'or' is origianlly by Cheung (2015), and the method 'power' is there referred +Joseph Buchdahl. 
The method 'or' is originally by Cheung (2015), and the method 'power' is there referred to as the logarithmic method. The method 'shin' uses the method by Shin (1992, 1993). This model assumes that there is a fraction of insider trading, and that the bookmakers tries to maximize their profits. In addition to providing -implied probabilties, the method also gives an estimate of the proportion if inside trade, denoted z. Two algorithms -are implemented for finding the probabilities and z. Which algorithm to use is chosen via the shin_mehod argument. +implied probabilities, the method also gives an estimate of the proportion if inside trade, denoted z. Two algorithms +are implemented for finding the probabilities and z. Which algorithm to use is chosen via the shin_method argument. The default method (shin_method = 'js') is based on the algorithm in Jullien & Salanié (1994). The 'uniroot' method uses R's built in equation solver to find the probabilities. The uniroot approach is also used for the -'pwr' and 'or' methods. The two methods might give slightly different answers, especially when the bookamer margin +'pwr' and 'or' methods. The two methods might give slightly different answers, especially when the bookmaker margin (and z) is small. The 'bb' (short for "balanced books") method is from Fingleton & Waldron (1999), and is a variant of Shin's method. It too assume @@ -71,12 +80,17 @@ minimize their risk. Both the 'shin' and 'bb' methods can be used together with the 'grossmargin' argument. This is also from the Fingleton & Waldron (1999) paper, and adds some further assumption to the calculations, -related to opperating costs. grossmargin should be 0 (default) or greater, typical range is 0 to 0.05. +related to operating costs. grossmargin should be 0 (default) or greater, typical range is 0 to 0.05. For values other than 0, this might sometimes cause some probabilities to not be identifiable. A warning will be given if this happens. 
The method 'jsd' was developed by Christopher D. Long, and described in a series of Twitter postings and a python implementation posted on GitHub. + +Methods 'shin', 'or', 'power', and 'jsd' use the uniroot solver to find the correct probabilities. Sometimes it will fail +to find a solution, but it can be made to work by tuning some setting. The uniroot_options argument accepts a list with +options that are passed on to the uniroot function. Currently the interval, maxit, tol and extendInt argument of +uniroot can be changed. See the Troubleshooting vignette for more details. } \section{References}{ diff --git a/tests/testthat/test_1.R b/tests/testthat/test_1.R index 5d0b528..f2b8052 100644 --- a/tests/testthat/test_1.R +++ b/tests/testthat/test_1.R @@ -16,10 +16,10 @@ my_odds2 <- t(matrix(1/c(0.870, 0.2, 0.1, 0.05, 0.02, 0.01))) toll <- 0.00005 +# Implied probabilities ---- context("Implied probabilities") - iprobs1_basic <- implied_probabilities(my_odds) iprobs1_shin <- implied_probabilities(my_odds, method='shin') iprobs1_shin2 <- implied_probabilities(my_odds, method='shin', grossmargin = 0.01) @@ -43,7 +43,7 @@ iprobs2_or <- implied_probabilities(my_odds2, method='or') iprobs2_power <- implied_probabilities(my_odds2, method='power') # The KL method does not work with my_odds2. -#iprobs2_kl <- implied_probabilities(my_odds2, method='kl') +#iprobs2_jsd <- implied_probabilities(my_odds2, method='jsd') @@ -170,6 +170,7 @@ test_that("Output", { }) +# Non-normalized results ---- context("Non-normalized results") iprobs1_basic_nn <- implied_probabilities(my_odds, normalize = FALSE) @@ -201,6 +202,7 @@ test_that("Non-normalized results", { }) +# Missing values ---- context("Missing values") # some example odds, with missing value @@ -286,7 +288,81 @@ test_that("missing values", { }) +# Target probabilities other than 1 ---- +context("Target probabilities other than 1") + +# Some English Premier League relegation odds. Should sum to 3. 
+relegation_odds <- c(1.53, 1.67, 1.25, 2.38, 2.38, 4.5, 5.5, 6.5, 7) + + +iprobs3_basic <- implied_probabilities(relegation_odds, method='basic', + target_probability = 3, + normalize = FALSE) + +# Shin does not work with the test odds. +# iprobs3_shin <- implied_probabilities(relegation_odds, method='shin', +# shin_method = 'uniroot', +# target_probability = 3, normalize = FALSE) + + +iprobs3_bb <- implied_probabilities(relegation_odds, method='bb', + target_probability = 3, normalize = FALSE) + +iprobs3_wpo <- implied_probabilities(relegation_odds, method='wpo', + target_probability = 3, normalize = FALSE) + +iprobs3_power <- implied_probabilities(relegation_odds, method='power', + target_probability = 3, normalize = FALSE) + +iprobs3_or <- implied_probabilities(relegation_odds, method='or', + target_probability = 3, normalize = FALSE) + +iprobs3_additive <- implied_probabilities(relegation_odds, method='additive', + target_probability = 3, normalize = FALSE) + + +iprobs3_jsd <- implied_probabilities(relegation_odds, method='jsd', + target_probability = 3, normalize = FALSE) + + + +test_that("Target probability 3", { + + expect_equal(class(iprobs3_basic), 'list') + expect_equal(class(iprobs3_bb), 'list') + expect_equal(class(iprobs3_wpo), 'list') + expect_equal(class(iprobs3_power), 'list') + expect_equal(class(iprobs3_or), 'list') + expect_equal(class(iprobs3_additive), 'list') + expect_equal(class(iprobs3_jsd), 'list') + expect_equal(all(abs(rowSums(iprobs3_basic$probabilities) - 3) < toll), TRUE) + expect_equal(all(abs(rowSums(iprobs3_bb$probabilities) - 3) < toll), TRUE) + expect_equal(all(abs(rowSums(iprobs3_wpo$probabilities) - 3) < toll), TRUE) + expect_equal(all(abs(rowSums(iprobs3_power$probabilities) - 3) < 0.0001), TRUE) + expect_equal(all(abs(rowSums(iprobs3_or$probabilities) - 3) < toll), TRUE) + expect_equal(all(abs(rowSums(iprobs3_additive$probabilities) - 3) < toll), TRUE) + expect_equal(all(abs(rowSums(iprobs3_jsd$probabilities) - 3) < toll),
TRUE) + + + expect_equal(all(iprobs3_basic$margin > 0), TRUE) + expect_equal(all(iprobs3_bb$margin > 0), TRUE) + expect_equal(all(iprobs3_wpo$margin > 0), TRUE) + expect_equal(all(iprobs3_power$margin > 0), TRUE) + expect_equal(all(iprobs3_or$margin > 0), TRUE) + expect_equal(all(iprobs3_additive$margin > 0), TRUE) + expect_equal(all(iprobs3_jsd$margin > 0), TRUE) + + expect_equal(is.null(iprobs3_wpo$specific_margins), FALSE) + expect_equal(is.null(iprobs3_or$odds_ratios), FALSE) + expect_equal(is.null(iprobs3_power$exponents), FALSE) + expect_equal(is.null(iprobs3_jsd$distance), FALSE) + + }) + + + +# Implied odds ---- context("Implied odds") @@ -393,6 +469,24 @@ test_that("Output", { }) + +# uniroot options ---- +context("uniroot options") + +# Example where the interval is too narrow (true or > 1.03), and extendInt is set to 'no'. + + +test_that("Uniroot options", { + + # Example where the interval is too narrow (true or > 1.03), and extendInt is set to 'no'. + # Should give a warning, thus demonstrating that uniroot_options works. + expect_warning( + implied_probabilities(my_odds, method='or', uniroot_options = list(interval = c(1, 1.01), extendInt = 'no')) + ) + +}) + +# Converting between odds and probabilities ---- context("Converting between odds and probabilities") diff --git a/vignettes/Troubleshooting.Rmd b/vignettes/Troubleshooting.Rmd new file mode 100644 index 0000000..265c22e --- /dev/null +++ b/vignettes/Troubleshooting.Rmd @@ -0,0 +1,162 @@ +--- +title: "Troubleshooting" +author: "Jonas C. Lindstrøm" +date: "`r Sys.Date()`" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Troubleshooting} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r setup, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + + +In this document you can find some guidance on what to do if you get errors or weird results.
+ +Errors and warning messages will typically occur when the algorithms that convert +odds to probabilities fail to give proper results. This does not necessarily +mean that there is a bug in the algorithms, it is just as likely that the mathematical +relationship between the odds and the underlying probabilities does not conform to +the assumptions needed for the different methods to work. + +In my experience, the following scenarios can often cause problems for at least +some of the methods: + +- Extremely large odds can often create problems. +- Many outcomes, such as win-odds for competitions with many contestants. +- Very large bookmaker margins. + +When the conversion from odds to probabilities fails, or gives inappropriate results, +a warning will show that indicates what the problem might be. In addition, the output +from the implied_probabilities function will contain a vector that indicates which +lines have problems. + +## Identifying problematic results + +Here is an example. In the code below there are 3 lines of 4-way odds that are converted +to probabilities using the 'additive' method. The first of these odds fails to be +properly converted and you get a warning saying 'Probabilities outside the 0-1 +range produced at 1 instances'. + +The point of this example is not to 'fix' the results or tweak the algorithm to +work. The algorithm works as it should, it is just that the mathematical relationship +between the odds and the underlying probabilities does not work well with the additive +method. The point is just to show how to find the problematic results, if they occur. + +The methods 'wpo' and 'bb' don't work with this set of odds either, but the +rest do.
+ +```{r example1_1} +library(implied) + +my_odds <- rbind(c(1.15, 5, 10, 25), + c(4.1, 4.2, 8.2, 2.1), + c(3.8, 4.7, 5.9, 2.3)) + +my_probs <- implied_probabilities(my_odds, method = 'additive') + +``` + + +The vector named "problematic" indicates that there is a problem in the first odds-line +in the input. + +```{r example1_2} +my_probs$problematic + +# Can also just list the line numbers +which(my_probs$problematic) + +``` + +And if we look at the probabilities, you will see that the 4th probability +in the first line is negative, which isn't a valid probability. + +```{r example1_3} +my_probs$probabilities +``` + + + + +## Warning: Probabilities outside the 0-1 range + +This warning means that some of the converted probabilities are outside the valid +range of probabilities, which is between 0 and 1. Most likely a negative probability. + +Unless there is an accompanying warning about uniroot-problems, there isn't much +to do about this, and you should conclude that the conversion method you have used +is incompatible with the odds you have. Try another method. + + + +## Warning: Error in stats::uniroot: f() values at end points not of opposite sign + +This error can happen when using the methods 'shin', 'or', 'power', or 'jsd'. These +methods convert the odds to probabilities using an equation solver called uniroot. +Uniroot does a search of possible values of the factor used in the methods, and +finds the factor that gives correct probabilities (i.e. they sum to 1). Sometimes +the solver can't find the probabilities. + +There are two possible reasons for why the solver can't find the correct factor and +the correct probabilities. The first reason (and the most likely) is that the method +you have chosen simply does not have a valid solution. Unfortunately, there is not +really much to do about it, other than using a different method. + +The second reason could be that some of the settings used in the solver do not +allow the algorithm to find the solution.
You can change some of the settings in +the uniroot solver via the uniroot_options argument in implied_probabilities(). + +The following uniroot settings can be changed: interval, maxit, tol and extendInt. +Take a look at the help page for the uniroot function for more information about +the different settings. + + + +## Warning in log(x/y) : NaNs produced + +This warning sometimes occurs with method 'jsd', when the odds are extreme or otherwise +difficult to convert. This does not get flagged as problematic, because it might +not actually be a problem. But you should check if the probabilities in question +seem reasonable. + + +## Error: Some inverse odds sum to less than 1. + +This error occurs when the naive implied probabilities sum to less than 1. The whole +point of the conversion methods in this package is to convert odds to proper probabilities +where the odds imply a total probability greater than 1, which gives the bookmakers +an advantage. If they sum to less than one it means that the bookmaker's odds are advantageous +for the bettor. This is a very unlikely scenario, and it is most likely due to an +error in your data processing pipeline. + +The conversion methods might be made to work in this case, but I haven't tried or +tested it. This might change in the future. + + +## Warning: Could not find z: Did not converge in x instances. Some results may be unreliable. + +This warning can happen with method 'shin', and with shin_method = 'js'. There are +two possible fixes: + + - Try to increase shin_maxiter from the default 1000 to something larger, like 2000. + - Change shin_method to 'uniroot'. + + +## Warning: z estimated to be negative: Some results may be unreliable. + +This warning can happen with methods 'shin' and 'bb'. I am actually not sure if +the results should be considered unreliable, or if they can be useful. These are +not flagged as problematic, and you need to look at the 'zvalues' in the output +to see which ones are negative.
+ + + + + diff --git a/vignettes/introduction.Rmd b/vignettes/introduction.Rmd index d3ea57a..45ec38a 100644 --- a/vignettes/introduction.Rmd +++ b/vignettes/introduction.Rmd @@ -1,252 +1,277 @@ ---- -title: "Introduction to the implied package" -author: "Jonas C. Lindstrøm" -date: "`r Sys.Date()`" -output: rmarkdown::html_vignette -vignette: > - %\VignetteIndexEntry{Introduction to the implied package} - %\VignetteEngine{knitr::rmarkdown} - %\VignetteEncoding{UTF-8} ---- - -```{r setup, include = FALSE} -knitr::opts_chunk$set( - collapse = TRUE, - comment = "#>" -) -``` - -This package contains functions that convert between bookmaker odds and probabilities. The function implied_probabilities() convert bookmaker odds into proper probabiltiies. The function implied_odds() does the inverse conversion, it turns proper probabilities into bookmaker odds. Several methods are available, with different assumptions regarding the underlying mechanism the bookmakers convert their probabilities into odds. The main focus of this introduction is present how the package works and the methods that convert bookmaker odds into probabilities and. Towards the end is a small demostration on how to convert probabiliteis to bookmaker odds. - -A naive conversion of bookmaker odds into probabilities has two main problems. The first is that the probabilities are not proper probabilities, since they sum to more than 1. The excess probability is called the bookmakers margin. The second problem is that the probabilities, even if the margin is removed, will be biased in several ways, usually because of what is called the [favorite-longshot bias](https://en.wikipedia.org/wiki/Favourite-longshot_bias). The methods in this package remove the bookmaker margin and some of them also adjust for favorite-longshot bias. - - -## The basic method - -The default method used by the function implied_probabilities() is called the basic method. 
This is the simplest and most common method for converting bookmaker odds into probabilties, and is obtained by dividing the naive probabilities (the inverted odds) by the sum of the inverted odds. If pi is the true underlying probability for outcome i, and ri is the cooresponding inverted odds, then the probabilities are computed as - -pi = ri / sum(r) - -This method tend to be the least accurate of the methods in this package. I have also seen this normalization method been referred to as the multiplicative method. - -The implied_probabilities() function return a list with the proper probabilities (as a matrix) and the bookmaker margins. - -In the examples below are three sets of bookmaker odds from three football matches. - -```{r basic} - -library(implied) - -# One column for each outcome, one row for each race or match. -my_odds <- rbind(c(4.20, 3.70, 1.95), - c(2.45, 3.70, 2.90), - c(2.05, 3.20, 3.80)) -colnames(my_odds) <- c('Home', 'Draw', 'Away') - -res1 <- implied_probabilities(my_odds) - -res1$probabilities - -res1$margin - -``` - - -## Margin Weights Proportional to the Odds - -This method is from [Joseph Buchdahl's Wisom of the Crowds document](https://www.football-data.co.uk/wisdom_of_crowd_bets), and assumes that the margin applied by the bookmaker for each of the outcome is proprtional to the probabilitiy of the outcome. In other words, the excessive probabilties are unevenly applied in a way that is reflects the favorite-longshot bias. - -The probabilities are calculated from the bookmaker odds O using the following formula - -pi = (n - M * Oi) / n * Oi - -where n is the number of outcomes, and M is the bookmaker margin. - -```{r wpo} -res2 <- implied_probabilities(my_odds, method = 'wpo') - -res2$probabilities - -# The margins applied to each outcome. 
-res2$specific_margins -``` - -## The odds ratio method - -The odds ratio method is also from the Wisdom of the Crowds document, but is originally from an [article by Keith Cheung](https://www.sportstradingnetwork.com/article/fixed-odds-betting-traditional-odds/). This method models the relationship between the proper probabilities and the improper bookmaker probabilties using the odds ratio (OR) function: - -OR = pi (1 - ri) / ri (1 - pi) - -This gives the probabilities - -pi = ri / OR + ri - (OR * ri) - -where the odds ratio OR is selected so that sum(pi) = 1. - - -```{r or} -res3 <- implied_probabilities(my_odds, method = 'or') - -res3$probabilities - -# The odds ratios converting the proper probablities to bookmaker probabilities. -res3$odds_ratios -``` - -## The power method - -The power method models the bookmaker probabilties as a power function of the proper probabilties. This method is also described in the Wisdom of the Crowds document, where it is referred to as the logarithmic method. - -pi = ri(1/k) - -where k is selected so that sum(pi) = 1. - -```{r power} -res4 <- implied_probabilities(my_odds, method = 'power') - -res4$probabilities - -# The inverse exponents (n) used to convert the proper probablities to bookmaker probabilities. -res4$exponents -``` - -## The additive method - -The additive method removes the margin from the naive probabilites by subtracting an equal amount of of the margin from each outcome. The formula used is - -pi = ri - ((sum(r) - 1) / n) - -If there are only two outcomes, the additive method and Shin's method are equivalent. - - -```{r additive1} - -res5 <- implied_probabilities(my_odds, method = 'additive') - -res5$probabilities - -``` - -One problem with the additive method is that it can produce negative probabilities, escpecially for outcomes with low probabilties. This can often be the case when there are many outcomes, for example in racing sports. If this happens, you will be given a warning. 
Here is an example taken from Clarke et al (2017): - -```{r additive2} - -my_odds2 <- t(matrix(1/c(0.870, 0.2, 0.1, 0.05, 0.02, 0.01))) -colnames(my_odds2) <- paste('X', 1:6, sep='') - -res6 <- implied_probabilities(my_odds2, method = 'additive') - -res6$probabilities - -``` - - - - -## Balanced books and Shin's method - -The two methods referred to as "balanced book" and Shin's method are based on the assumption that there is a small proportion of bettors that actually knows the outcome (called inside traders), and the rest of the bettors reflect the otherwise "true" uncertainty about the outcome. The proportion of inside traders is denoted Z. - -The two methods differ in what assumptions they make about how the bookmakers react to the pressence of inside traders. Shin's method is derived from the assumption that the bookmakers tries to maximize their profits when there are inside traders. The balanced books method assumes the bookmakers tries to minimize their losses in the worst case scenario if the least likely outcome were to acctually occur. - -We can not know what the insiders know, but both methods gives an estimate of the proportion of insiders. - - -```{r shin} -res7 <- implied_probabilities(my_odds, method = 'shin') - -res7$probabilities - -# The estimated proportion of inside traders. -res7$zvalues -``` - - -```{r bb} -# Balanced books -res8 <- implied_probabilities(my_odds, method = 'bb') - -res8$probabilities - -# The estimated proportion of inside traders. -res8$zvalues -``` - - -## The Jensen–Shannon distance method - -This method sees the improper bookmaker probabilties as a noisy version of the true underlying probabilities, and uses the [Jensen–Shannon (JS) distance](https://en.wikipedia.org/wiki/Jensen%E2%80%93Shannon_divergence) as a measure of how noisy the bookmaker probabilities are. - -For the sake of finding the denoised proabilities pi, each outcome i is modelled as a binomial variable, with outcomes i and NOT i. 
These have probabilities pi and 1-pi, with corresponding improper bookmaker probabilities ri and 1-ri. For a given noise-level D, as measued by the symmetric JS distance, the underlying probabilities can be found by solving the JS distance equation for pi: - - -D = 0.5 * BKL(pi, mi) + 0.5 * BKL(ri, mi) - -where mi = (pi + ri) / 2 - -and - -BKL(x, y) = x * log(x/y) + (1-x) * log((1-x)/(1-y))) + y * log(y/x) + (1-y) * log((1-y)/(1-y)) - -is the "binomial" Kullback–Leibler divergence. - -The solution is found numerically by finding the value of of D so that sum(pi) = 1. - -The method was developed by Christopher D. Long (twitter: [\@octonion](https://twitter.com/octonion)), and described in a series of Twitter postings [[1](https://twitter.com/octonion/status/1412847000068952064)]. - -```{r jsd} -# Balanced books -res9 <- implied_probabilities(my_odds, method = 'jsd') - -res9$probabilities - -# The estimated noise (JS distance) -res9$distance -``` - -## Converting probabilities to odds - -There is also a function that can do the opposite what the implied_probabilities() function does, namely the implied_odds() function. This function converts probabilities to odds, for a given margin, the inverse of the methods as described above. Not all methods have been implemented yet. Take a look at the help file for the function for more details. - -In the code example below we use take the results of converting the odds to probabilities using the power method, and convert them back to odds again, with the same margin. We pretty much recover the original odds, except for some small numerical inaccuracy. - - -```{r imp_odds1} - -res_odds1 <- implied_odds(res4$probabilities[1,], - margin = res4$margin[1], - method = 'power') - -res_odds1$odds - -# The exponents. -res_odds1$exponents - -# Compare to the exponent from the odds-to-probability conversion. 
-res4$exponents[1] - -``` - - -## Other packages -The [odds.converter](https://cran.r-project.org/package=odds.converter) package can convert between different odds formats, including to decimal odds, that this package requires. - - -## Literature -Here are some relevant references and links: - -* Joseph Buchdahl - USING THE WISDOM OF THE CROWD TO FIND VALUE IN A FOOTBALL MATCH BETTING MARKET [Link](https://www.football-data.co.uk/wisdom_of_crowd_bets) - -* Keith Cheung (2015) Fixed-odds betting and traditional odds [Link](https://www.sportstradingnetwork.com/article/fixed-odds-betting-traditional-odds/) - -* Stephen Clarke, Stephanie Kovalchik & Martin Ingram (2017) Adjusting Bookmaker’s Odds to Allow for Overround [Link](http://www.sciencepublishinggroup.com/journal/paperinfo?journalid=155&doi=10.11648/j.ajss.20170506.12) - -* Hyun Song Shin (1992) Prices of State Contingent Claims with Insider Traders, and the Favourite-Longshot Bias [Link](https://doi.org/10.2307/2234526) - -* Hyun Song Shin (1993) Measuring the Incidence of Insider Trading in a Market for State-Contingent Claims [Link](https://doi.org/10.2307/2234526) - -* Bruno Jullien & Bernard Salanié (1994) Measuring the incidence of insider trading: A comment on Shin [Link](https://doi.org/10.2307/2235458) - -* John Fingleton & Patrick Waldron (1999) Optimal Determination of Bookmakers' Betting Odds: Theory and Tests.[Link](https://www.semanticscholar.org/paper/Optimal-Determination-of-Bookmakers'-Betting-Odds%3A-Fingleton-Waldron/e576f3b103e0ba041ae072a9201b948059c7806e) - - +--- +title: "Introduction to the implied package" +author: "Jonas C. 
Lindstrøm" +date: "`r Sys.Date()`" +output: + rmarkdown::html_vignette: + toc: true +vignette: > + %\VignetteIndexEntry{Introduction to the implied package} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r setup, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +This package contains functions that convert between bookmaker odds and probabilities. The function implied_probabilities() converts bookmaker odds into proper probabilities. The function implied_odds() does the inverse conversion, it turns proper probabilities into bookmaker odds. Several methods are available, with different assumptions regarding the underlying mechanism the bookmakers use to convert their probabilities into odds. The main focus of this introduction is to present how the package works and the methods that convert bookmaker odds into probabilities. Towards the end is a small demonstration on how to convert probabilities to bookmaker odds. + +A naive conversion of bookmaker odds into probabilities has two main problems. The first is that the probabilities are not proper probabilities, since they sum to more than 1. The excess probability is called the bookmaker's margin. The second problem is that the probabilities, even if the margin is removed, will be biased in several ways, usually because of what is called the [favorite-longshot bias](https://en.wikipedia.org/wiki/Favourite-longshot_bias). The methods in this package remove the bookmaker margin and some of them also adjust for favorite-longshot bias. + +In version 0.5 a new feature was introduced. It is now possible to convert odds to probabilities with multiple winners, which means that the probabilities should sum to something greater than 1. One example of this is when you have odds for different teams/players to finish top 3 in a league, in which case the probabilities should sum to 3 instead of 1. The details are explained towards the end of this document.
+ + +# Methods +## The basic method + +The default method used by the function implied_probabilities() is called the basic method. This is the simplest and most common method for converting bookmaker odds into probabilities, and is obtained by dividing the naive probabilities (the inverted odds) by the sum of the inverted odds. If pi is the true underlying probability for outcome i, and ri is the corresponding inverted odds, then the probabilities are computed as + +pi = ri / sum(r) + +This method tends to be the least accurate of the methods in this package. I have also seen this normalization method being referred to as the multiplicative method. + +The implied_probabilities() function returns a list with the proper probabilities (as a matrix) and the bookmaker margins. + +In the examples below are three sets of bookmaker odds from three football matches. + +```{r basic} + +library(implied) + +# One column for each outcome, one row for each race or match. +my_odds <- rbind(c(4.20, 3.70, 1.95), + c(2.45, 3.70, 2.90), + c(2.05, 3.20, 3.80)) +colnames(my_odds) <- c('Home', 'Draw', 'Away') + +res1 <- implied_probabilities(my_odds) + +res1$probabilities + +res1$margin + +``` + + +## Margin Weights Proportional to the Odds + +This method is from [Joseph Buchdahl's Wisdom of the Crowds document](https://www.football-data.co.uk/wisdom_of_crowd_bets), and assumes that the margin applied by the bookmaker for each of the outcomes is proportional to the probability of the outcome. In other words, the excessive probabilities are unevenly applied in a way that reflects the favorite-longshot bias. + +The probabilities are calculated from the bookmaker odds O using the following formula + +pi = (n - M * Oi) / (n * Oi) + +where n is the number of outcomes, and M is the bookmaker margin. + +```{r wpo} +res2 <- implied_probabilities(my_odds, method = 'wpo') + +res2$probabilities + +# The margins applied to each outcome.
+res2$specific_margins +``` + +## The odds ratio method + +The odds ratio method is also from the Wisdom of the Crowds document, but is originally from an [article by Keith Cheung](https://www.sportstradingnetwork.com/article/fixed-odds-betting-traditional-odds/). This method models the relationship between the proper probabilities and the improper bookmaker probabilities using the odds ratio (OR) function: + +OR = ri (1 - pi) / (pi (1 - ri)) + +This gives the probabilities + +pi = ri / (OR + ri - (OR * ri)) + +where the odds ratio OR is selected so that sum(pi) = 1. + + +```{r or} +res3 <- implied_probabilities(my_odds, method = 'or') + +res3$probabilities + +# The odds ratios converting the proper probabilities to bookmaker probabilities. +res3$odds_ratios +``` + +## The power method + +The power method models the bookmaker probabilities as a power function of the proper probabilities. This method is also described in the Wisdom of the Crowds document, where it is referred to as the logarithmic method. + +pi = ri^(1/k) + +where k is selected so that sum(pi) = 1. + +```{r power} +res4 <- implied_probabilities(my_odds, method = 'power') + +res4$probabilities + +# The inverse exponents (n) used to convert the proper probabilities to bookmaker probabilities. +res4$exponents +``` + +## The additive method + +The additive method removes the margin from the naive probabilities by subtracting an equal amount of the margin from each outcome. The formula used is + +pi = ri - ((sum(r) - 1) / n) + +If there are only two outcomes, the additive method and Shin's method are equivalent. + + +```{r additive1} + +res5 <- implied_probabilities(my_odds, method = 'additive') + +res5$probabilities + +``` + +One problem with the additive method is that it can produce negative probabilities, especially for outcomes with low probabilities. This can often be the case when there are many outcomes, for example in racing sports. If this happens, you will be given a warning.
Here is an example taken from Clarke et al (2017): + +```{r additive2} + +my_odds2 <- t(matrix(1/c(0.870, 0.2, 0.1, 0.05, 0.02, 0.01))) +colnames(my_odds2) <- paste('X', 1:6, sep='') + +res6 <- implied_probabilities(my_odds2, method = 'additive') + +res6$probabilities + +``` + + + + +## Balanced books and Shin's method + +The two methods referred to as "balanced book" and Shin's method are based on the assumption that there is a small proportion of bettors who actually know the outcome (called inside traders), and the rest of the bettors reflect the otherwise "true" uncertainty about the outcome. The proportion of inside traders is denoted Z. + +The two methods differ in what assumptions they make about how the bookmakers react to the presence of inside traders. Shin's method is derived from the assumption that the bookmakers try to maximize their profits when there are inside traders. The balanced books method assumes the bookmakers try to minimize their losses in the worst case scenario if the least likely outcome were to actually occur. + +We can not know what the insiders know, but both methods give an estimate of the proportion of insiders. + + +```{r shin} +res7 <- implied_probabilities(my_odds, method = 'shin') + +res7$probabilities + +# The estimated proportion of inside traders. +res7$zvalues +``` + + +```{r bb} +# Balanced books +res8 <- implied_probabilities(my_odds, method = 'bb') + +res8$probabilities + +# The estimated proportion of inside traders. +res8$zvalues +``` + + +## The Jensen–Shannon distance method + +This method sees the improper bookmaker probabilities as a noisy version of the true underlying probabilities, and uses the [Jensen–Shannon (JS) distance](https://en.wikipedia.org/wiki/Jensen%E2%80%93Shannon_divergence) as a measure of how noisy the bookmaker probabilities are. + +For the sake of finding the denoised probabilities pi, each outcome i is modeled as a binomial variable, with outcomes i and NOT i.
These have probabilities pi and 1-pi, with corresponding improper bookmaker probabilities ri and 1-ri. For a given noise-level D, as measured by the symmetric JS distance, the underlying probabilities can be found by solving the JS distance equation for pi: + + +D = 0.5 * BKL(pi, mi) + 0.5 * BKL(ri, mi) + +where mi = (pi + ri) / 2 + +and + +BKL(x, y) = x * log(x/y) + (1-x) * log((1-x)/(1-y)) + +is the "binomial" Kullback–Leibler divergence. + +The solution is found numerically by finding the value of D so that sum(pi) = 1. + +The method was developed by Christopher D. Long (twitter: @octonion), and described in a series of Twitter postings. + +```{r jsd} +# Jensen-Shannon distance +res9 <- implied_probabilities(my_odds, method = 'jsd') + +res9$probabilities + +# The estimated noise (JS distance) +res9$distance +``` + + +# Multiple winning outcomes + +In the examples above it has been assumed that the probabilities should sum to 1. This is the correct approach when only 1 of the possible outcomes occurs, but this is not correct when multiple outcomes occur. One example of this is odds for players/teams to reach the final in a tournament. In this case the probabilities should sum to 2, as two of the outcomes will be considered a win. Another example is placing in the top 5 in a league, in which case the probabilities should sum to 5. + +You can change the target_probability to something other than 1, and this works for most methods. + +```{r target_prob} +# Example odds. +odds_reach_final <- c(1.6, 2.63, 3.3, 3.7, 5.6, 7.1, 12.5, 16.5, 25) + +res10 <- implied_probabilities(odds_reach_final, method = 'or', target_probability = 2) + +res10$probabilities + +sum(res10$probabilities) + +``` + + +# Converting probabilities to odds + +There is also a function that can do the opposite of what the implied_probabilities() function does, namely the implied_odds() function.
This function converts probabilities to odds, for a given margin, the inverse of the methods as described above. Not all methods have been implemented yet. Take a look at the help file for the function for more details. + +In the code example below we take the results of converting the odds to probabilities using the power method, and convert them back to odds again, with the same margin. We pretty much recover the original odds, except for some small numerical inaccuracy. + + +```{r imp_odds1} + +res_odds1 <- implied_odds(res4$probabilities[1,], + margin = res4$margin[1], + method = 'power') + +res_odds1$odds + +# The exponents. +res_odds1$exponents + +# Compare to the exponent from the odds-to-probability conversion. +res4$exponents[1] + +``` + + +# Other packages +The [odds.converter](https://cran.r-project.org/package=odds.converter) package can convert between different odds formats, including to the decimal odds that this package requires. + + +# Literature +Here are some relevant references and links: + +* Joseph Buchdahl - USING THE WISDOM OF THE CROWD TO FIND VALUE IN A FOOTBALL MATCH BETTING MARKET [Link](https://www.football-data.co.uk/wisdom_of_crowd_bets) + +* Keith Cheung (2015) Fixed-odds betting and traditional odds [Link](https://www.sportstradingnetwork.com/article/fixed-odds-betting-traditional-odds/) + +* Stephen Clarke, Stephanie Kovalchik & Martin Ingram (2017) Adjusting Bookmaker’s Odds to Allow for Overround [Link](http://www.sciencepublishinggroup.com/journal/paperinfo?journalid=155&doi=10.11648/j.ajss.20170506.12) + +* Hyun Song Shin (1992) Prices of State Contingent Claims with Insider Traders, and the Favourite-Longshot Bias [Link](https://doi.org/10.2307/2234526) + +* Hyun Song Shin (1993) Measuring the Incidence of Insider Trading in a Market for State-Contingent Claims [Link](https://doi.org/10.2307/2234526) + +* Bruno Jullien & Bernard Salanié (1994) Measuring the incidence of insider trading: A comment on Shin
[Link](https://doi.org/10.2307/2235458) + +* John Fingleton & Patrick Waldron (1999) Optimal Determination of Bookmakers' Betting Odds: Theory and Tests.[Link](https://www.semanticscholar.org/paper/Optimal-Determination-of-Bookmakers'-Betting-Odds%3A-Fingleton-Waldron/e576f3b103e0ba041ae072a9201b948059c7806e) + +